1
1

consolidate part of HP/LP fields.

This commit was SVN r11528.
Этот коммит содержится в:
Gleb Natapov 2006-09-05 16:00:18 +00:00
родитель b6bac100b0
Коммит fe932ca7bf
6 изменённых файлов: 192 добавлений и 269 удалений

Просмотреть файл

@ -655,11 +655,11 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
frag->wr_desc.sr_desc.opcode = IBV_WR_RDMA_WRITE;
/* check for a send wqe */
if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,-1) < 0) {
if (OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],-1) < 0) {
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],1);
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
opal_list_append(&endpoint->pending_frags[BTL_OPENIB_LP_QP], (opal_list_item_t *)frag);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
return OMPI_SUCCESS;
@ -712,21 +712,21 @@ int mca_btl_openib_get( mca_btl_base_module_t* btl,
frag->wr_desc.sr_desc.opcode = IBV_WR_RDMA_READ;
/* check for a send wqe */
if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,-1) < 0) {
if (OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],-1) < 0) {
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],1);
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
opal_list_append(&endpoint->pending_frags[BTL_OPENIB_LP_QP], (opal_list_item_t *)frag);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
return OMPI_SUCCESS;
/* check for a get token */
} else if(OPAL_THREAD_ADD32(&endpoint->get_tokens,-1) < 0) {
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],1);
OPAL_THREAD_ADD32(&endpoint->get_tokens,1);
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t*)frag);
opal_list_append(&endpoint->pending_frags[BTL_OPENIB_LP_QP], (opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
return OMPI_SUCCESS;

Просмотреть файл

@ -192,16 +192,10 @@ struct mca_btl_openib_module_t {
int32_t rd_num;
int32_t rd_low;
int32_t sd_tokens_hp;
/**< number of high priority frags that can be outstanding (down counter) */
int32_t sd_tokens_lp;
/**< number of low priority frags that can be outstanding (down counter) */
int32_t sd_tokens[2];
/**< number of frags that can be outstanding (down counter) */
opal_list_t pending_frags_hp;
/**< list of pending high priority frags */
opal_list_t pending_frags_lp;
/**< list of pending low priority frags */
opal_list_t pending_frags[2]; /**< list of pending frags */
opal_mutex_t eager_rdma_lock;
size_t eager_rdma_frag_size; /**< length of eager frag */
@ -483,6 +477,9 @@ int mca_btl_openib_create_cq_srq(mca_btl_openib_module_t* openib_btl);
#endif
/* Slot indices into the two-element per-priority arrays (sd_tokens[],
 * sd_wqe[], pending_frags[]): 0 selects the high priority (HP) entry,
 * 1 selects the low priority (LP) entry. */
#define BTL_OPENIB_HP_QP 0
#define BTL_OPENIB_LP_QP 1
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -76,6 +76,9 @@ static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl,
size_t byte_len);
static char* btl_openib_component_status_to_string(enum ibv_wc_status status);
static int btl_openib_component_progress(void);
static void btl_openib_frag_progress_pending(
mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint,
int prio);
mca_btl_openib_component_t mca_btl_openib_component = {
@ -550,13 +553,13 @@ btl_openib_component_init(int *num_btl_modules,
mca_btl_openib_component.rd_rsv;
openib_btl->rd_low = mca_btl_openib_component.rd_low;
openib_btl->num_peers = 0;
openib_btl->sd_tokens_hp = openib_btl->sd_tokens_lp =
mca_btl_openib_component.srq_sd_max;
openib_btl->sd_tokens[BTL_OPENIB_HP_QP] =
openib_btl->sd_tokens[BTL_OPENIB_LP_QP] = mca_btl_openib_component.srq_sd_max;
/* Initialize module state */
OBJ_CONSTRUCT(&openib_btl->pending_frags_hp, opal_list_t);
OBJ_CONSTRUCT(&openib_btl->pending_frags_lp, opal_list_t);
OBJ_CONSTRUCT(&openib_btl->pending_frags[BTL_OPENIB_HP_QP], opal_list_t);
OBJ_CONSTRUCT(&openib_btl->pending_frags[BTL_OPENIB_LP_QP], opal_list_t);
OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t);
OBJ_CONSTRUCT(&openib_btl->send_free_eager, ompi_free_list_t);
@ -717,7 +720,8 @@ static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl,
}
if (!mca_btl_openib_component.use_srq) {
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp, frag->hdr->credits);
OPAL_THREAD_ADD32(&endpoint->sd_tokens[BTL_OPENIB_HP_QP],
frag->hdr->credits);
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,
frag->hdr->rdma_credits);
}
@ -731,6 +735,7 @@ static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl,
mca_btl_openib_endpoint_connect_eager_rdma(endpoint);
}
/* repost receive descriptors */
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
if(mca_btl_openib_component.use_srq) {
@ -743,27 +748,7 @@ static int btl_openib_handle_incoming_hp(mca_btl_openib_module_t *openib_btl,
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 0);
}
/* check to see if we need to progress any pending desciptors */
if(endpoint->sd_tokens_hp > 0 ||
endpoint->eager_rdma_remote.tokens > 0) {
while(!opal_list_is_empty(&endpoint->pending_frags_hp) &&
endpoint->sd_wqe_hp > 0 &&
(endpoint->sd_tokens_hp > 0 ||
endpoint->eager_rdma_remote.tokens > 0)) {
opal_list_item_t *frag_item;
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
frag_item = opal_list_remove_first(&(endpoint->pending_frags_hp));
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if(NULL == (frag = (mca_btl_openib_frag_t *) frag_item))
break;
if(OMPI_SUCCESS !=
mca_btl_openib_endpoint_send(frag->endpoint, frag)) {
BTL_ERROR(("error in posting pending send\n"));
break;
}
}
}
btl_openib_frag_progress_pending(openib_btl, endpoint, BTL_OPENIB_HP_QP);
/* check to see if we need to return credits */
if((endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win ||
@ -856,15 +841,80 @@ static char* btl_openib_component_status_to_string(enum ibv_wc_status status)
}
/*
 * Re-issue a single previously queued fragment.
 *
 * The operation is chosen from the opcode stored in the fragment's send
 * work request (frag->wr_desc.sr_desc.opcode):
 *   IBV_WR_SEND       -> mca_btl_openib_endpoint_send()
 *   IBV_WR_RDMA_WRITE -> mca_btl_openib_put()
 *   IBV_WR_RDMA_READ  -> mca_btl_openib_get()
 *
 * Returns whatever the selected function returns.  Any other opcode is
 * reported through BTL_ERROR and OMPI_ERROR is returned.
 */
static inline int btl_openib_frag_progress_one(
        mca_btl_openib_module_t* openib_btl,
        mca_btl_openib_frag_t* frag)
{
    mca_btl_base_module_t *base_btl = (mca_btl_base_module_t*) openib_btl;
    mca_btl_base_descriptor_t *des = (mca_btl_base_descriptor_t*) frag;

    switch(frag->wr_desc.sr_desc.opcode) {
    case IBV_WR_SEND:
        return mca_btl_openib_endpoint_send(frag->endpoint, frag);

    case IBV_WR_RDMA_WRITE:
        return mca_btl_openib_put(base_btl, frag->endpoint, des);

    case IBV_WR_RDMA_READ:
        return mca_btl_openib_get(base_btl, frag->endpoint, des);

    default:
        break;
    }

    /* only reached for an opcode none of the cases above handle */
    BTL_ERROR(("error in posting pending operation, "
               "invalide opcode %d\n", frag->wr_desc.sr_desc.opcode));
    return OMPI_ERROR;
}
/*
 * Number of send tokens currently usable on endpoint E for priority P.
 * For the high priority slot the endpoint's eager RDMA remote tokens are
 * added to sd_tokens[P]; for any other slot only sd_tokens[P] counts.
 * Note: both E and P may be evaluated more than once.
 */
#define BTL_OPENIB_TOKENS(E, P) \
    (((P) == BTL_OPENIB_HP_QP) ? \
        ((E)->sd_tokens[(P)] + (E)->eager_rdma_remote.tokens) : \
        ((E)->sd_tokens[(P)] + 0))
/*
 * Retry fragments that were queued for priority slot `prio`.
 *
 * Pass 1 drains endpoint->pending_frags[prio] for as long as the list is
 * non-empty, the endpoint has a free send wqe (sd_wqe[prio] > 0) and
 * BTL_OPENIB_TOKENS(endpoint, prio) is positive.
 *
 * Pass 2 runs only when mca_btl_openib_component.use_srq is set and drains
 * openib_btl->pending_frags[prio] while openib_btl->sd_tokens[prio] > 0.
 *
 * Either pass stops early when the dequeue yields NULL or when
 * btl_openib_frag_progress_one() returns OMPI_ERR_OUT_OF_RESOURCE.
 * Each list is tested for emptiness before its lock is taken; the lock
 * covers only the removal itself, which is why the dequeued item is still
 * checked against NULL.
 */
static void btl_openib_frag_progress_pending(
        mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint,
        int prio)
{
    opal_list_t *ep_queue = &endpoint->pending_frags[prio];
    opal_list_t *btl_queue = &openib_btl->pending_frags[prio];
    mca_btl_openib_frag_t *pending;

    /* pass 1: fragments parked on this endpoint */
    for(;;) {
        if(opal_list_is_empty(ep_queue) ||
           endpoint->sd_wqe[prio] <= 0 ||
           BTL_OPENIB_TOKENS(endpoint, prio) <= 0) {
            break;
        }

        OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
        pending = (mca_btl_openib_frag_t *) opal_list_remove_first(ep_queue);
        OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);

        if(NULL == pending) {
            break;
        }
        if(OMPI_ERR_OUT_OF_RESOURCE ==
                btl_openib_frag_progress_one(openib_btl, pending)) {
            break;
        }
    }

    /* the module-wide queue is only consulted in SRQ mode */
    if(mca_btl_openib_component.use_srq) {
        for(;;) {
            if(opal_list_is_empty(btl_queue) ||
               openib_btl->sd_tokens[prio] <= 0) {
                break;
            }

            /* dequeue resources due to global flow control */
            OPAL_THREAD_LOCK(&openib_btl->ib_lock);
            pending = (mca_btl_openib_frag_t *)
                opal_list_remove_first(btl_queue);
            OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);

            if(NULL == pending) {
                break;
            }
            if(OMPI_ERR_OUT_OF_RESOURCE ==
                    btl_openib_frag_progress_one(openib_btl, pending)) {
                break;
            }
        }
    }
}
/*
* IB component progress.
*/
/*
 * Empty stub: takes an endpoint and an integer `p` and performs no work.
 * NOTE(review): no caller is visible in this excerpt - confirm whether
 * this function is still needed.
 */
static void btl_openib_frag_progress(mca_btl_base_endpoint_t *endpoint, int p)
{
}
static int btl_openib_component_progress(void)
{
static char *qp_name[] = {"HP", "LP"};
@ -952,43 +1002,20 @@ static int btl_openib_component_progress(void)
OMPI_SUCCESS);
/* return send wqe */
OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp, 1);
OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_HP_QP], 1);
if(mca_btl_openib_component.use_srq)
OPAL_THREAD_ADD32(&openib_btl->sd_tokens[BTL_OPENIB_HP_QP], 1);
/* check to see if we need to progress any pending desciptors */
while (!opal_list_is_empty(&endpoint->pending_frags_hp) &&
endpoint->sd_wqe_hp > 0 && (endpoint->sd_tokens_hp > 0 || endpoint->eager_rdma_remote.tokens > 0)) {
opal_list_item_t *frag_item;
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
frag_item = opal_list_remove_first(&(endpoint->pending_frags_hp));
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if(NULL == (frag = (mca_btl_openib_frag_t *) frag_item))
break;
if(OMPI_SUCCESS != mca_btl_openib_endpoint_send(frag->endpoint, frag)) {
BTL_ERROR(("error in posting pending send\n"));
break;
}
}
btl_openib_frag_progress_pending(openib_btl, endpoint,
BTL_OPENIB_HP_QP);
if(!mca_btl_openib_component.use_srq) {
/* check to see if we need to return credits */
if((endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win || endpoint->eager_rdma_local.credits >= mca_btl_openib_component.rd_win) &&
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp, 1) == 1) {
mca_btl_openib_endpoint_send_credits_hp(endpoint);
}
} else if(OPAL_THREAD_ADD32(&openib_btl->sd_tokens_hp, 1) > 0
&& !opal_list_is_empty(&openib_btl->pending_frags_hp)) {
/* dequeue resources due to global flow control */
opal_list_item_t *frag_item;
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
frag_item = opal_list_remove_first(&openib_btl->pending_frags_hp);
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
if(NULL != (frag = (mca_btl_openib_frag_t *) frag_item) &&
OMPI_SUCCESS != mca_btl_openib_endpoint_send(frag->endpoint, frag)) {
BTL_ERROR(("error in posting pending send\n"));
}
}
count++;
@ -1013,12 +1040,11 @@ static int btl_openib_component_progress(void)
}
ne=ibv_poll_cq(openib_btl->ib_cq_lp, 1, &wc );
ne=ibv_poll_cq(openib_btl->ib_cq_lp, 1, &wc);
if(ne != 0) {
if(ne < 0 || wc.status != IBV_WC_SUCCESS)
goto error_lp;
frag = (mca_btl_openib_frag_t*) (unsigned long) wc.wr_id;
endpoint = frag->endpoint;
/* Handle n/w completions */
@ -1028,37 +1054,20 @@ static int btl_openib_component_progress(void)
frag->base.des_cbfunc(&openib_btl->super, frag->endpoint, &frag->base, OMPI_SUCCESS);
/* return send wqe */
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp, 1);
OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP], 1);
if(mca_btl_openib_component.use_srq)
OPAL_THREAD_ADD32(&openib_btl->sd_tokens[BTL_OPENIB_LP_QP], 1);
/* check to see if we need to progress any pending desciptors */
while (!opal_list_is_empty(&endpoint->pending_frags_lp) &&
endpoint->sd_wqe_lp > 0 && endpoint->sd_tokens_lp > 0) {
opal_list_item_t *frag_item;
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
frag_item = opal_list_remove_first(&(endpoint->pending_frags_lp));
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if(NULL == (frag = (mca_btl_openib_frag_t *) frag_item))
break;
MCA_BTL_IB_FRAG_PROGRESS(frag);
}
btl_openib_frag_progress_pending(openib_btl, endpoint,
BTL_OPENIB_LP_QP);
if( !mca_btl_openib_component.use_srq) {
if(!mca_btl_openib_component.use_srq) {
/* check to see if we need to return credits */
if( endpoint->rd_credits_lp >= mca_btl_openib_component.rd_win &&
OPAL_THREAD_ADD32(&endpoint->sd_credits_lp, 1) == 1) {
mca_btl_openib_endpoint_send_credits_lp(endpoint);
}
/* SRQ case */
} else if(OPAL_THREAD_ADD32(&openib_btl->sd_tokens_lp, 1) > 0
&& !opal_list_is_empty(&openib_btl->pending_frags_lp)) {
opal_list_item_t *frag_item;
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
frag_item = opal_list_remove_first(&openib_btl->pending_frags_lp);
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
if(NULL != (frag = (mca_btl_openib_frag_t *) frag_item)) {
MCA_BTL_IB_FRAG_PROGRESS(frag);
}
}
count++;
break;
@ -1073,29 +1082,12 @@ static int btl_openib_component_progress(void)
frag->base.des_cbfunc(&openib_btl->super, frag->endpoint, &frag->base, OMPI_SUCCESS);
/* return send wqe */
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp, 1);
OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP], 1);
/* check for pending frags */
if(!opal_list_is_empty(&endpoint->pending_frags_lp)) {
opal_list_item_t *frag_item;
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
frag_item = opal_list_remove_first(&endpoint->pending_frags_lp);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if(NULL != (frag = (mca_btl_openib_frag_t *) frag_item)) {
MCA_BTL_IB_FRAG_PROGRESS(frag);
}
}
if (mca_btl_openib_component.use_srq &&
endpoint->sd_wqe_lp > 0 &&
!opal_list_is_empty(&openib_btl->pending_frags_lp)) {
opal_list_item_t *frag_item;
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
frag_item = opal_list_remove_first(&openib_btl->pending_frags_lp);
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
if(NULL != (frag = (mca_btl_openib_frag_t *) frag_item)) {
MCA_BTL_IB_FRAG_PROGRESS(frag);
}
}
btl_openib_frag_progress_pending(openib_btl, endpoint,
BTL_OPENIB_LP_QP);
count++;
break;
@ -1114,44 +1106,31 @@ static int btl_openib_component_progress(void)
openib_btl->ib_reg[frag->hdr->tag].cbdata);
OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_max), (ompi_free_list_item_t*) frag);
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
if(mca_btl_openib_component.use_srq) {
/* repost receive descriptors */
OPAL_THREAD_ADD32((int32_t*) &openib_btl->srd_posted_lp, -1);
MCA_BTL_OPENIB_POST_SRR_LOW(openib_btl, 0);
} else {
#endif
/* repost receive descriptors */
OPAL_THREAD_ADD32((int32_t*) &endpoint->rd_posted_lp, -1);
MCA_BTL_OPENIB_ENDPOINT_POST_RR_LOW(endpoint, 0);
/* check to see if we need to progress any pending desciptors */
if( OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp, credits) > 0) {
while(!opal_list_is_empty(&endpoint->pending_frags_lp) &&
endpoint->sd_wqe_lp > 0 && endpoint->sd_tokens_lp > 0) {
opal_list_item_t *frag_item;
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
frag_item = opal_list_remove_first(&(endpoint->pending_frags_lp));
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if(NULL == (frag = (mca_btl_openib_frag_t *) frag_item))
break;
MCA_BTL_IB_FRAG_PROGRESS(frag);
}
}
OPAL_THREAD_ADD32(&endpoint->sd_tokens[BTL_OPENIB_LP_QP],
credits);
/* check to see if we need to progress any pending desciptors */
btl_openib_frag_progress_pending(openib_btl, endpoint,
BTL_OPENIB_LP_QP);
/* check to see if we need to return credits */
if( endpoint->rd_credits_lp >= mca_btl_openib_component.rd_win &&
if(endpoint->rd_credits_lp >= mca_btl_openib_component.rd_win &&
OPAL_THREAD_ADD32(&endpoint->sd_credits_lp, 1) == 1) {
mca_btl_openib_endpoint_send_credits_lp(endpoint);
}
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
}
#endif
count++;
break;
default:
BTL_ERROR(("Unhandled work completion opcode is %d", wc.opcode));
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL);

Просмотреть файл

@ -68,6 +68,50 @@ int mca_btl_openib_endpoint_qp_init_query(
/*
* post a send to the work queue
*/
/*
 * Reserve everything needed to post `frag` on priority slot `prio`.
 *
 * Resources are taken in this order, each with an atomic decrement that is
 * undone if the counter goes negative:
 *   1. a send wqe on the endpoint (endpoint->sd_wqe[prio]);
 *   2. SRQ mode (mca_btl_openib_component.use_srq set): a module-wide send
 *      token (openib_btl->sd_tokens[prio]);
 *      non-SRQ mode: for the high priority slot an eager RDMA token is
 *      tried first (endpoint->eager_rdma_remote.tokens); when one is
 *      obtained *do_rdma is set to 1 and no send token is consumed.
 *      Otherwise an endpoint send token (endpoint->sd_tokens[prio]) is
 *      taken.
 *
 * On any shortage the resources already taken are given back, the fragment
 * is appended to a pending list (the module list, under ib_lock, for a
 * module token shortage; the endpoint list otherwise) and
 * OMPI_ERR_OUT_OF_RESOURCE is returned.  OMPI_SUCCESS means the caller may
 * post the fragment.
 *
 * `do_rdma` is only written for the high priority slot in non-SRQ mode, so
 * it may be NULL for other calls.
 *
 * NOTE(review): the endpoint pending list is appended to without taking
 * endpoint->endpoint_lock here - presumably the caller already holds it;
 * confirm against the call sites.
 */
static int btl_openib_acquire_send_resources(
        mca_btl_openib_module_t *openib_btl,
        mca_btl_openib_endpoint_t *endpoint,
        mca_btl_openib_frag_t *frag, int prio, int *do_rdma)
{
    opal_list_item_t *item = (opal_list_item_t *)frag;

    /* a send wqe is required in every mode */
    if(OPAL_THREAD_ADD32(&endpoint->sd_wqe[prio], -1) < 0) {
        OPAL_THREAD_ADD32(&endpoint->sd_wqe[prio], 1);
        opal_list_append(&endpoint->pending_frags[prio], item);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    if(mca_btl_openib_component.use_srq) {
        /* SRQ: flow control is global to the module */
        if(OPAL_THREAD_ADD32(&openib_btl->sd_tokens[prio], -1) >= 0) {
            return OMPI_SUCCESS;
        }
        OPAL_THREAD_ADD32(&openib_btl->sd_tokens[prio], 1);
        OPAL_THREAD_ADD32(&endpoint->sd_wqe[prio], 1);
        OPAL_THREAD_LOCK(&openib_btl->ib_lock);
        opal_list_append(&openib_btl->pending_frags[prio], item);
        OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* per-endpoint flow control: high priority tries eager RDMA first */
    if(BTL_OPENIB_HP_QP == prio) {
        if(OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, -1) >= 0) {
            *do_rdma = 1;
            return OMPI_SUCCESS;
        }
        OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
    }

    if(OPAL_THREAD_ADD32(&endpoint->sd_tokens[prio], -1) >= 0) {
        return OMPI_SUCCESS;
    }

    /* no send token: give back the token and the wqe, then park the frag */
    OPAL_THREAD_ADD32(&endpoint->sd_tokens[prio], 1);
    OPAL_THREAD_ADD32(&endpoint->sd_wqe[prio], 1);
    opal_list_append(&endpoint->pending_frags[prio], item);
    return OMPI_ERR_OUT_OF_RESOURCE;
}
/* this function is called with endpoint->endpoint_lock held */
static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* openib_btl,
mca_btl_openib_endpoint_t * endpoint,
mca_btl_openib_frag_t * frag)
@ -77,40 +121,12 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
struct ibv_send_wr* bad_wr;
frag->sg_entry.addr = (unsigned long) frag->hdr;
if((frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY) &&
frag->size <= openib_btl->super.btl_eager_limit){
/* check for a send wqe */
if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,-1) < 0) {
OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
opal_list_append(&endpoint->pending_frags_hp,
(opal_list_item_t *)frag);
return OMPI_SUCCESS;
}
/* check for rdma tocken */
if (OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,-1) < 0) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,1);
/* check for a token */
if(!mca_btl_openib_component.use_srq &&
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp,-1) < 0) {
OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp,1);
opal_list_append(&endpoint->pending_frags_hp,
(opal_list_item_t *)frag);
return OMPI_SUCCESS;
} else if( mca_btl_openib_component.use_srq &&
OPAL_THREAD_ADD32(&openib_btl->sd_tokens_hp,-1) < 0) {
/* queue the request */
OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
OPAL_THREAD_ADD32(&openib_btl->sd_tokens_hp,1);
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
opal_list_append(&openib_btl->pending_frags_hp,
(opal_list_item_t *)frag);
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
return OMPI_SUCCESS;
}
} else {
do_rdma = 1;
}
if(frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY) {
assert(frag->size <= openib_btl->super.btl_eager_limit);
if(btl_openib_acquire_send_resources(openib_btl, endpoint, frag,
BTL_OPENIB_HP_QP, &do_rdma) == OMPI_ERR_OUT_OF_RESOURCE)
return MPI_SUCCESS;
frag->hdr->credits =
(endpoint->rd_credits_hp > 0) ? endpoint->rd_credits_hp : 0;
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -frag->hdr->credits);
@ -119,39 +135,15 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
-frag->hdr->rdma_credits);
ib_qp = endpoint->lcl_qp_hp;
} else {
/* check for a send wqe */
if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,-1) < 0) {
if(btl_openib_acquire_send_resources(openib_btl, endpoint, frag,
BTL_OPENIB_LP_QP, NULL) == OMPI_ERR_OUT_OF_RESOURCE)
return MPI_SUCCESS;
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
return OMPI_SUCCESS;
/* check for a token */
} else if(!mca_btl_openib_component.use_srq &&
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp,-1) < 0 ) {
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp,1);
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
return OMPI_SUCCESS;
} else if(mca_btl_openib_component.use_srq &&
OPAL_THREAD_ADD32(&openib_btl->sd_tokens_lp,-1) < 0) {
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,1);
OPAL_THREAD_ADD32(&openib_btl->sd_tokens_lp,1);
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
opal_list_append(&openib_btl->pending_frags_lp, (opal_list_item_t *)frag);
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
return OMPI_SUCCESS;
/* queue the request */
} else {
frag->hdr->credits = (endpoint->rd_credits_lp > 0) ? endpoint->rd_credits_lp : 0;
frag->hdr->credits = (endpoint->rd_credits_lp > 0) ?
endpoint->rd_credits_lp : 0;
OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, -frag->hdr->credits);
ib_qp = endpoint->lcl_qp_lp;
}
}
frag->sg_entry.length =
frag->segment.seg_len + sizeof(mca_btl_openib_header_t) +
@ -188,6 +180,7 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
} else {
frag->wr_desc.sr_desc.opcode = IBV_WR_SEND;
}
if(ibv_post_send(ib_qp,
&frag->wr_desc.sr_desc,
&bad_wr)) {
@ -196,17 +189,13 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
return OMPI_ERROR;
}
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
if(mca_btl_openib_component.use_srq) {
MCA_BTL_OPENIB_POST_SRR_HIGH(openib_btl, 1);
MCA_BTL_OPENIB_POST_SRR_LOW(openib_btl, 1);
} else {
#endif
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 1);
MCA_BTL_OPENIB_ENDPOINT_POST_RR_LOW(endpoint, 1);
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
}
#endif
return OMPI_SUCCESS;
}
@ -231,8 +220,8 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
endpoint->endpoint_retries = 0;
OBJ_CONSTRUCT(&endpoint->endpoint_lock, opal_mutex_t);
OBJ_CONSTRUCT(&endpoint->pending_send_frags, opal_list_t);
OBJ_CONSTRUCT(&endpoint->pending_frags_hp, opal_list_t);
OBJ_CONSTRUCT(&endpoint->pending_frags_lp, opal_list_t);
OBJ_CONSTRUCT(&endpoint->pending_frags[BTL_OPENIB_HP_QP], opal_list_t);
OBJ_CONSTRUCT(&endpoint->pending_frags[BTL_OPENIB_LP_QP], opal_list_t);
endpoint->lcl_qp_attr_hp = (struct ibv_qp_attr *) malloc(sizeof(struct ibv_qp_attr));
endpoint->lcl_qp_attr_lp = (struct ibv_qp_attr *) malloc(sizeof(struct ibv_qp_attr));
@ -243,8 +232,8 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
endpoint->rd_posted_lp = 0;
/* number of available send wqes */
endpoint->sd_wqe_hp = mca_btl_openib_component.rd_num;
endpoint->sd_wqe_lp = mca_btl_openib_component.rd_num;
endpoint->sd_wqe[BTL_OPENIB_HP_QP] = mca_btl_openib_component.rd_num;
endpoint->sd_wqe[BTL_OPENIB_LP_QP] = mca_btl_openib_component.rd_num;
/* zero these out w/ initial posting, so that we start out w/
* zero credits to return to peer
@ -255,8 +244,8 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
endpoint->sd_credits_lp = 0;
/* initialize the high and low priority tokens */
endpoint->sd_tokens_hp = mca_btl_openib_component.rd_num;
endpoint->sd_tokens_lp = mca_btl_openib_component.rd_num;
endpoint->sd_tokens[BTL_OPENIB_HP_QP] = mca_btl_openib_component.rd_num;
endpoint->sd_tokens[BTL_OPENIB_LP_QP] = mca_btl_openib_component.rd_num;
endpoint->get_tokens = mca_btl_openib_component.ib_qp_ous_rd_atom;
/* initialize RDMA eager related parts */
@ -1068,7 +1057,7 @@ static void mca_btl_openib_endpoint_credits_lp(
int32_t credits;
/* we don't acquire a wqe or token for credit message - so decrement */
OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,-1);
OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_LP_QP],-1);
/* check to see if there are addditional credits to return */
if ((credits = OPAL_THREAD_ADD32(&endpoint->sd_credits_lp,-1)) > 0) {
@ -1138,7 +1127,7 @@ static void mca_btl_openib_endpoint_credits_hp(
int32_t credits;
/* we don't acquire a wqe or token for credit message - so decrement */
OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,-1);
OPAL_THREAD_ADD32(&endpoint->sd_wqe[BTL_OPENIB_HP_QP],-1);
/* check to see if there are addditional credits to return */
if ((credits = OPAL_THREAD_ADD32(&endpoint->sd_credits_hp,-1)) > 0) {
@ -1234,7 +1223,7 @@ static int mca_btl_openib_endpoint_send_eager_rdma(
rdma_hdr->rkey = endpoint->eager_rdma_local.reg->mr->rkey;
rdma_hdr->rdma_start.pval = endpoint->eager_rdma_local.base.pval;
frag->segment.seg_len = sizeof(mca_btl_openib_eager_rdma_header_t);
if (mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag) !=
if (mca_btl_openib_endpoint_send(endpoint, frag) !=
OMPI_SUCCESS) {
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
BTL_ERROR(("Error sending RDMA buffer", strerror(errno)));

Просмотреть файл

@ -127,13 +127,7 @@ struct mca_btl_base_endpoint_t {
opal_list_t pending_send_frags;
/**< list of pending send frags for this endpotint */
opal_list_t pending_frags_hp;
/**< list of pending high priority frags */
opal_list_t pending_frags_lp;
/**< list of pending low priority frags */
opal_list_t pending_frags[2]; /**< list of pending frags */
mca_btl_openib_rem_info_t rem_info;
@ -149,8 +143,7 @@ struct mca_btl_base_endpoint_t {
struct ibv_qp_attr* lcl_qp_attr_lp;
/* Local QP attributes (Low and High) */
int32_t sd_tokens_hp; /**< number of high priority send tokens */
int32_t sd_tokens_lp; /**< number of low priority send tokens */
int32_t sd_tokens[2]; /**< number of send tokens */
int32_t get_tokens; /**< number of available get tokens */
int32_t rd_posted_hp; /**< number of high priority descriptors posted to the nic*/
@ -159,8 +152,7 @@ struct mca_btl_base_endpoint_t {
int32_t rd_credits_lp; /**< number of low priority credits to return to peer */
int32_t sd_credits_hp; /**< number of send wqe entries being used to return credits */
int32_t sd_credits_lp; /**< number of send wqe entries being used to return credits */
int32_t sd_wqe_hp; /**< number of available send wqe entries */
int32_t sd_wqe_lp; /**< number of available send wqe entries */
int32_t sd_wqe[2]; /**< number of available send wqe entries */
uint16_t subnet; /**< subnet of this endpoint*/

Просмотреть файл

@ -179,40 +179,6 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_control_t);
} while(0); \
}
/*
 * Re-post a pending fragment according to the opcode stored in its send
 * work request: IBV_WR_SEND goes through mca_btl_openib_endpoint_send(),
 * IBV_WR_RDMA_WRITE through mca_btl_openib_put() and IBV_WR_RDMA_READ
 * through mca_btl_openib_get().  A result other than OMPI_SUCCESS, or an
 * opcode that is none of the three, is reported with BTL_ERROR; nothing is
 * returned to the caller.
 *
 * The expansion refers to a variable named `openib_btl`, which must be in
 * scope at the point of use.  `frag` is evaluated more than once, so the
 * argument must not have side effects; it is parenthesized everywhere so
 * that an expression argument expands correctly.
 */
#define MCA_BTL_IB_FRAG_PROGRESS(frag) \
do { \
    switch((frag)->wr_desc.sr_desc.opcode) { \
    case IBV_WR_SEND: \
        if(OMPI_SUCCESS != mca_btl_openib_endpoint_send((frag)->endpoint, (frag))) { \
            BTL_ERROR(("error in posting pending send\n")); \
        } \
        break; \
    case IBV_WR_RDMA_WRITE: \
        if(OMPI_SUCCESS != mca_btl_openib_put((mca_btl_base_module_t*) openib_btl, \
                                              (frag)->endpoint, \
                                              (mca_btl_base_descriptor_t*) (frag))) { \
            BTL_ERROR(("error in posting pending rdma write\n")); \
        } \
        break; \
    case IBV_WR_RDMA_READ: \
        if(OMPI_SUCCESS != mca_btl_openib_get((mca_btl_base_module_t *) openib_btl, \
                                              (frag)->endpoint, \
                                              (mca_btl_base_descriptor_t*) (frag))) { \
            BTL_ERROR(("error in posting pending rdma read\n")); \
        } \
        break; \
    default: \
        BTL_ERROR(("error in posting pending operation, invalide opcode %d\n", (frag)->wr_desc.sr_desc.opcode)); \
        break; \
    } \
} while (0)
struct mca_btl_openib_module_t;
#if defined(c_plusplus) || defined(__cplusplus)