flow control
This commit was SVN r8039.
Этот коммит содержится в:
родитель
bcf8744bf6
Коммит
2d9c509add
@ -128,10 +128,10 @@ int mca_btl_mvapi_add_procs(
|
|||||||
if( 0 == mvapi_btl->num_peers ) {
|
if( 0 == mvapi_btl->num_peers ) {
|
||||||
mvapi_btl->num_peers += nprocs;
|
mvapi_btl->num_peers += nprocs;
|
||||||
if(mca_btl_mvapi_component.use_srq) {
|
if(mca_btl_mvapi_component.use_srq) {
|
||||||
mvapi_btl->rd_buf_max = mca_btl_mvapi_component.ib_rr_buf_max + log2(nprocs) * mca_btl_mvapi_component.rd_per_peer;
|
mvapi_btl->rd_num = mca_btl_mvapi_component.rd_num + log2(nprocs) * mca_btl_mvapi_component.rd_per_peer;
|
||||||
free(mvapi_btl->rr_desc_post);
|
free(mvapi_btl->rr_desc_post);
|
||||||
mvapi_btl->rr_desc_post = (VAPI_rr_desc_t*) malloc((mvapi_btl->rd_buf_max * sizeof(VAPI_rr_desc_t)));
|
mvapi_btl->rr_desc_post = (VAPI_rr_desc_t*) malloc((mvapi_btl->rd_num * sizeof(VAPI_rr_desc_t)));
|
||||||
mvapi_btl->rd_buf_min = mvapi_btl->rd_buf_max / 2;
|
mvapi_btl->rd_low = mvapi_btl->rd_num / 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -506,7 +506,7 @@ int mca_btl_mvapi_finalize(struct mca_btl_base_module_t* btl)
|
|||||||
mca_btl_mvapi_module_t* mvapi_btl;
|
mca_btl_mvapi_module_t* mvapi_btl;
|
||||||
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
||||||
|
|
||||||
#if 0
|
#if 1
|
||||||
if(mvapi_btl->send_free_eager.fl_num_allocated !=
|
if(mvapi_btl->send_free_eager.fl_num_allocated !=
|
||||||
mvapi_btl->send_free_eager.super.opal_list_length){
|
mvapi_btl->send_free_eager.super.opal_list_length){
|
||||||
opal_output(0, "btl ib send_free_eager frags: %d allocated %d returned \n",
|
opal_output(0, "btl ib send_free_eager frags: %d allocated %d returned \n",
|
||||||
@ -579,30 +579,33 @@ int mca_btl_mvapi_put( mca_btl_base_module_t* btl,
|
|||||||
frag->sr_desc.opcode = VAPI_RDMA_WRITE;
|
frag->sr_desc.opcode = VAPI_RDMA_WRITE;
|
||||||
/* atomically test and acquire a token */
|
/* atomically test and acquire a token */
|
||||||
if(!mca_btl_mvapi_component.use_srq &&
|
if(!mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp,-1) < 0) {
|
||||||
BTL_VERBOSE(("Queing because no rdma write tokens \n"));
|
|
||||||
BTL_MVAPI_INSERT_PENDING(frag, endpoint->pending_frags_lp,
|
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
|
||||||
endpoint->wr_sq_tokens_lp, endpoint->endpoint_lock, rc);
|
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t*)frag);
|
||||||
|
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
||||||
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp,1);
|
||||||
rc = OMPI_SUCCESS;
|
rc = OMPI_SUCCESS;
|
||||||
|
|
||||||
} else if(mca_btl_mvapi_component.use_srq &&
|
} else if(mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,-1) < 0) {
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp,-1) < 0) {
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&mvapi_btl->btl_lock);
|
||||||
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
|
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,1);
|
OPAL_THREAD_UNLOCK(&mvapi_btl->btl_lock);
|
||||||
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp,1);
|
||||||
rc = OMPI_SUCCESS;
|
rc = OMPI_SUCCESS;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_lp;
|
||||||
|
|
||||||
|
|
||||||
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_low;
|
|
||||||
frag->sr_desc.remote_addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_dst->seg_addr.pval;
|
frag->sr_desc.remote_addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_dst->seg_addr.pval;
|
||||||
frag->sr_desc.r_key = frag->base.des_dst->seg_key.key32[0];
|
frag->sr_desc.r_key = frag->base.des_dst->seg_key.key32[0];
|
||||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_src->seg_addr.pval;
|
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_src->seg_addr.pval;
|
||||||
frag->sg_entry.len = frag->base.des_src->seg_len;
|
frag->sg_entry.len = frag->base.des_src->seg_len;
|
||||||
|
|
||||||
frag->ret = VAPI_post_sr(mvapi_btl->nic,
|
frag->ret = VAPI_post_sr(mvapi_btl->nic,
|
||||||
endpoint->lcl_qp_hndl_low,
|
endpoint->lcl_qp_hndl_lp,
|
||||||
&frag->sr_desc);
|
&frag->sr_desc);
|
||||||
if(VAPI_OK != frag->ret){
|
if(VAPI_OK != frag->ret){
|
||||||
rc = OMPI_ERROR;
|
rc = OMPI_ERROR;
|
||||||
@ -640,27 +643,41 @@ int mca_btl_mvapi_get( mca_btl_base_module_t* btl,
|
|||||||
frag->endpoint = endpoint;
|
frag->endpoint = endpoint;
|
||||||
/* atomically test and acquire a token */
|
/* atomically test and acquire a token */
|
||||||
if(!mca_btl_mvapi_component.use_srq &&
|
if(!mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp,-1) < 0) {
|
||||||
BTL_VERBOSE(("Queing because no rdma write tokens \n"));
|
|
||||||
BTL_MVAPI_INSERT_PENDING(frag, endpoint->pending_frags_lp,
|
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
|
||||||
endpoint->wr_sq_tokens_lp, endpoint->endpoint_lock, rc);
|
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t*)frag);
|
||||||
|
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
||||||
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp,1);
|
||||||
rc = OMPI_SUCCESS;
|
rc = OMPI_SUCCESS;
|
||||||
|
|
||||||
} else if(mca_btl_mvapi_component.use_srq &&
|
} else if(mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,-1) < 0) {
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp,-1) < 0) {
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&mvapi_btl->btl_lock);
|
||||||
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
|
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,1);
|
OPAL_THREAD_UNLOCK(&mvapi_btl->btl_lock);
|
||||||
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp,1);
|
||||||
rc = OMPI_SUCCESS;
|
rc = OMPI_SUCCESS;
|
||||||
|
|
||||||
} else {
|
} else if(OPAL_THREAD_ADD32(&endpoint->get_tokens,-1) < 0) {
|
||||||
|
|
||||||
|
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
|
||||||
|
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t*)frag);
|
||||||
|
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
||||||
|
OPAL_THREAD_ADD32(&endpoint->get_tokens,1);
|
||||||
|
rc = OMPI_SUCCESS;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_low;
|
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_lp;
|
||||||
frag->sr_desc.remote_addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_src->seg_addr.pval;
|
frag->sr_desc.remote_addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_src->seg_addr.pval;
|
||||||
frag->sr_desc.r_key = frag->base.des_src->seg_key.key32[0];
|
frag->sr_desc.r_key = frag->base.des_src->seg_key.key32[0];
|
||||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_dst->seg_addr.pval;
|
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_dst->seg_addr.pval;
|
||||||
frag->sg_entry.len = frag->base.des_dst->seg_len;
|
frag->sg_entry.len = frag->base.des_dst->seg_len;
|
||||||
|
|
||||||
frag->ret = VAPI_post_sr(mvapi_btl->nic,
|
frag->ret = VAPI_post_sr(mvapi_btl->nic,
|
||||||
endpoint->lcl_qp_hndl_low,
|
endpoint->lcl_qp_hndl_lp,
|
||||||
&frag->sr_desc);
|
&frag->sr_desc);
|
||||||
if(VAPI_OK != frag->ret){
|
if(VAPI_OK != frag->ret){
|
||||||
rc = OMPI_ERROR;
|
rc = OMPI_ERROR;
|
||||||
@ -720,7 +737,7 @@ static void async_event_handler(VAPI_hca_hndl_t hca_hndl,
|
|||||||
|
|
||||||
case VAPI_SRQ_LIMIT_REACHED:
|
case VAPI_SRQ_LIMIT_REACHED:
|
||||||
{
|
{
|
||||||
int i;
|
size_t i;
|
||||||
BTL_ERROR(("SRQ limit is reached, posting more buffers %s\n", VAPI_event_record_sym(event_p->type)));
|
BTL_ERROR(("SRQ limit is reached, posting more buffers %s\n", VAPI_event_record_sym(event_p->type)));
|
||||||
for(i = 0; i < mca_btl_mvapi_component.ib_num_btls; i++) {
|
for(i = 0; i < mca_btl_mvapi_component.ib_num_btls; i++) {
|
||||||
mca_btl_mvapi_module_t* mvapi_btl = &mca_btl_mvapi_component.mvapi_btls[i];
|
mca_btl_mvapi_module_t* mvapi_btl = &mca_btl_mvapi_component.mvapi_btls[i];
|
||||||
@ -731,7 +748,7 @@ static void async_event_handler(VAPI_hca_hndl_t hca_hndl,
|
|||||||
}
|
}
|
||||||
|
|
||||||
case VAPI_RECEIVE_QUEUE_DRAINED: {
|
case VAPI_RECEIVE_QUEUE_DRAINED: {
|
||||||
|
fprintf(stderr, "VAPI_RECEIVE_QUEUE_DRAINEDD\n");
|
||||||
|
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -763,8 +780,8 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(mca_btl_mvapi_component.use_srq) {
|
if(mca_btl_mvapi_component.use_srq) {
|
||||||
mvapi_btl->srr_posted_high = 0;
|
mvapi_btl->srd_posted_hp = 0;
|
||||||
mvapi_btl->srr_posted_low = 0;
|
mvapi_btl->srd_posted_lp = 0;
|
||||||
srq_attr.pd_hndl = mvapi_btl->ptag;
|
srq_attr.pd_hndl = mvapi_btl->ptag;
|
||||||
srq_attr.max_outs_wr = mca_btl_mvapi_component.ib_wq_size;
|
srq_attr.max_outs_wr = mca_btl_mvapi_component.ib_wq_size;
|
||||||
srq_attr.max_sentries = mca_btl_mvapi_component.ib_sg_list_size;
|
srq_attr.max_sentries = mca_btl_mvapi_component.ib_sg_list_size;
|
||||||
@ -772,7 +789,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
srq_attr_mod.srq_limit = 16;/* mca_btl_mvapi_component.ib_wq_size; */
|
srq_attr_mod.srq_limit = 16;/* mca_btl_mvapi_component.ib_wq_size; */
|
||||||
ret = VAPI_create_srq(mvapi_btl->nic,
|
ret = VAPI_create_srq(mvapi_btl->nic,
|
||||||
&srq_attr,
|
&srq_attr,
|
||||||
&mvapi_btl->srq_hndl_high,
|
&mvapi_btl->srq_hndl_hp,
|
||||||
&srq_attr_out);
|
&srq_attr_out);
|
||||||
if(ret != VAPI_OK) {
|
if(ret != VAPI_OK) {
|
||||||
BTL_ERROR(("error in VAPI_create_srq: %s", VAPI_strerror(ret)));
|
BTL_ERROR(("error in VAPI_create_srq: %s", VAPI_strerror(ret)));
|
||||||
@ -785,7 +802,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
ret = VAPI_modify_srq
|
ret = VAPI_modify_srq
|
||||||
(
|
(
|
||||||
mvapi_btl->nic,
|
mvapi_btl->nic,
|
||||||
mvapi_btl->srq_hndl_high,
|
mvapi_btl->srq_hndl_hp,
|
||||||
&srq_attr_mod,
|
&srq_attr_mod,
|
||||||
srq_attr_mask,
|
srq_attr_mask,
|
||||||
&max_outs_wr
|
&max_outs_wr
|
||||||
@ -798,7 +815,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
|
|
||||||
ret = VAPI_create_srq(mvapi_btl->nic,
|
ret = VAPI_create_srq(mvapi_btl->nic,
|
||||||
&srq_attr,
|
&srq_attr,
|
||||||
&mvapi_btl->srq_hndl_low,
|
&mvapi_btl->srq_hndl_lp,
|
||||||
&srq_attr_out);
|
&srq_attr_out);
|
||||||
if(ret != VAPI_OK) {
|
if(ret != VAPI_OK) {
|
||||||
BTL_ERROR(("error in VAPI_create_srq: %s", VAPI_strerror(ret)));
|
BTL_ERROR(("error in VAPI_create_srq: %s", VAPI_strerror(ret)));
|
||||||
@ -811,7 +828,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
ret = VAPI_modify_srq
|
ret = VAPI_modify_srq
|
||||||
(
|
(
|
||||||
mvapi_btl->nic,
|
mvapi_btl->nic,
|
||||||
mvapi_btl->srq_hndl_low,
|
mvapi_btl->srq_hndl_lp,
|
||||||
&srq_attr_mod,
|
&srq_attr_mod,
|
||||||
srq_attr_mask,
|
srq_attr_mask,
|
||||||
&max_outs_wr
|
&max_outs_wr
|
||||||
@ -824,11 +841,11 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
mvapi_btl->srq_hndl_high = VAPI_INVAL_SRQ_HNDL;
|
mvapi_btl->srq_hndl_hp = VAPI_INVAL_SRQ_HNDL;
|
||||||
mvapi_btl->srq_hndl_low = VAPI_INVAL_SRQ_HNDL;
|
mvapi_btl->srq_hndl_lp = VAPI_INVAL_SRQ_HNDL;
|
||||||
}
|
}
|
||||||
ret = VAPI_create_cq(mvapi_btl->nic, mca_btl_mvapi_component.ib_cq_size,
|
ret = VAPI_create_cq(mvapi_btl->nic, mca_btl_mvapi_component.ib_cq_size,
|
||||||
&mvapi_btl->cq_hndl_low, &cqe_cnt);
|
&mvapi_btl->cq_hndl_lp, &cqe_cnt);
|
||||||
|
|
||||||
|
|
||||||
if( VAPI_OK != ret) {
|
if( VAPI_OK != ret) {
|
||||||
@ -837,7 +854,7 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
|
|||||||
}
|
}
|
||||||
|
|
||||||
ret = VAPI_create_cq(mvapi_btl->nic, mca_btl_mvapi_component.ib_cq_size,
|
ret = VAPI_create_cq(mvapi_btl->nic, mca_btl_mvapi_component.ib_cq_size,
|
||||||
&mvapi_btl->cq_hndl_high, &cqe_cnt);
|
&mvapi_btl->cq_hndl_hp, &cqe_cnt);
|
||||||
|
|
||||||
|
|
||||||
if( VAPI_OK != ret) {
|
if( VAPI_OK != ret) {
|
||||||
|
@ -86,14 +86,13 @@ struct mca_btl_mvapi_component_t {
|
|||||||
|
|
||||||
char* ib_mpool_name;
|
char* ib_mpool_name;
|
||||||
/**< name of ib memory pool */
|
/**< name of ib memory pool */
|
||||||
|
|
||||||
uint32_t ib_rr_buf_max;
|
int32_t rd_num; /**< the number of receive descriptors to post to each queue pair */
|
||||||
/**< the maximum number of posted rr */
|
int32_t rd_low; /**< low water mark to reach before posting additional receive descriptors */
|
||||||
|
int32_t rd_win; /**< ack credits when window size exceeded */
|
||||||
uint32_t ib_rr_buf_min;
|
int32_t rd_rsv; /**< descriptors held in reserve for control messages */
|
||||||
/**< the minimum number of posted rr */
|
|
||||||
|
|
||||||
uint32_t rd_per_peer;
|
int32_t rd_per_peer;
|
||||||
/**< the number of recv desc posted per log(peer) in SRQ mode */
|
/**< the number of recv desc posted per log(peer) in SRQ mode */
|
||||||
|
|
||||||
size_t eager_limit;
|
size_t eager_limit;
|
||||||
@ -144,8 +143,8 @@ struct mca_btl_mvapi_module_t {
|
|||||||
VAPI_hca_hndl_t nic; /**< NIC handle */
|
VAPI_hca_hndl_t nic; /**< NIC handle */
|
||||||
VAPI_pd_hndl_t ptag; /**< Protection Domain tag */
|
VAPI_pd_hndl_t ptag; /**< Protection Domain tag */
|
||||||
|
|
||||||
VAPI_cq_hndl_t cq_hndl_high; /**< High Priority Completion Queue handle */
|
VAPI_cq_hndl_t cq_hndl_hp; /**< High Priority Completion Queue handle */
|
||||||
VAPI_cq_hndl_t cq_hndl_low; /**< Low Priority Completion Queue handle */
|
VAPI_cq_hndl_t cq_hndl_lp; /**< Low Priority Completion Queue handle */
|
||||||
|
|
||||||
EVAPI_async_handler_hndl_t async_handler;
|
EVAPI_async_handler_hndl_t async_handler;
|
||||||
/**< Async event handler used to detect weird/unknown events */
|
/**< Async event handler used to detect weird/unknown events */
|
||||||
@ -163,32 +162,26 @@ struct mca_btl_mvapi_module_t {
|
|||||||
opal_list_t repost; /**< list of buffers to repost */
|
opal_list_t repost; /**< list of buffers to repost */
|
||||||
opal_mutex_t ib_lock; /**< module level lock */
|
opal_mutex_t ib_lock; /**< module level lock */
|
||||||
|
|
||||||
VAPI_rr_desc_t* rr_desc_post;
|
VAPI_rr_desc_t* rr_desc_post; /**< an array to allow posting of rr in one swoop */
|
||||||
VAPI_srq_hndl_t srq_hndl_high; /**< A high priority shared receive queue
|
VAPI_srq_hndl_t srq_hndl_hp; /**< A high priority shared receive queue
|
||||||
runtime optional, can also use a receive queue
|
runtime optional, can also use a receive queue
|
||||||
per queue pair.. */
|
per queue pair.. */
|
||||||
VAPI_srq_hndl_t srq_hndl_low; /**< A low priority shared receive queue */
|
VAPI_srq_hndl_t srq_hndl_lp; /**< A low priority shared receive queue */
|
||||||
|
|
||||||
uint32_t srr_posted_high; /**< number of high priority shared rr posted to the nic*/
|
|
||||||
uint32_t srr_posted_low; /**< number of low priority shared rr posted to the nic*/
|
|
||||||
|
|
||||||
/**< an array to allow posting of rr in one swoop */
|
|
||||||
size_t ib_inline_max; /**< max size of inline send*/
|
size_t ib_inline_max; /**< max size of inline send*/
|
||||||
|
int32_t num_peers;
|
||||||
uint32_t num_peers;
|
|
||||||
uint32_t rd_buf_max;
|
|
||||||
uint32_t rd_buf_min;
|
|
||||||
|
|
||||||
int32_t wr_sq_tokens_hp;
|
|
||||||
/**< number of high priority frags that can be outstanding (down counter) */
|
|
||||||
int32_t wr_sq_tokens_lp;
|
|
||||||
/**< number of low priority frags that can be outstanding (down counter) */
|
|
||||||
|
|
||||||
opal_list_t pending_frags_hp;
|
|
||||||
/**< list of pending high priority frags */
|
|
||||||
|
|
||||||
opal_list_t pending_frags_lp;
|
int32_t srd_posted_hp; /**< number of high priority shared receive descriptors posted to the nic*/
|
||||||
/**< list of pending low priority frags */
|
int32_t srd_posted_lp; /**< number of low priority shared receive descriptors posted to the nic*/
|
||||||
|
|
||||||
|
int32_t rd_num; /**< number of receive descriptors to post to srq */
|
||||||
|
int32_t rd_low; /**< low water mark before reposting descriptors to srq */
|
||||||
|
|
||||||
|
int32_t sd_tokens_hp; /**< number of send tokens available on high priority srq */
|
||||||
|
int32_t sd_tokens_lp; /**< number of send tokens available on low priority srq */
|
||||||
|
|
||||||
|
opal_list_t pending_frags_hp; /**< list of pending high priority frags */
|
||||||
|
opal_list_t pending_frags_lp; /**< list of pending low priority frags */
|
||||||
|
|
||||||
|
|
||||||
}; typedef struct mca_btl_mvapi_module_t mca_btl_mvapi_module_t;
|
}; typedef struct mca_btl_mvapi_module_t mca_btl_mvapi_module_t;
|
||||||
@ -200,15 +193,15 @@ struct mca_btl_mvapi_module_t {
|
|||||||
{ \
|
{ \
|
||||||
do { \
|
do { \
|
||||||
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
|
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
|
||||||
if(mvapi_btl->srr_posted_high <= mvapi_btl->rd_buf_min+additional && \
|
if(mvapi_btl->srd_posted_hp <= mvapi_btl->rd_low+additional && \
|
||||||
mvapi_btl->srr_posted_high < mvapi_btl->rd_buf_max){ \
|
mvapi_btl->srd_posted_hp < mvapi_btl->rd_num){ \
|
||||||
MCA_BTL_MVAPI_POST_SRR_SUB(mvapi_btl->rd_buf_max - \
|
MCA_BTL_MVAPI_POST_SRR_SUB(mvapi_btl->rd_num - \
|
||||||
mvapi_btl->srr_posted_high, \
|
mvapi_btl->srd_posted_hp, \
|
||||||
mvapi_btl, \
|
mvapi_btl, \
|
||||||
&mvapi_btl->recv_free_eager, \
|
&mvapi_btl->recv_free_eager, \
|
||||||
&mvapi_btl->srr_posted_high, \
|
&mvapi_btl->srd_posted_hp, \
|
||||||
mvapi_btl->nic, \
|
mvapi_btl->nic, \
|
||||||
mvapi_btl->srq_hndl_high); \
|
mvapi_btl->srq_hndl_hp); \
|
||||||
} \
|
} \
|
||||||
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
|
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
|
||||||
}while(0);\
|
}while(0);\
|
||||||
@ -219,15 +212,15 @@ struct mca_btl_mvapi_module_t {
|
|||||||
{ \
|
{ \
|
||||||
do { \
|
do { \
|
||||||
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
|
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
|
||||||
if(mvapi_btl->srr_posted_low <= mvapi_btl->rd_buf_min+additional && \
|
if(mvapi_btl->srd_posted_lp <= mvapi_btl->rd_low+additional && \
|
||||||
mvapi_btl->srr_posted_low < mvapi_btl->rd_buf_max){ \
|
mvapi_btl->srd_posted_lp < mvapi_btl->rd_num){ \
|
||||||
MCA_BTL_MVAPI_POST_SRR_SUB(mvapi_btl->rd_buf_max - \
|
MCA_BTL_MVAPI_POST_SRR_SUB(mvapi_btl->rd_num - \
|
||||||
mvapi_btl->srr_posted_low, \
|
mvapi_btl->srd_posted_lp, \
|
||||||
mvapi_btl, \
|
mvapi_btl, \
|
||||||
&mvapi_btl->recv_free_max, \
|
&mvapi_btl->recv_free_max, \
|
||||||
&mvapi_btl->srr_posted_low, \
|
&mvapi_btl->srd_posted_lp, \
|
||||||
mvapi_btl->nic, \
|
mvapi_btl->nic, \
|
||||||
mvapi_btl->srq_hndl_low); \
|
mvapi_btl->srq_hndl_lp); \
|
||||||
} \
|
} \
|
||||||
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
|
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
|
||||||
} while(0); \
|
} while(0); \
|
||||||
@ -237,12 +230,12 @@ struct mca_btl_mvapi_module_t {
|
|||||||
#define MCA_BTL_MVAPI_POST_SRR_SUB(cnt, \
|
#define MCA_BTL_MVAPI_POST_SRR_SUB(cnt, \
|
||||||
mvapi_btl, \
|
mvapi_btl, \
|
||||||
frag_list, \
|
frag_list, \
|
||||||
srr_posted, \
|
srd_posted, \
|
||||||
nic, \
|
nic, \
|
||||||
srq_hndl) \
|
srq_hndl) \
|
||||||
{\
|
{\
|
||||||
do { \
|
do { \
|
||||||
uint32_t i; \
|
int32_t i; \
|
||||||
VAPI_ret_t ret; \
|
VAPI_ret_t ret; \
|
||||||
uint32_t rwqe_posted = 0; \
|
uint32_t rwqe_posted = 0; \
|
||||||
int rc; \
|
int rc; \
|
||||||
@ -268,7 +261,7 @@ struct mca_btl_mvapi_module_t {
|
|||||||
} else if(rwqe_posted < 1) { \
|
} else if(rwqe_posted < 1) { \
|
||||||
BTL_ERROR(("error posting receive descriptors to shared receive queue, number of entries posted is %d", rwqe_posted)); \
|
BTL_ERROR(("error posting receive descriptors to shared receive queue, number of entries posted is %d", rwqe_posted)); \
|
||||||
} else {\
|
} else {\
|
||||||
OPAL_THREAD_ADD32(srr_posted, cnt); \
|
OPAL_THREAD_ADD32(srd_posted, cnt); \
|
||||||
}\
|
}\
|
||||||
} while(0);\
|
} while(0);\
|
||||||
}
|
}
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include "ompi/include/constants.h"
|
#include "ompi/include/constants.h"
|
||||||
#include "opal/event/event.h"
|
#include "opal/event/event.h"
|
||||||
|
#include "opal/include/sys/timer.h"
|
||||||
#include "opal/util/if.h"
|
#include "opal/util/if.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
@ -42,6 +43,8 @@
|
|||||||
#include "mca/pml/base/pml_base_module_exchange.h"
|
#include "mca/pml/base/pml_base_module_exchange.h"
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
mca_btl_mvapi_component_t mca_btl_mvapi_component = {
|
mca_btl_mvapi_component_t mca_btl_mvapi_component = {
|
||||||
{
|
{
|
||||||
/* First, the mca_base_component_t struct containing meta information
|
/* First, the mca_base_component_t struct containing meta information
|
||||||
@ -130,15 +133,11 @@ int mca_btl_mvapi_component_open(void)
|
|||||||
mca_btl_mvapi_param_register_int ("free_list_num", "intial size of free lists",
|
mca_btl_mvapi_param_register_int ("free_list_num", "intial size of free lists",
|
||||||
8, &mca_btl_mvapi_component.ib_free_list_num);
|
8, &mca_btl_mvapi_component.ib_free_list_num);
|
||||||
mca_btl_mvapi_param_register_int ("free_list_max", "maximum size of free lists",
|
mca_btl_mvapi_param_register_int ("free_list_max", "maximum size of free lists",
|
||||||
1024, &mca_btl_mvapi_component.ib_free_list_max);
|
-1, &mca_btl_mvapi_component.ib_free_list_max);
|
||||||
mca_btl_mvapi_param_register_int ("free_list_inc", "increment size of free lists",
|
mca_btl_mvapi_param_register_int ("free_list_inc", "increment size of free lists",
|
||||||
32, &mca_btl_mvapi_component.ib_free_list_inc);
|
32, &mca_btl_mvapi_component.ib_free_list_inc);
|
||||||
mca_btl_mvapi_param_register_string("mpool", "name of the memory pool to be used",
|
mca_btl_mvapi_param_register_string("mpool", "name of the memory pool to be used",
|
||||||
"mvapi", &mca_btl_mvapi_component.ib_mpool_name);
|
"mvapi", &mca_btl_mvapi_component.ib_mpool_name);
|
||||||
mca_btl_mvapi_param_register_int("rd_max", "maximum number of receive descriptors to post to a QP",
|
|
||||||
16, (int*) &mca_btl_mvapi_component.ib_rr_buf_max);
|
|
||||||
mca_btl_mvapi_param_register_int("rd_min", "minimum number of receive descriptors before reposting occurs",
|
|
||||||
8, (int*) &mca_btl_mvapi_component.ib_rr_buf_min);
|
|
||||||
mca_btl_mvapi_param_register_int("reg_mru_len", "length of the registration cache most recently used list",
|
mca_btl_mvapi_param_register_int("reg_mru_len", "length of the registration cache most recently used list",
|
||||||
16, (int*) &mca_btl_mvapi_component.reg_mru_len);
|
16, (int*) &mca_btl_mvapi_component.reg_mru_len);
|
||||||
mca_btl_mvapi_param_register_int("use_srq", "if 1 use the IB shared receive queue to post receive descriptors",
|
mca_btl_mvapi_param_register_int("use_srq", "if 1 use the IB shared receive queue to post receive descriptors",
|
||||||
@ -175,10 +174,16 @@ int mca_btl_mvapi_component_open(void)
|
|||||||
mca_btl_mvapi_param_register_int("ib_src_path_bits", "IB source path bits",
|
mca_btl_mvapi_param_register_int("ib_src_path_bits", "IB source path bits",
|
||||||
0, (int*) &mca_btl_mvapi_component.ib_src_path_bits);
|
0, (int*) &mca_btl_mvapi_component.ib_src_path_bits);
|
||||||
|
|
||||||
|
mca_btl_mvapi_param_register_int("rd_num", "number of receive descriptors to post to a QP",
|
||||||
|
16, (int*) &mca_btl_mvapi_component.rd_num);
|
||||||
|
mca_btl_mvapi_param_register_int("rd_low", "low water mark before reposting occurs",
|
||||||
|
12, (int*) &mca_btl_mvapi_component.rd_low);
|
||||||
|
mca_btl_mvapi_param_register_int("rd_win", "window size at which generate explicity credit message",
|
||||||
|
8, (int*) &mca_btl_mvapi_component.rd_win);
|
||||||
|
mca_btl_mvapi_component.rd_rsv = ((mca_btl_mvapi_component.rd_num<<1)-1) / mca_btl_mvapi_component.rd_win;
|
||||||
|
|
||||||
mca_btl_mvapi_param_register_int("rd_per_peer", "receive descriptors posted per peer, SRQ mode only",
|
mca_btl_mvapi_param_register_int("rd_per_peer", "receive descriptors posted per peer, SRQ mode only",
|
||||||
16, (int*) &mca_btl_mvapi_component.rd_per_peer);
|
16, (int*) &mca_btl_mvapi_component.rd_per_peer);
|
||||||
mca_btl_mvapi_param_register_int ("exclusivity", "BTL exclusivity",
|
mca_btl_mvapi_param_register_int ("exclusivity", "BTL exclusivity",
|
||||||
MCA_BTL_EXCLUSIVITY_DEFAULT, (int*) &mca_btl_mvapi_module.super.btl_exclusivity);
|
MCA_BTL_EXCLUSIVITY_DEFAULT, (int*) &mca_btl_mvapi_module.super.btl_exclusivity);
|
||||||
mca_btl_mvapi_param_register_int ("eager_limit", "eager send limit",
|
mca_btl_mvapi_param_register_int ("eager_limit", "eager send limit",
|
||||||
@ -256,6 +261,27 @@ mca_btl_mvapi_modex_send(void)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Callback function on control message.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_btl_mvapi_control(
|
||||||
|
struct mca_btl_base_module_t* btl,
|
||||||
|
mca_btl_base_tag_t tag,
|
||||||
|
mca_btl_base_descriptor_t* descriptor,
|
||||||
|
void* cbdata)
|
||||||
|
{
|
||||||
|
/* dont return credits used for control messages */
|
||||||
|
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*)descriptor;
|
||||||
|
mca_btl_mvapi_endpoint_t* endpoint = frag->endpoint;
|
||||||
|
if(frag->size == mca_btl_mvapi_component.eager_limit) {
|
||||||
|
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -1);
|
||||||
|
} else {
|
||||||
|
OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, -1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* IB component initialization:
|
* IB component initialization:
|
||||||
@ -284,9 +310,12 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
mca_btl_base_selected_module_t* ib_selected;
|
mca_btl_base_selected_module_t* ib_selected;
|
||||||
opal_list_item_t* item;
|
opal_list_item_t* item;
|
||||||
|
|
||||||
|
#if 0
|
||||||
/* ugly HACK!! */
|
/* ugly HACK!! */
|
||||||
/* mallopt(M_TRIM_THRESHOLD, -1); */
|
mallopt(M_TRIM_THRESHOLD, -1);
|
||||||
/* mallopt(M_MMAP_MAX, 0); */
|
mallopt(M_MMAP_MAX, 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* initialization */
|
/* initialization */
|
||||||
*num_btl_modules = 0;
|
*num_btl_modules = 0;
|
||||||
|
|
||||||
@ -356,6 +385,8 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
mvapi_btl->port_id = (IB_port_t) j;
|
mvapi_btl->port_id = (IB_port_t) j;
|
||||||
mvapi_btl->port = hca_port;
|
mvapi_btl->port = hca_port;
|
||||||
mvapi_btl->port_info.subnet = hca_port.sm_lid;
|
mvapi_btl->port_info.subnet = hca_port.sm_lid;
|
||||||
|
mvapi_btl->ib_reg[MCA_BTL_TAG_BTL].cbfunc = mca_btl_mvapi_control;
|
||||||
|
mvapi_btl->ib_reg[MCA_BTL_TAG_BTL].cbdata = NULL;
|
||||||
|
|
||||||
opal_list_append(&btl_list, (opal_list_item_t*) ib_selected);
|
opal_list_append(&btl_list, (opal_list_item_t*) ib_selected);
|
||||||
mca_btl_mvapi_component.ib_num_btls ++;
|
mca_btl_mvapi_component.ib_num_btls ++;
|
||||||
@ -397,11 +428,10 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
free(mvapi_btl);
|
free(mvapi_btl);
|
||||||
|
|
||||||
mvapi_btl = &mca_btl_mvapi_component.mvapi_btls[i];
|
mvapi_btl = &mca_btl_mvapi_component.mvapi_btls[i];
|
||||||
mvapi_btl->rd_buf_max = mca_btl_mvapi_component.ib_rr_buf_max;
|
mvapi_btl->rd_num = mca_btl_mvapi_component.rd_num + mca_btl_mvapi_component.rd_rsv;
|
||||||
mvapi_btl->rd_buf_min = mca_btl_mvapi_component.ib_rr_buf_min;
|
mvapi_btl->rd_low = mca_btl_mvapi_component.rd_low;
|
||||||
mvapi_btl->num_peers = 0;
|
mvapi_btl->num_peers = 0;
|
||||||
mvapi_btl->wr_sq_tokens_hp =
|
mvapi_btl->sd_tokens_hp = mvapi_btl->sd_tokens_lp = mca_btl_mvapi_component.max_wr_sq_tokens;
|
||||||
mvapi_btl->wr_sq_tokens_lp = mca_btl_mvapi_component.max_total_wr_sq_tokens;
|
|
||||||
|
|
||||||
/* Initialize module state */
|
/* Initialize module state */
|
||||||
|
|
||||||
@ -450,7 +480,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
ompi_free_list_init(&mvapi_btl->send_free_eager,
|
ompi_free_list_init(&mvapi_btl->send_free_eager,
|
||||||
length,
|
length,
|
||||||
OBJ_CLASS(mca_btl_mvapi_send_frag_eager_t),
|
OBJ_CLASS(mca_btl_mvapi_send_frag_eager_t),
|
||||||
2*mvapi_btl->rd_buf_max,
|
2*mvapi_btl->rd_num,
|
||||||
mca_btl_mvapi_component.ib_free_list_max,
|
mca_btl_mvapi_component.ib_free_list_max,
|
||||||
mca_btl_mvapi_component.ib_free_list_inc,
|
mca_btl_mvapi_component.ib_free_list_inc,
|
||||||
mvapi_btl->super.btl_mpool);
|
mvapi_btl->super.btl_mpool);
|
||||||
@ -458,7 +488,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
ompi_free_list_init(&mvapi_btl->recv_free_eager,
|
ompi_free_list_init(&mvapi_btl->recv_free_eager,
|
||||||
length,
|
length,
|
||||||
OBJ_CLASS(mca_btl_mvapi_recv_frag_eager_t),
|
OBJ_CLASS(mca_btl_mvapi_recv_frag_eager_t),
|
||||||
2*mvapi_btl->rd_buf_max,
|
2*mvapi_btl->rd_num,
|
||||||
mca_btl_mvapi_component.ib_free_list_max,
|
mca_btl_mvapi_component.ib_free_list_max,
|
||||||
mca_btl_mvapi_component.ib_free_list_inc,
|
mca_btl_mvapi_component.ib_free_list_inc,
|
||||||
mvapi_btl->super.btl_mpool);
|
mvapi_btl->super.btl_mpool);
|
||||||
@ -474,7 +504,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
ompi_free_list_init(&mvapi_btl->send_free_max,
|
ompi_free_list_init(&mvapi_btl->send_free_max,
|
||||||
length,
|
length,
|
||||||
OBJ_CLASS(mca_btl_mvapi_send_frag_max_t),
|
OBJ_CLASS(mca_btl_mvapi_send_frag_max_t),
|
||||||
2*mvapi_btl->rd_buf_max,
|
2*mvapi_btl->rd_num,
|
||||||
mca_btl_mvapi_component.ib_free_list_max,
|
mca_btl_mvapi_component.ib_free_list_max,
|
||||||
mca_btl_mvapi_component.ib_free_list_inc,
|
mca_btl_mvapi_component.ib_free_list_inc,
|
||||||
mvapi_btl->super.btl_mpool);
|
mvapi_btl->super.btl_mpool);
|
||||||
@ -485,7 +515,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
ompi_free_list_init (&mvapi_btl->recv_free_max,
|
ompi_free_list_init (&mvapi_btl->recv_free_max,
|
||||||
length,
|
length,
|
||||||
OBJ_CLASS (mca_btl_mvapi_recv_frag_max_t),
|
OBJ_CLASS (mca_btl_mvapi_recv_frag_max_t),
|
||||||
2*mvapi_btl->rd_buf_max,
|
2*mvapi_btl->rd_num,
|
||||||
mca_btl_mvapi_component.ib_free_list_max,
|
mca_btl_mvapi_component.ib_free_list_max,
|
||||||
mca_btl_mvapi_component.ib_free_list_inc,
|
mca_btl_mvapi_component.ib_free_list_inc,
|
||||||
mvapi_btl->super.btl_mpool);
|
mvapi_btl->super.btl_mpool);
|
||||||
@ -509,7 +539,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
|||||||
|
|
||||||
|
|
||||||
/* Initialize the rr_desc_post array for posting of rr*/
|
/* Initialize the rr_desc_post array for posting of rr*/
|
||||||
mvapi_btl->rr_desc_post = (VAPI_rr_desc_t*) malloc((mvapi_btl->rd_buf_max * sizeof(VAPI_rr_desc_t)));
|
mvapi_btl->rr_desc_post = (VAPI_rr_desc_t*) malloc((mvapi_btl->rd_num * sizeof(VAPI_rr_desc_t)));
|
||||||
btls[i] = &mvapi_btl->super;
|
btls[i] = &mvapi_btl->super;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -532,6 +562,8 @@ int mca_btl_mvapi_component_progress()
|
|||||||
{
|
{
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
int32_t credits;
|
||||||
|
|
||||||
mca_btl_mvapi_frag_t* frag;
|
mca_btl_mvapi_frag_t* frag;
|
||||||
mca_btl_mvapi_endpoint_t* endpoint;
|
mca_btl_mvapi_endpoint_t* endpoint;
|
||||||
/* Poll for completions */
|
/* Poll for completions */
|
||||||
@ -545,7 +577,7 @@ int mca_btl_mvapi_component_progress()
|
|||||||
* we will check the high priority and process them until there are none left.
|
* we will check the high priority and process them until there are none left.
|
||||||
* note that low priority messages are only processed one per progress call.
|
* note that low priority messages are only processed one per progress call.
|
||||||
*/
|
*/
|
||||||
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_high, &comp);
|
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_hp, &comp);
|
||||||
if(VAPI_OK == ret) {
|
if(VAPI_OK == ret) {
|
||||||
if(comp.status != VAPI_SUCCESS) {
|
if(comp.status != VAPI_SUCCESS) {
|
||||||
BTL_ERROR(("Got error : %s, Vendor code : %d Frag : %p",
|
BTL_ERROR(("Got error : %s, Vendor code : %d Frag : %p",
|
||||||
@ -561,73 +593,87 @@ int mca_btl_mvapi_component_progress()
|
|||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
|
|
||||||
case VAPI_CQE_SQ_SEND_DATA :
|
case VAPI_CQE_SQ_SEND_DATA :
|
||||||
case VAPI_CQE_SQ_RDMA_READ:
|
|
||||||
case VAPI_CQE_SQ_RDMA_WRITE:
|
|
||||||
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
|
|
||||||
/* Process a completed send or an rdma write */
|
|
||||||
frag->rc = OMPI_SUCCESS;
|
|
||||||
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
|
|
||||||
count++;
|
|
||||||
/* check and see if we need to progress pending sends */
|
|
||||||
if( !mca_btl_mvapi_component.use_srq &&
|
|
||||||
OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_hp, 1) > 0
|
|
||||||
&& !opal_list_is_empty(&(frag->endpoint->pending_frags_hp))) {
|
|
||||||
opal_list_item_t *frag_item;
|
|
||||||
OPAL_THREAD_LOCK(&frag->endpoint->endpoint_lock);
|
|
||||||
frag_item = opal_list_remove_first(&(frag->endpoint->pending_frags_hp));
|
|
||||||
OPAL_THREAD_UNLOCK(&frag->endpoint->endpoint_lock);
|
|
||||||
frag = (mca_btl_mvapi_frag_t *) frag_item;
|
|
||||||
|
|
||||||
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
|
/* Process a completed send */
|
||||||
BTL_ERROR(("error in posting pending send\n"));
|
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
|
||||||
}
|
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
|
||||||
}
|
|
||||||
|
frag->rc = OMPI_SUCCESS;
|
||||||
|
frag->base.des_cbfunc(&mvapi_btl->super, endpoint, &frag->base, frag->rc);
|
||||||
|
count++;
|
||||||
|
|
||||||
|
/* check and see if we need to progress pending sends */
|
||||||
if( mca_btl_mvapi_component.use_srq &&
|
if( mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_hp, 1) > 0
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_hp, 1) > 0
|
||||||
&& !opal_list_is_empty(&mvapi_btl->pending_frags_hp)) {
|
&& !opal_list_is_empty(&mvapi_btl->pending_frags_hp)) {
|
||||||
opal_list_item_t *frag_item;
|
opal_list_item_t *frag_item;
|
||||||
frag_item = opal_list_remove_first(&mvapi_btl->pending_frags_hp);
|
frag_item = opal_list_remove_first(&mvapi_btl->pending_frags_hp);
|
||||||
frag = (mca_btl_mvapi_frag_t *) frag_item;
|
frag = (mca_btl_mvapi_frag_t *) frag_item;
|
||||||
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
|
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(endpoint, frag)) {
|
||||||
BTL_ERROR(("error in posting pending send\n"));
|
BTL_ERROR(("error in posting pending send\n"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VAPI_CQE_RQ_SEND_DATA:
|
case VAPI_CQE_RQ_SEND_DATA:
|
||||||
|
|
||||||
/* Process a RECV */
|
/* process a RECV */
|
||||||
BTL_VERBOSE(("Got a recv completion"));
|
|
||||||
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
|
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
|
||||||
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
|
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
|
||||||
|
credits = frag->hdr->credits;
|
||||||
|
|
||||||
frag->rc=OMPI_SUCCESS;
|
/* repost receive descriptors */
|
||||||
frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
|
|
||||||
/* advance the segment address past the header and subtract from the length..*/
|
|
||||||
mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);
|
|
||||||
|
|
||||||
OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_eager), (opal_list_item_t*) frag);
|
|
||||||
|
|
||||||
|
|
||||||
if(mca_btl_mvapi_component.use_srq) {
|
if(mca_btl_mvapi_component.use_srq) {
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->srr_posted_high, -1);
|
OPAL_THREAD_ADD32(&mvapi_btl->srd_posted_hp, -1);
|
||||||
MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, 0);
|
MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, 0);
|
||||||
} else {
|
} else {
|
||||||
OPAL_THREAD_ADD32(&endpoint->rr_posted_high, -1);
|
OPAL_THREAD_ADD32(&endpoint->rd_posted_hp, -1);
|
||||||
MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(((mca_btl_mvapi_frag_t*) (unsigned long) comp.id)->endpoint, 0);
|
MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* advance the segment address past the header and subtract from the length..*/
|
||||||
|
frag->rc=OMPI_SUCCESS;
|
||||||
|
frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
|
||||||
|
/* call registered callback */
|
||||||
|
mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);
|
||||||
|
OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_eager), (opal_list_item_t*) frag);
|
||||||
|
|
||||||
|
/* check to see if we need to progress any pending desciptors */
|
||||||
|
if( !mca_btl_mvapi_component.use_srq &&
|
||||||
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp, credits) > 0
|
||||||
|
&& !opal_list_is_empty(&(endpoint->pending_frags_hp))) {
|
||||||
|
|
||||||
|
do {
|
||||||
|
opal_list_item_t *frag_item;
|
||||||
|
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
|
||||||
|
frag_item = opal_list_remove_first(&(endpoint->pending_frags_hp));
|
||||||
|
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
||||||
|
if(NULL == (frag = (mca_btl_mvapi_frag_t *) frag_item))
|
||||||
|
break;
|
||||||
|
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
|
||||||
|
BTL_ERROR(("error in posting pending send\n"));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while(endpoint->sd_tokens_hp > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check to see if we need to return credits */
|
||||||
|
if( !mca_btl_mvapi_component.use_srq &&
|
||||||
|
endpoint->rd_credits_hp >= mca_btl_mvapi_component.rd_win) {
|
||||||
|
mca_btl_mvapi_endpoint_send_credits(endpoint, endpoint->lcl_qp_hndl_hp, endpoint->rem_info.rem_qp_num_hp, &endpoint->rd_credits_hp);
|
||||||
}
|
}
|
||||||
count++;
|
count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case VAPI_CQE_SQ_RDMA_READ:
|
||||||
|
case VAPI_CQE_SQ_RDMA_WRITE:
|
||||||
default:
|
default:
|
||||||
BTL_ERROR(("Unhandled work completion opcode is %d", comp.opcode));
|
BTL_ERROR(("Unhandled work completion opcode is %d", comp.opcode));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_low, &comp);
|
ret = VAPI_poll_cq(mvapi_btl->nic, mvapi_btl->cq_hndl_lp, &comp);
|
||||||
if(VAPI_OK == ret) {
|
if(VAPI_OK == ret) {
|
||||||
if(comp.status != VAPI_SUCCESS) {
|
if(comp.status != VAPI_SUCCESS) {
|
||||||
BTL_ERROR(("Got error : %s, Vendor code : %d Frag : %p",
|
BTL_ERROR(("Got error : %s, Vendor code : %d Frag : %p",
|
||||||
@ -642,100 +688,101 @@ int mca_btl_mvapi_component_progress()
|
|||||||
BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
|
BTL_ERROR(("Got an RDMA with Immediate data!, not supported!"));
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
|
|
||||||
case VAPI_CQE_SQ_RDMA_READ:
|
|
||||||
case VAPI_CQE_SQ_RDMA_WRITE:
|
|
||||||
case VAPI_CQE_SQ_SEND_DATA :
|
case VAPI_CQE_SQ_SEND_DATA :
|
||||||
|
|
||||||
/* Process a completed send */
|
/* Process a completed send - receiver must return tokens */
|
||||||
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
|
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
|
||||||
frag->rc = OMPI_SUCCESS;
|
frag->rc = OMPI_SUCCESS;
|
||||||
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
|
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
|
||||||
count++;
|
count++;
|
||||||
/* check and see if we need to progress pending sends */
|
|
||||||
|
/* if we have tokens, process pending sends */
|
||||||
|
if(mca_btl_mvapi_component.use_srq &&
|
||||||
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp, 1) > 0
|
||||||
|
&& !opal_list_is_empty(&mvapi_btl->pending_frags_lp)) {
|
||||||
|
opal_list_item_t *frag_item;
|
||||||
|
frag_item = opal_list_remove_first(&mvapi_btl->pending_frags_lp);
|
||||||
|
frag = (mca_btl_mvapi_frag_t *) frag_item;
|
||||||
|
MCA_BTL_IB_FRAG_PROGRESS(frag);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case VAPI_CQE_SQ_RDMA_READ:
|
||||||
|
|
||||||
|
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
|
||||||
|
OPAL_THREAD_ADD32(&frag->endpoint->get_tokens, 1);
|
||||||
|
/* fall through */
|
||||||
|
|
||||||
|
case VAPI_CQE_SQ_RDMA_WRITE:
|
||||||
|
|
||||||
|
/* Process a completed write - returns send tokens immediately */
|
||||||
|
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
|
||||||
|
endpoint = frag->endpoint;
|
||||||
|
frag->rc = OMPI_SUCCESS;
|
||||||
|
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
|
||||||
|
count++;
|
||||||
|
|
||||||
|
if(mca_btl_mvapi_component.use_srq &&
|
||||||
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp, 1) > 0
|
||||||
|
&& !opal_list_is_empty(&mvapi_btl->pending_frags_lp)) {
|
||||||
|
opal_list_item_t *frag_item;
|
||||||
|
frag_item = opal_list_remove_first(&mvapi_btl->pending_frags_lp);
|
||||||
|
frag = (mca_btl_mvapi_frag_t *) frag_item;
|
||||||
|
MCA_BTL_IB_FRAG_PROGRESS(frag);
|
||||||
|
}
|
||||||
if(!mca_btl_mvapi_component.use_srq &&
|
if(!mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_lp, 1) > 0 &&
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp, 1) > 0 &&
|
||||||
!opal_list_is_empty(&(frag->endpoint->pending_frags_lp))) {
|
!opal_list_is_empty(&(endpoint->pending_frags_lp))) {
|
||||||
opal_list_item_t *frag_item;
|
opal_list_item_t *frag_item;
|
||||||
OPAL_THREAD_LOCK(&frag->endpoint->endpoint_lock);
|
OPAL_THREAD_LOCK(&frag->endpoint->endpoint_lock);
|
||||||
frag_item = opal_list_remove_first(&(frag->endpoint->pending_frags_lp));
|
frag_item = opal_list_remove_first(&(frag->endpoint->pending_frags_lp));
|
||||||
OPAL_THREAD_UNLOCK(&frag->endpoint->endpoint_lock);
|
OPAL_THREAD_UNLOCK(&frag->endpoint->endpoint_lock);
|
||||||
frag = (mca_btl_mvapi_frag_t *) frag_item;
|
frag = (mca_btl_mvapi_frag_t *) frag_item;
|
||||||
switch(frag->sr_desc.opcode){
|
MCA_BTL_IB_FRAG_PROGRESS(frag);
|
||||||
case VAPI_SEND:
|
|
||||||
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
|
|
||||||
BTL_ERROR(("error in posting pending send\n"));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case VAPI_RDMA_WRITE:
|
|
||||||
if(OMPI_SUCCESS != mca_btl_mvapi_put((mca_btl_base_module_t*) mvapi_btl,
|
|
||||||
frag->endpoint,
|
|
||||||
(mca_btl_base_descriptor_t*) frag)) {
|
|
||||||
BTL_ERROR(("error in posting pending rdma write\n"));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case VAPI_RDMA_READ:
|
|
||||||
if(OMPI_SUCCESS != mca_btl_mvapi_put((mca_btl_base_module_t *) mvapi_btl,
|
|
||||||
frag->endpoint,
|
|
||||||
(mca_btl_base_descriptor_t*) frag)) {
|
|
||||||
BTL_ERROR(("error in posting pending rdma read\n"));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
BTL_ERROR(("error in posting pending operation, invalide opcode %d\n", frag->sr_desc.opcode));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if(mca_btl_mvapi_component.use_srq &&
|
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp, 1) > 0
|
|
||||||
&& !opal_list_is_empty(&mvapi_btl->pending_frags_lp)) {
|
|
||||||
opal_list_item_t *frag_item;
|
|
||||||
frag_item = opal_list_remove_first(&mvapi_btl->pending_frags_lp);
|
|
||||||
frag = (mca_btl_mvapi_frag_t *) frag_item;
|
|
||||||
switch(frag->sr_desc.opcode){
|
|
||||||
case VAPI_SEND:
|
|
||||||
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
|
|
||||||
BTL_ERROR(("error in posting pending send\n"));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case VAPI_RDMA_WRITE:
|
|
||||||
if(OMPI_SUCCESS != mca_btl_mvapi_put((mca_btl_base_module_t*) mvapi_btl,
|
|
||||||
frag->endpoint,
|
|
||||||
(mca_btl_base_descriptor_t*) frag)) {
|
|
||||||
BTL_ERROR(("error in posting pending rdma write\n"));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case VAPI_RDMA_READ:
|
|
||||||
if(OMPI_SUCCESS != mca_btl_mvapi_put((mca_btl_base_module_t *) mvapi_btl,
|
|
||||||
frag->endpoint,
|
|
||||||
(mca_btl_base_descriptor_t*) frag)) {
|
|
||||||
BTL_ERROR(("error in posting pending rdma read\n"));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
BTL_ERROR(("error in posting pending operation, invalide opcode %d\n", frag->sr_desc.opcode));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VAPI_CQE_RQ_SEND_DATA:
|
case VAPI_CQE_RQ_SEND_DATA:
|
||||||
|
|
||||||
BTL_VERBOSE(("Got a recv completion"));
|
|
||||||
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
|
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
|
||||||
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
|
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
|
||||||
|
credits = frag->hdr->credits;
|
||||||
|
|
||||||
|
/* post descriptors before processing receive */
|
||||||
|
if(mca_btl_mvapi_component.use_srq) {
|
||||||
|
OPAL_THREAD_ADD32(&mvapi_btl->srd_posted_lp, -1);
|
||||||
|
MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, 0);
|
||||||
|
} else {
|
||||||
|
OPAL_THREAD_ADD32(&endpoint->rd_posted_lp, -1);
|
||||||
|
MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* process received frag */
|
||||||
frag->rc=OMPI_SUCCESS;
|
frag->rc=OMPI_SUCCESS;
|
||||||
frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
|
frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
|
||||||
/* advance the segment address past the header and subtract from the length..*/
|
/* advance the segment address past the header and subtract from the length..*/
|
||||||
mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);
|
mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);
|
||||||
|
|
||||||
OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_max), (opal_list_item_t*) frag);
|
OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_max), (opal_list_item_t*) frag);
|
||||||
|
|
||||||
|
/* check to see if we need to progress pending descriptors */
|
||||||
if(mca_btl_mvapi_component.use_srq) {
|
if(!mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->srr_posted_low, -1);
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp, credits) > 0 &&
|
||||||
MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, 0);
|
!opal_list_is_empty(&(endpoint->pending_frags_lp))) {
|
||||||
} else {
|
do {
|
||||||
OPAL_THREAD_ADD32(&endpoint->rr_posted_low, -1);
|
opal_list_item_t *frag_item;
|
||||||
MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(((mca_btl_mvapi_frag_t*) (unsigned long) comp.id)->endpoint, 0);
|
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
|
||||||
|
frag_item = opal_list_remove_first(&(endpoint->pending_frags_lp));
|
||||||
|
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
||||||
|
if(NULL == (frag = (mca_btl_mvapi_frag_t *) frag_item))
|
||||||
|
break;
|
||||||
|
MCA_BTL_IB_FRAG_PROGRESS(frag);
|
||||||
|
} while(endpoint->sd_tokens_lp > 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check to see if we need to return credits */
|
||||||
|
if( !mca_btl_mvapi_component.use_srq &&
|
||||||
|
endpoint->rd_credits_lp >= mca_btl_mvapi_component.rd_win) {
|
||||||
|
mca_btl_mvapi_endpoint_send_credits(endpoint, endpoint->lcl_qp_hndl_lp, endpoint->rem_info.rem_qp_num_lp, &endpoint->rd_credits_lp);
|
||||||
}
|
}
|
||||||
count++;
|
count++;
|
||||||
break;
|
break;
|
||||||
|
@ -74,51 +74,48 @@ static inline int mca_btl_mvapi_endpoint_post_send(
|
|||||||
|
|
||||||
/* atomically test and acquire a token */
|
/* atomically test and acquire a token */
|
||||||
if(!mca_btl_mvapi_component.use_srq &&
|
if(!mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_hp,-1) < 0) {
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp,-1) < 0) {
|
||||||
BTL_VERBOSE(("Queing because no send tokens \n"));
|
BTL_VERBOSE(("Queing because no send tokens \n"));
|
||||||
opal_list_append(&endpoint->pending_frags_hp, (opal_list_item_t *)frag);
|
opal_list_append(&endpoint->pending_frags_hp, (opal_list_item_t *)frag);
|
||||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_hp,1);
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp,1);
|
||||||
|
|
||||||
/* OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); */
|
|
||||||
/* mca_btl_mvapi_component_progress(); */
|
|
||||||
/* OPAL_THREAD_LOCK(&endpoint->endpoint_lock); */
|
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
} else if( mca_btl_mvapi_component.use_srq &&
|
} else if( mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_hp,-1) < 0) {
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_hp,-1) < 0) {
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_hp,1);
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_hp,1);
|
||||||
opal_list_append(&mvapi_btl->pending_frags_hp, (opal_list_item_t *)frag);
|
opal_list_append(&mvapi_btl->pending_frags_hp, (opal_list_item_t *)frag);
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}else {
|
} else {
|
||||||
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_high;
|
frag->hdr->credits = endpoint->rd_credits_hp;
|
||||||
qp_hndl = endpoint->lcl_qp_hndl_high;
|
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, - frag->hdr->credits);
|
||||||
|
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_hp;
|
||||||
|
qp_hndl = endpoint->lcl_qp_hndl_hp;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
/* atomically test and acquire a token */
|
/* atomically test and acquire a token */
|
||||||
if(!mca_btl_mvapi_component.use_srq &&
|
if(!mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0 ) {
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp,-1) < 0 ) {
|
||||||
BTL_VERBOSE(("Queing because no send tokens \n"));
|
BTL_VERBOSE(("Queing because no send tokens \n"));
|
||||||
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
|
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
|
||||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,1);
|
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp,1);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
} else if(mca_btl_mvapi_component.use_srq &&
|
} else if(mca_btl_mvapi_component.use_srq &&
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,-1) < 0) {
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp,-1) < 0) {
|
||||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,1);
|
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp,1);
|
||||||
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
|
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_low;
|
frag->hdr->credits = endpoint->rd_credits_lp;
|
||||||
qp_hndl = endpoint->lcl_qp_hndl_low;
|
OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, - frag->hdr->credits);
|
||||||
|
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_lp;
|
||||||
|
qp_hndl = endpoint->lcl_qp_hndl_lp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
frag->sg_entry.len = frag->segment.seg_len +
|
frag->sg_entry.len = frag->segment.seg_len + sizeof(mca_btl_mvapi_header_t);
|
||||||
((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
|
|
||||||
|
|
||||||
if(frag->sg_entry.len <= mvapi_btl->ib_inline_max) {
|
if(frag->sg_entry.len <= mvapi_btl->ib_inline_max) {
|
||||||
frag->ret = EVAPI_post_inline_sr(mvapi_btl->nic,
|
frag->ret = EVAPI_post_inline_sr(mvapi_btl->nic,
|
||||||
qp_hndl,
|
qp_hndl,
|
||||||
@ -165,15 +162,23 @@ static void mca_btl_mvapi_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
|
|||||||
OBJ_CONSTRUCT(&endpoint->pending_send_frags, opal_list_t);
|
OBJ_CONSTRUCT(&endpoint->pending_send_frags, opal_list_t);
|
||||||
OBJ_CONSTRUCT(&endpoint->pending_frags_hp, opal_list_t);
|
OBJ_CONSTRUCT(&endpoint->pending_frags_hp, opal_list_t);
|
||||||
OBJ_CONSTRUCT(&endpoint->pending_frags_lp, opal_list_t);
|
OBJ_CONSTRUCT(&endpoint->pending_frags_lp, opal_list_t);
|
||||||
|
OBJ_CONSTRUCT(&endpoint->pending_frags_get, opal_list_t);
|
||||||
|
|
||||||
|
endpoint->rd_posted_hp = 0;
|
||||||
endpoint->rr_posted_high = 0;
|
endpoint->rd_posted_lp = 0;
|
||||||
endpoint->rr_posted_low = 0;
|
|
||||||
|
/* zero these out w/ initial posting, so that we start out w/
|
||||||
|
* zero credits to return to peer
|
||||||
|
*/
|
||||||
|
endpoint->rd_credits_hp = -(mca_btl_mvapi_component.rd_num + mca_btl_mvapi_component.rd_rsv);
|
||||||
|
endpoint->rd_credits_lp = -(mca_btl_mvapi_component.rd_num + mca_btl_mvapi_component.rd_rsv);
|
||||||
|
|
||||||
/* initialize the high and low priority tokens */
|
/* initialize the high and low priority tokens */
|
||||||
endpoint->wr_sq_tokens_hp = mca_btl_mvapi_component.max_wr_sq_tokens;
|
endpoint->sd_tokens_hp = mca_btl_mvapi_component.rd_num;
|
||||||
endpoint->wr_sq_tokens_lp = mca_btl_mvapi_component.max_wr_sq_tokens;
|
endpoint->sd_tokens_lp = mca_btl_mvapi_component.rd_num;
|
||||||
endpoint->rem_info.rem_qp_num_high = 0;
|
endpoint->get_tokens = mca_btl_mvapi_component.ib_qp_ous_rd_atom;
|
||||||
endpoint->rem_info.rem_qp_num_low = 0;
|
endpoint->rem_info.rem_qp_num_hp = 0;
|
||||||
|
endpoint->rem_info.rem_qp_num_lp = 0;
|
||||||
endpoint->rem_info.rem_lid = 0;
|
endpoint->rem_info.rem_lid = 0;
|
||||||
endpoint->rem_info.rem_subnet = 0;
|
endpoint->rem_info.rem_subnet = 0;
|
||||||
}
|
}
|
||||||
@ -185,6 +190,11 @@ static void mca_btl_mvapi_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
|
|||||||
|
|
||||||
static void mca_btl_mvapi_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
|
static void mca_btl_mvapi_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
|
||||||
{
|
{
|
||||||
|
OBJ_DESTRUCT(&endpoint->endpoint_lock);
|
||||||
|
OBJ_DESTRUCT(&endpoint->pending_send_frags);
|
||||||
|
OBJ_DESTRUCT(&endpoint->pending_frags_hp);
|
||||||
|
OBJ_DESTRUCT(&endpoint->pending_frags_lp);
|
||||||
|
OBJ_DESTRUCT(&endpoint->pending_frags_get);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -214,13 +224,13 @@ static int mca_btl_mvapi_endpoint_send_connect_data(mca_btl_base_endpoint_t* end
|
|||||||
|
|
||||||
/* pack the info in the send buffer */
|
/* pack the info in the send buffer */
|
||||||
|
|
||||||
rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop_high.qp_num, 1, ORTE_UINT32);
|
rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop_hp.qp_num, 1, ORTE_UINT32);
|
||||||
if(rc != ORTE_SUCCESS) {
|
if(rc != ORTE_SUCCESS) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop_low.qp_num, 1, ORTE_UINT32);
|
rc = orte_dps.pack(buffer, &endpoint->lcl_qp_prop_lp.qp_num, 1, ORTE_UINT32);
|
||||||
if(rc != ORTE_SUCCESS) {
|
if(rc != ORTE_SUCCESS) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
@ -269,8 +279,8 @@ static int mca_btl_mvapi_endpoint_send_connect_data(mca_btl_base_endpoint_t* end
|
|||||||
|
|
||||||
|
|
||||||
BTL_VERBOSE(("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
BTL_VERBOSE(("Sending High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
||||||
endpoint->lcl_qp_prop_high.qp_num,
|
endpoint->lcl_qp_prop_hp.qp_num,
|
||||||
endpoint->lcl_qp_prop_low.qp_num,
|
endpoint->lcl_qp_prop_lp.qp_num,
|
||||||
endpoint->endpoint_btl->port.lid));
|
endpoint->endpoint_btl->port.lid));
|
||||||
|
|
||||||
if(rc < 0) {
|
if(rc < 0) {
|
||||||
@ -294,8 +304,8 @@ static int mca_btl_mvapi_endpoint_set_remote_info(mca_btl_base_endpoint_t* endpo
|
|||||||
memcpy(&((mca_btl_mvapi_endpoint_t*) endpoint)->rem_info, rem_info, sizeof(mca_btl_mvapi_rem_info_t));
|
memcpy(&((mca_btl_mvapi_endpoint_t*) endpoint)->rem_info, rem_info, sizeof(mca_btl_mvapi_rem_info_t));
|
||||||
|
|
||||||
BTL_VERBOSE(("Setting High Priority QP num = %d, Low Priority QP num %d, LID = %d",
|
BTL_VERBOSE(("Setting High Priority QP num = %d, Low Priority QP num %d, LID = %d",
|
||||||
endpoint->rem_info.rem_qp_num_high,
|
endpoint->rem_info.rem_qp_num_hp,
|
||||||
endpoint->rem_info.rem_qp_num_low,
|
endpoint->rem_info.rem_qp_num_lp,
|
||||||
endpoint->rem_info.rem_lid));
|
endpoint->rem_info.rem_lid));
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
@ -321,10 +331,10 @@ static int mca_btl_mvapi_endpoint_start_connect(mca_btl_base_endpoint_t* endpoin
|
|||||||
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_create_qp(endpoint->endpoint_btl,
|
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_create_qp(endpoint->endpoint_btl,
|
||||||
endpoint->endpoint_btl->nic,
|
endpoint->endpoint_btl->nic,
|
||||||
endpoint->endpoint_btl->ptag,
|
endpoint->endpoint_btl->ptag,
|
||||||
endpoint->endpoint_btl->cq_hndl_high,
|
endpoint->endpoint_btl->cq_hndl_hp,
|
||||||
endpoint->endpoint_btl->srq_hndl_high,
|
endpoint->endpoint_btl->srq_hndl_hp,
|
||||||
&endpoint->lcl_qp_hndl_high,
|
&endpoint->lcl_qp_hndl_hp,
|
||||||
&endpoint->lcl_qp_prop_high,
|
&endpoint->lcl_qp_prop_hp,
|
||||||
VAPI_TS_RC))) {
|
VAPI_TS_RC))) {
|
||||||
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
@ -335,10 +345,10 @@ static int mca_btl_mvapi_endpoint_start_connect(mca_btl_base_endpoint_t* endpoin
|
|||||||
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_create_qp(endpoint->endpoint_btl,
|
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_create_qp(endpoint->endpoint_btl,
|
||||||
endpoint->endpoint_btl->nic,
|
endpoint->endpoint_btl->nic,
|
||||||
endpoint->endpoint_btl->ptag,
|
endpoint->endpoint_btl->ptag,
|
||||||
endpoint->endpoint_btl->cq_hndl_low,
|
endpoint->endpoint_btl->cq_hndl_lp,
|
||||||
endpoint->endpoint_btl->srq_hndl_low,
|
endpoint->endpoint_btl->srq_hndl_lp,
|
||||||
&endpoint->lcl_qp_hndl_low,
|
&endpoint->lcl_qp_hndl_lp,
|
||||||
&endpoint->lcl_qp_prop_low,
|
&endpoint->lcl_qp_prop_lp,
|
||||||
VAPI_TS_RC))) {
|
VAPI_TS_RC))) {
|
||||||
|
|
||||||
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
@ -356,8 +366,8 @@ static int mca_btl_mvapi_endpoint_start_connect(mca_btl_base_endpoint_t* endpoin
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
BTL_VERBOSE(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
BTL_VERBOSE(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
||||||
endpoint->lcl_qp_prop_high.qp_num,
|
endpoint->lcl_qp_prop_hp.qp_num,
|
||||||
endpoint->lcl_qp_prop_low.qp_num,
|
endpoint->lcl_qp_prop_lp.qp_num,
|
||||||
endpoint->endpoint_btl->port.lid));
|
endpoint->endpoint_btl->port.lid));
|
||||||
|
|
||||||
/* Send connection info over to remote endpoint */
|
/* Send connection info over to remote endpoint */
|
||||||
@ -383,10 +393,10 @@ static int mca_btl_mvapi_endpoint_reply_start_connect(mca_btl_mvapi_endpoint_t *
|
|||||||
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_create_qp(endpoint->endpoint_btl,
|
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_create_qp(endpoint->endpoint_btl,
|
||||||
endpoint->endpoint_btl->nic,
|
endpoint->endpoint_btl->nic,
|
||||||
endpoint->endpoint_btl->ptag,
|
endpoint->endpoint_btl->ptag,
|
||||||
endpoint->endpoint_btl->cq_hndl_high,
|
endpoint->endpoint_btl->cq_hndl_hp,
|
||||||
endpoint->endpoint_btl->srq_hndl_high,
|
endpoint->endpoint_btl->srq_hndl_hp,
|
||||||
&endpoint->lcl_qp_hndl_high,
|
&endpoint->lcl_qp_hndl_hp,
|
||||||
&endpoint->lcl_qp_prop_high,
|
&endpoint->lcl_qp_prop_hp,
|
||||||
VAPI_TS_RC))) {
|
VAPI_TS_RC))) {
|
||||||
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
@ -397,10 +407,10 @@ static int mca_btl_mvapi_endpoint_reply_start_connect(mca_btl_mvapi_endpoint_t *
|
|||||||
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_create_qp(endpoint->endpoint_btl,
|
if(OMPI_SUCCESS != (rc = mca_btl_mvapi_endpoint_create_qp(endpoint->endpoint_btl,
|
||||||
endpoint->endpoint_btl->nic,
|
endpoint->endpoint_btl->nic,
|
||||||
endpoint->endpoint_btl->ptag,
|
endpoint->endpoint_btl->ptag,
|
||||||
endpoint->endpoint_btl->cq_hndl_low,
|
endpoint->endpoint_btl->cq_hndl_lp,
|
||||||
endpoint->endpoint_btl->srq_hndl_low,
|
endpoint->endpoint_btl->srq_hndl_lp,
|
||||||
&endpoint->lcl_qp_hndl_low,
|
&endpoint->lcl_qp_hndl_lp,
|
||||||
&endpoint->lcl_qp_prop_low,
|
&endpoint->lcl_qp_prop_lp,
|
||||||
VAPI_TS_RC))) {
|
VAPI_TS_RC))) {
|
||||||
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
BTL_ERROR(("error creating queue pair, error code %d", rc));
|
||||||
return rc;
|
return rc;
|
||||||
@ -416,8 +426,8 @@ static int mca_btl_mvapi_endpoint_reply_start_connect(mca_btl_mvapi_endpoint_t *
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
BTL_VERBOSE(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
BTL_VERBOSE(("Initialized High Priority QP num = %d, Low Priority QP num = %d, LID = %d",
|
||||||
endpoint->lcl_qp_prop_high.qp_num,
|
endpoint->lcl_qp_prop_hp.qp_num,
|
||||||
endpoint->lcl_qp_prop_low.qp_num,
|
endpoint->lcl_qp_prop_lp.qp_num,
|
||||||
endpoint->endpoint_btl->port.lid));
|
endpoint->endpoint_btl->port.lid));
|
||||||
|
|
||||||
|
|
||||||
@ -497,12 +507,12 @@ static void mca_btl_mvapi_endpoint_recv(
|
|||||||
/* start by unpacking data first so we know who is knocking at
|
/* start by unpacking data first so we know who is knocking at
|
||||||
our door */
|
our door */
|
||||||
|
|
||||||
rc = orte_dps.unpack(buffer, &rem_info.rem_qp_num_high, &cnt, ORTE_UINT32);
|
rc = orte_dps.unpack(buffer, &rem_info.rem_qp_num_hp, &cnt, ORTE_UINT32);
|
||||||
if(ORTE_SUCCESS != rc) {
|
if(ORTE_SUCCESS != rc) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
rc = orte_dps.unpack(buffer, &rem_info.rem_qp_num_low, &cnt, ORTE_UINT32);
|
rc = orte_dps.unpack(buffer, &rem_info.rem_qp_num_lp, &cnt, ORTE_UINT32);
|
||||||
if(ORTE_SUCCESS != rc) {
|
if(ORTE_SUCCESS != rc) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return;
|
return;
|
||||||
@ -545,8 +555,8 @@ static void mca_btl_mvapi_endpoint_recv(
|
|||||||
|
|
||||||
|
|
||||||
BTL_VERBOSE(("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d",
|
BTL_VERBOSE(("Received High Priority QP num = %d, Low Priority QP num %d, LID = %d",
|
||||||
rem_info.rem_qp_num_high,
|
rem_info.rem_qp_num_hp,
|
||||||
rem_info.rem_qp_num_low,
|
rem_info.rem_qp_num_lp,
|
||||||
rem_info.rem_lid));
|
rem_info.rem_lid));
|
||||||
|
|
||||||
|
|
||||||
@ -763,15 +773,15 @@ int mca_btl_mvapi_endpoint_connect(
|
|||||||
/* Connection establishment RC */
|
/* Connection establishment RC */
|
||||||
rc = mca_btl_mvapi_endpoint_qp_init_query(endpoint->endpoint_btl,
|
rc = mca_btl_mvapi_endpoint_qp_init_query(endpoint->endpoint_btl,
|
||||||
endpoint->endpoint_btl->nic,
|
endpoint->endpoint_btl->nic,
|
||||||
endpoint->lcl_qp_hndl_high,
|
endpoint->lcl_qp_hndl_hp,
|
||||||
endpoint->rem_info.rem_qp_num_high,
|
endpoint->rem_info.rem_qp_num_hp,
|
||||||
endpoint->rem_info.rem_lid,
|
endpoint->rem_info.rem_lid,
|
||||||
endpoint->endpoint_btl->port_id);
|
endpoint->endpoint_btl->port_id);
|
||||||
|
|
||||||
rc = mca_btl_mvapi_endpoint_qp_init_query(endpoint->endpoint_btl,
|
rc = mca_btl_mvapi_endpoint_qp_init_query(endpoint->endpoint_btl,
|
||||||
endpoint->endpoint_btl->nic,
|
endpoint->endpoint_btl->nic,
|
||||||
endpoint->lcl_qp_hndl_low,
|
endpoint->lcl_qp_hndl_lp,
|
||||||
endpoint->rem_info.rem_qp_num_low,
|
endpoint->rem_info.rem_qp_num_lp,
|
||||||
endpoint->rem_info.rem_lid,
|
endpoint->rem_info.rem_lid,
|
||||||
endpoint->endpoint_btl->port_id);
|
endpoint->endpoint_btl->port_id);
|
||||||
|
|
||||||
@ -844,13 +854,14 @@ int mca_btl_mvapi_endpoint_create_qp(
|
|||||||
|
|
||||||
VAPI_ret_t ret;
|
VAPI_ret_t ret;
|
||||||
VAPI_qp_init_attr_t qp_init_attr;
|
VAPI_qp_init_attr_t qp_init_attr;
|
||||||
VAPI_qp_init_attr_ext_t qp_init_attr_ext;
|
VAPI_qp_init_attr_ext_t qp_init_attr_ext;
|
||||||
|
|
||||||
|
/* worst case number of credit messages could be queued */
|
||||||
switch(transport_type) {
|
switch(transport_type) {
|
||||||
|
|
||||||
case VAPI_TS_RC: /* Set up RC qp parameters */
|
case VAPI_TS_RC: /* Set up RC qp parameters */
|
||||||
qp_init_attr.cap.max_oust_wr_rq = mca_btl_mvapi_component.ib_rr_buf_max;
|
qp_init_attr.cap.max_oust_wr_rq = mca_btl_mvapi_component.rd_num + mca_btl_mvapi_component.rd_rsv;
|
||||||
qp_init_attr.cap.max_oust_wr_sq = mca_btl_mvapi_component.max_wr_sq_tokens;
|
qp_init_attr.cap.max_oust_wr_sq = mca_btl_mvapi_component.rd_num + mca_btl_mvapi_component.rd_rsv;
|
||||||
qp_init_attr.cap.max_sg_size_rq = mca_btl_mvapi_component.ib_sg_list_size;
|
qp_init_attr.cap.max_sg_size_rq = mca_btl_mvapi_component.ib_sg_list_size;
|
||||||
qp_init_attr.cap.max_sg_size_sq = mca_btl_mvapi_component.ib_sg_list_size;
|
qp_init_attr.cap.max_sg_size_sq = mca_btl_mvapi_component.ib_sg_list_size;
|
||||||
qp_init_attr.pd_hndl = ptag;
|
qp_init_attr.pd_hndl = ptag;
|
||||||
@ -1013,3 +1024,59 @@ int mca_btl_mvapi_endpoint_qp_init_query(
|
|||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return control fragment.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void mca_btl_mvapi_endpoint_control_cb(
|
||||||
|
mca_btl_base_module_t* btl,
|
||||||
|
struct mca_btl_base_endpoint_t* ep,
|
||||||
|
struct mca_btl_base_descriptor_t* descriptor,
|
||||||
|
int status)
|
||||||
|
{
|
||||||
|
MCA_BTL_IB_FRAG_RETURN_EAGER((mca_btl_mvapi_module_t*)btl, (mca_btl_mvapi_frag_t*)descriptor);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
void mca_btl_mvapi_endpoint_send_credits(
|
||||||
|
mca_btl_mvapi_endpoint_t* endpoint,
|
||||||
|
VAPI_qp_hndl_t local_qp,
|
||||||
|
VAPI_qp_num_t remote_qp,
|
||||||
|
int32_t* credits)
|
||||||
|
{
|
||||||
|
mca_btl_mvapi_module_t* btl = endpoint->endpoint_btl;
|
||||||
|
mca_btl_mvapi_frag_t* frag;
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
/* fprintf(stderr, "sending credits %d\n", *credits); */
|
||||||
|
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
|
||||||
|
if(NULL == frag) {
|
||||||
|
BTL_ERROR(("error allocating fragment"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
frag->base.des_cbfunc = mca_btl_mvapi_endpoint_control_cb;
|
||||||
|
frag->base.des_cbdata = NULL;
|
||||||
|
|
||||||
|
frag->hdr->tag = MCA_BTL_TAG_BTL;
|
||||||
|
frag->hdr->credits = *credits;
|
||||||
|
OPAL_THREAD_ADD32(credits, -frag->hdr->credits);
|
||||||
|
|
||||||
|
frag->sr_desc.remote_qkey = 0;
|
||||||
|
frag->sr_desc.opcode = VAPI_SEND;
|
||||||
|
frag->sr_desc.remote_qp = remote_qp;
|
||||||
|
|
||||||
|
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr;
|
||||||
|
frag->sg_entry.len = sizeof(mca_btl_mvapi_header_t);
|
||||||
|
|
||||||
|
rc = EVAPI_post_inline_sr(btl->nic, endpoint->lcl_qp_hndl_hp, &frag->sr_desc);
|
||||||
|
if(VAPI_SUCCESS != rc) {
|
||||||
|
BTL_ERROR(("error calling EVAPI_post_inline_sr: %s\n", VAPI_strerror(rc)));
|
||||||
|
MCA_BTL_IB_FRAG_RETURN_EAGER(btl, frag);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -34,7 +34,6 @@
|
|||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
#define MAX_POST_RR (16)
|
|
||||||
OBJ_CLASS_DECLARATION(mca_btl_mvapi_endpoint_t);
|
OBJ_CLASS_DECLARATION(mca_btl_mvapi_endpoint_t);
|
||||||
|
|
||||||
|
|
||||||
@ -77,10 +76,10 @@ typedef enum {
|
|||||||
|
|
||||||
struct mca_btl_mvapi_rem_info_t {
|
struct mca_btl_mvapi_rem_info_t {
|
||||||
|
|
||||||
VAPI_qp_num_t rem_qp_num_high;
|
VAPI_qp_num_t rem_qp_num_hp;
|
||||||
/* High priority remote side QP number */
|
/* High priority remote side QP number */
|
||||||
|
|
||||||
VAPI_qp_num_t rem_qp_num_low;
|
VAPI_qp_num_t rem_qp_num_lp;
|
||||||
/* Low prioirty remote size QP number */
|
/* Low prioirty remote size QP number */
|
||||||
|
|
||||||
IB_lid_t rem_lid;
|
IB_lid_t rem_lid;
|
||||||
@ -124,36 +123,27 @@ struct mca_btl_base_endpoint_t {
|
|||||||
opal_list_t pending_send_frags;
|
opal_list_t pending_send_frags;
|
||||||
/**< list of pending send frags for this endpoint */
|
/**< list of pending send frags for this endpoint */
|
||||||
|
|
||||||
opal_list_t pending_frags_hp;
|
opal_list_t pending_frags_hp; /**< list of pending high priority frags */
|
||||||
/**< list of pending high priority frags */
|
opal_list_t pending_frags_lp; /**< list of pending low priority frags */
|
||||||
|
opal_list_t pending_frags_get; /**< list of pending get operations */
|
||||||
|
|
||||||
opal_list_t pending_frags_lp;
|
|
||||||
/**< list of pending low priority frags */
|
|
||||||
|
|
||||||
int32_t wr_sq_tokens_hp;
|
|
||||||
/**< number of high priority frags that can be outstanding (down counter) */
|
|
||||||
|
|
||||||
int32_t wr_sq_tokens_lp;
|
|
||||||
/**< number of low priority frags that can be outstanding (down counter) */
|
|
||||||
|
|
||||||
mca_btl_mvapi_rem_info_t rem_info;
|
mca_btl_mvapi_rem_info_t rem_info;
|
||||||
|
|
||||||
VAPI_qp_hndl_t lcl_qp_hndl_high;
|
VAPI_qp_hndl_t lcl_qp_hndl_hp; /* High priority local QP handle */
|
||||||
/* High priority local QP handle */
|
VAPI_qp_hndl_t lcl_qp_hndl_lp; /* Low priority local QP handle */
|
||||||
|
|
||||||
|
VAPI_qp_prop_t lcl_qp_prop_hp; /* High priority local QP properties */
|
||||||
|
VAPI_qp_prop_t lcl_qp_prop_lp; /* Low priority local QP properties */
|
||||||
|
|
||||||
|
int32_t sd_tokens_hp; /**< number of high priority send tokens */
|
||||||
|
int32_t sd_tokens_lp; /**< number of low priority send tokens */
|
||||||
|
int32_t get_tokens; /**< number of available get tokens */
|
||||||
|
|
||||||
VAPI_qp_hndl_t lcl_qp_hndl_low;
|
int32_t rd_posted_hp; /**< number of high priority descriptors posted to the nic*/
|
||||||
/* Low priority local QP handle */
|
int32_t rd_posted_lp; /**< number of low priority descriptors posted to the nic*/
|
||||||
|
int32_t rd_credits_hp; /**< number of high priority credits to return to peer */
|
||||||
VAPI_qp_prop_t lcl_qp_prop_high;
|
int32_t rd_credits_lp; /**< number of low priority credits to return to peer */
|
||||||
/* High priority local QP properties */
|
|
||||||
|
|
||||||
VAPI_qp_prop_t lcl_qp_prop_low;
|
|
||||||
/* Low priority local QP properties */
|
|
||||||
|
|
||||||
|
|
||||||
uint32_t rr_posted_high; /**< number of high priority rr posted to the nic*/
|
|
||||||
uint32_t rr_posted_low; /**< number of low priority rr posted to the nic*/
|
|
||||||
|
|
||||||
uint32_t subnet;
|
uint32_t subnet;
|
||||||
#if 0
|
#if 0
|
||||||
mca_btl_mvapi_rdma_buf_t *rdma_buf;
|
mca_btl_mvapi_rdma_buf_t *rdma_buf;
|
||||||
@ -164,6 +154,11 @@ typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
|
|||||||
typedef mca_btl_base_endpoint_t mca_btl_mvapi_endpoint_t;
|
typedef mca_btl_base_endpoint_t mca_btl_mvapi_endpoint_t;
|
||||||
int mca_btl_mvapi_endpoint_send(mca_btl_base_endpoint_t* endpoint, struct mca_btl_mvapi_frag_t* frag);
|
int mca_btl_mvapi_endpoint_send(mca_btl_base_endpoint_t* endpoint, struct mca_btl_mvapi_frag_t* frag);
|
||||||
int mca_btl_mvapi_endpoint_connect(mca_btl_base_endpoint_t*);
|
int mca_btl_mvapi_endpoint_connect(mca_btl_base_endpoint_t*);
|
||||||
|
void mca_btl_mvapi_endpoint_send_credits(
|
||||||
|
mca_btl_base_endpoint_t*,
|
||||||
|
VAPI_qp_hndl_t local,
|
||||||
|
VAPI_qp_num_t rem,
|
||||||
|
int32_t* credits);
|
||||||
void mca_btl_mvapi_post_recv(void);
|
void mca_btl_mvapi_post_recv(void);
|
||||||
|
|
||||||
|
|
||||||
@ -173,15 +168,16 @@ void mca_btl_mvapi_post_recv(void);
|
|||||||
do { \
|
do { \
|
||||||
mca_btl_mvapi_module_t * mvapi_btl = endpoint->endpoint_btl; \
|
mca_btl_mvapi_module_t * mvapi_btl = endpoint->endpoint_btl; \
|
||||||
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
|
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
|
||||||
if(endpoint->rr_posted_high <= mca_btl_mvapi_component.ib_rr_buf_min+additional && \
|
if(endpoint->rd_posted_hp <= mca_btl_mvapi_component.rd_low+additional && \
|
||||||
endpoint->rr_posted_high < mvapi_btl->rd_buf_max){ \
|
endpoint->rd_posted_hp < mvapi_btl->rd_num){ \
|
||||||
MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(mvapi_btl->rd_buf_max - \
|
MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(mvapi_btl->rd_num - \
|
||||||
endpoint->rr_posted_high, \
|
endpoint->rd_posted_hp, \
|
||||||
endpoint, \
|
endpoint, \
|
||||||
&mvapi_btl->recv_free_eager, \
|
&mvapi_btl->recv_free_eager, \
|
||||||
&endpoint->rr_posted_high, \
|
endpoint->rd_posted_hp, \
|
||||||
|
endpoint->rd_credits_hp, \
|
||||||
mvapi_btl->nic, \
|
mvapi_btl->nic, \
|
||||||
endpoint->lcl_qp_hndl_high); \
|
endpoint->lcl_qp_hndl_hp); \
|
||||||
} \
|
} \
|
||||||
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
|
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
|
||||||
} while(0); \
|
} while(0); \
|
||||||
@ -193,15 +189,16 @@ void mca_btl_mvapi_post_recv(void);
|
|||||||
do { \
|
do { \
|
||||||
mca_btl_mvapi_module_t * mvapi_btl = endpoint->endpoint_btl; \
|
mca_btl_mvapi_module_t * mvapi_btl = endpoint->endpoint_btl; \
|
||||||
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
|
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
|
||||||
if(endpoint->rr_posted_low <= mca_btl_mvapi_component.ib_rr_buf_min+additional && \
|
if(endpoint->rd_posted_lp <= mca_btl_mvapi_component.rd_low+additional && \
|
||||||
endpoint->rr_posted_low < mvapi_btl->rd_buf_max){ \
|
endpoint->rd_posted_lp < mvapi_btl->rd_num){ \
|
||||||
MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(mvapi_btl->rd_buf_max - \
|
MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(mvapi_btl->rd_num - \
|
||||||
endpoint->rr_posted_low, \
|
endpoint->rd_posted_lp, \
|
||||||
endpoint, \
|
endpoint, \
|
||||||
&mvapi_btl->recv_free_max, \
|
&mvapi_btl->recv_free_max, \
|
||||||
&endpoint->rr_posted_low, \
|
endpoint->rd_posted_lp, \
|
||||||
|
endpoint->rd_credits_lp, \
|
||||||
mvapi_btl->nic, \
|
mvapi_btl->nic, \
|
||||||
endpoint->lcl_qp_hndl_low); \
|
endpoint->lcl_qp_hndl_lp); \
|
||||||
} \
|
} \
|
||||||
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
|
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
|
||||||
} while(0); \
|
} while(0); \
|
||||||
@ -211,18 +208,20 @@ void mca_btl_mvapi_post_recv(void);
|
|||||||
#define MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(cnt, \
|
#define MCA_BTL_MVAPI_ENDPOINT_POST_RR_SUB(cnt, \
|
||||||
my_endpoint, \
|
my_endpoint, \
|
||||||
frag_list, \
|
frag_list, \
|
||||||
rr_posted, \
|
rd_posted, \
|
||||||
|
rd_credits, \
|
||||||
nic, \
|
nic, \
|
||||||
qp ) \
|
qp ) \
|
||||||
{\
|
{ \
|
||||||
do { \
|
do { \
|
||||||
uint32_t i; \
|
int32_t i; \
|
||||||
int rc; \
|
int rc; \
|
||||||
opal_list_item_t* item; \
|
int32_t num_post = cnt; \
|
||||||
mca_btl_mvapi_frag_t* frag = NULL; \
|
|
||||||
mca_btl_mvapi_module_t *mvapi_btl = my_endpoint->endpoint_btl; \
|
mca_btl_mvapi_module_t *mvapi_btl = my_endpoint->endpoint_btl; \
|
||||||
VAPI_rr_desc_t* desc_post = mvapi_btl->rr_desc_post; \
|
VAPI_rr_desc_t* desc_post = mvapi_btl->rr_desc_post; \
|
||||||
for(i = 0; i < cnt; i++) { \
|
for(i = 0; i < num_post; i++) { \
|
||||||
|
opal_list_item_t* item; \
|
||||||
|
mca_btl_mvapi_frag_t* frag = NULL; \
|
||||||
OMPI_FREE_LIST_WAIT(frag_list, item, rc); \
|
OMPI_FREE_LIST_WAIT(frag_list, item, rc); \
|
||||||
frag = (mca_btl_mvapi_frag_t*) item; \
|
frag = (mca_btl_mvapi_frag_t*) item; \
|
||||||
frag->endpoint = my_endpoint; \
|
frag->endpoint = my_endpoint; \
|
||||||
@ -231,30 +230,22 @@ void mca_btl_mvapi_post_recv(void);
|
|||||||
(unsigned char*) frag->hdr); \
|
(unsigned char*) frag->hdr); \
|
||||||
desc_post[i] = frag->rr_desc; \
|
desc_post[i] = frag->rr_desc; \
|
||||||
}\
|
}\
|
||||||
frag->ret = EVAPI_post_rr_list( nic, \
|
rc = EVAPI_post_rr_list( nic, \
|
||||||
qp, \
|
qp, \
|
||||||
cnt, \
|
num_post, \
|
||||||
desc_post); \
|
desc_post); \
|
||||||
if(NULL != frag && VAPI_OK != frag->ret) { \
|
if(VAPI_OK != rc) { \
|
||||||
BTL_ERROR(("error posting receive descriptors: %s",\
|
BTL_ERROR(("error posting receive descriptors: %s",\
|
||||||
VAPI_strerror(frag->ret))); \
|
VAPI_strerror(rc))); \
|
||||||
} else if (NULL != frag){\
|
} else { \
|
||||||
OPAL_THREAD_ADD32(rr_posted, cnt); \
|
/* fprintf(stderr, "posting: %d to %d\n", num_post, rd_posted); */ \
|
||||||
|
OPAL_THREAD_ADD32(&(rd_posted), num_post); \
|
||||||
|
/* fprintf(stderr, "credits: %d to %d\n", num_post, rd_credits); */ \
|
||||||
|
OPAL_THREAD_ADD32(&(rd_credits), num_post); \
|
||||||
}\
|
}\
|
||||||
} while(0); \
|
} while(0); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define BTL_MVAPI_INSERT_PENDING(frag, frag_list, tokens, lock, rc) \
|
|
||||||
{ \
|
|
||||||
do{ \
|
|
||||||
OPAL_THREAD_LOCK(&lock); \
|
|
||||||
opal_list_append(&frag_list, (opal_list_item_t *)frag); \
|
|
||||||
OPAL_THREAD_UNLOCK(&lock); \
|
|
||||||
OPAL_THREAD_ADD32(&tokens, 1); \
|
|
||||||
rc = OMPI_SUCCESS; \
|
|
||||||
} while(0); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(c_plusplus) || defined(__cplusplus)
|
#if defined(c_plusplus) || defined(__cplusplus)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -25,23 +25,9 @@
|
|||||||
static void mca_btl_mvapi_frag_common_constructor( mca_btl_mvapi_frag_t* frag)
|
static void mca_btl_mvapi_frag_common_constructor( mca_btl_mvapi_frag_t* frag)
|
||||||
{
|
{
|
||||||
mca_mpool_mvapi_registration_t* mem_hndl = (mca_mpool_mvapi_registration_t*) frag->base.super.user_data;
|
mca_mpool_mvapi_registration_t* mem_hndl = (mca_mpool_mvapi_registration_t*) frag->base.super.user_data;
|
||||||
frag->hdr = (mca_btl_mvapi_header_t*) (frag+1); /* initialize the btl header to point to start at end of frag */
|
frag->hdr = (mca_btl_mvapi_header_t*) (frag+1); /* initialize btl header to point to start at end of frag */
|
||||||
#if 0
|
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_mvapi_header_t);
|
||||||
mod = (unsigned long) frag->hdr % MCA_BTL_IB_FRAG_ALIGN;
|
/* init the segment address to start after the btl header */
|
||||||
|
|
||||||
if(mod != 0) {
|
|
||||||
frag->hdr = (mca_btl_mvapi_header_t*) ((unsigned char*) frag->hdr + (MCA_BTL_IB_FRAG_ALIGN - mod));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_mvapi_header_t); /* init the segment address to start after the btl header */
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
mod = (frag->segment.seg_addr.lval) % MCA_BTL_IB_FRAG_ALIGN;
|
|
||||||
if(mod != 0) {
|
|
||||||
frag->segment.seg_addr.lval += (MCA_BTL_IB_FRAG_ALIGN - mod);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* frag->mem_hndl = mem_hndl->hndl; */
|
/* frag->mem_hndl = mem_hndl->hndl; */
|
||||||
frag->segment.seg_len = frag->size;
|
frag->segment.seg_len = frag->size;
|
||||||
@ -49,7 +35,6 @@ static void mca_btl_mvapi_frag_common_constructor( mca_btl_mvapi_frag_t* frag)
|
|||||||
frag->sg_entry.lkey = mem_hndl->l_key;
|
frag->sg_entry.lkey = mem_hndl->l_key;
|
||||||
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr;
|
frag->sg_entry.addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->hdr;
|
||||||
frag->base.des_flags = 0;
|
frag->base.des_flags = 0;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -34,7 +34,11 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_mvapi_frag_t);
|
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_mvapi_frag_t);
|
||||||
|
|
||||||
typedef mca_btl_base_header_t mca_btl_mvapi_header_t;
|
struct mca_btl_mvapi_header_t {
|
||||||
|
mca_btl_base_tag_t tag;
|
||||||
|
int16_t credits;
|
||||||
|
};
|
||||||
|
typedef struct mca_btl_mvapi_header_t mca_btl_mvapi_header_t;
|
||||||
|
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
@ -140,6 +144,33 @@ OBJ_CLASS_DECLARATION(mca_btl_mvapi_recv_frag_max_t);
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define MCA_BTL_IB_FRAG_PROGRESS(frag) \
|
||||||
|
do { \
|
||||||
|
switch(frag->sr_desc.opcode) { \
|
||||||
|
case VAPI_SEND: \
|
||||||
|
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) { \
|
||||||
|
BTL_ERROR(("error in posting pending send\n")); \
|
||||||
|
} \
|
||||||
|
break; \
|
||||||
|
case VAPI_RDMA_WRITE: \
|
||||||
|
if(OMPI_SUCCESS != mca_btl_mvapi_put((mca_btl_base_module_t*) mvapi_btl, \
|
||||||
|
frag->endpoint, \
|
||||||
|
(mca_btl_base_descriptor_t*) frag)) { \
|
||||||
|
BTL_ERROR(("error in posting pending rdma write\n")); \
|
||||||
|
} \
|
||||||
|
break; \
|
||||||
|
case VAPI_RDMA_READ: \
|
||||||
|
if(OMPI_SUCCESS != mca_btl_mvapi_get((mca_btl_base_module_t *) mvapi_btl, \
|
||||||
|
frag->endpoint, \
|
||||||
|
(mca_btl_base_descriptor_t*) frag)) { \
|
||||||
|
BTL_ERROR(("error in posting pending rdma read\n")); \
|
||||||
|
} \
|
||||||
|
break; \
|
||||||
|
default: \
|
||||||
|
BTL_ERROR(("error in posting pending operation, invalide opcode %d\n", frag->sr_desc.opcode)); \
|
||||||
|
break; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
struct mca_btl_mvapi_module_t;
|
struct mca_btl_mvapi_module_t;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user