Initial check-in of SRQ flow control support for mvapi
This commit was SVN r7796.
Этот коммит содержится в:
родитель
d0cd752e33
Коммит
4d2d39b0a6
@ -127,6 +127,9 @@ int mca_btl_mvapi_add_procs(
|
||||
mvapi_btl->num_peers += nprocs;
|
||||
if(mca_btl_mvapi_component.use_srq) {
|
||||
mvapi_btl->rd_buf_max = mca_btl_mvapi_component.ib_rr_buf_max + log2(nprocs) * mca_btl_mvapi_component.rd_per_peer;
|
||||
free(mvapi_btl->rr_desc_post);
|
||||
mvapi_btl->rr_desc_post = (VAPI_rr_desc_t*) malloc((mvapi_btl->rd_buf_max * sizeof(VAPI_rr_desc_t)));
|
||||
mvapi_btl->rd_buf_min = mvapi_btl->rd_buf_max / 2;
|
||||
}
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
@ -568,18 +571,26 @@ int mca_btl_mvapi_put( mca_btl_base_module_t* btl,
|
||||
int rc;
|
||||
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*) btl;
|
||||
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*) descriptor;
|
||||
|
||||
frag->endpoint = endpoint;
|
||||
assert(endpoint->endpoint_state == MCA_BTL_IB_CONNECTED ||
|
||||
endpoint->endpoint_state == MCA_BTL_IB_WAITING_ACK);
|
||||
frag->sr_desc.opcode = VAPI_RDMA_WRITE;
|
||||
/* atomically test and acquire a token */
|
||||
if(OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
|
||||
if(!mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
|
||||
BTL_VERBOSE(("Queing because no rdma write tokens \n"));
|
||||
BTL_MVAPI_INSERT_PENDING(frag, endpoint->pending_frags_lp,
|
||||
endpoint->wr_sq_tokens_lp, endpoint->endpoint_lock, rc);
|
||||
rc = OMPI_SUCCESS;
|
||||
} else if(mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,-1) < 0) {
|
||||
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
|
||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,1);
|
||||
rc = OMPI_SUCCESS;
|
||||
|
||||
} else {
|
||||
|
||||
frag->endpoint = endpoint;
|
||||
|
||||
|
||||
|
||||
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_low;
|
||||
@ -624,14 +635,22 @@ int mca_btl_mvapi_get( mca_btl_base_module_t* btl,
|
||||
assert(endpoint->endpoint_state == MCA_BTL_IB_CONNECTED ||
|
||||
endpoint->endpoint_state == MCA_BTL_IB_WAITING_ACK);
|
||||
frag->sr_desc.opcode = VAPI_RDMA_READ;
|
||||
|
||||
frag->endpoint = endpoint;
|
||||
/* atomically test and acquire a token */
|
||||
if(OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
|
||||
if(!mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
|
||||
BTL_VERBOSE(("Queing because no rdma write tokens \n"));
|
||||
BTL_MVAPI_INSERT_PENDING(frag, endpoint->pending_frags_lp,
|
||||
endpoint->wr_sq_tokens_lp, endpoint->endpoint_lock, rc);
|
||||
rc = OMPI_SUCCESS;
|
||||
} else if(mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,-1) < 0) {
|
||||
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
|
||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,1);
|
||||
rc = OMPI_SUCCESS;
|
||||
|
||||
} else {
|
||||
frag->endpoint = endpoint;
|
||||
|
||||
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_low;
|
||||
frag->sr_desc.remote_addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_src->seg_addr.pval;
|
||||
frag->sr_desc.r_key = frag->base.des_src->seg_key.key32[0];
|
||||
|
@ -118,6 +118,7 @@ struct mca_btl_mvapi_component_t {
|
||||
uint32_t ib_src_path_bits;
|
||||
/* number of send tokens available */
|
||||
uint32_t max_wr_sq_tokens;
|
||||
uint32_t max_total_wr_sq_tokens;
|
||||
|
||||
}; typedef struct mca_btl_mvapi_component_t mca_btl_mvapi_component_t;
|
||||
|
||||
@ -174,44 +175,60 @@ struct mca_btl_mvapi_module_t {
|
||||
|
||||
uint32_t num_peers;
|
||||
uint32_t rd_buf_max;
|
||||
|
||||
uint32_t rd_buf_min;
|
||||
|
||||
int32_t wr_sq_tokens_hp;
|
||||
/**< number of high priority frags that can be outstanding (down counter) */
|
||||
int32_t wr_sq_tokens_lp;
|
||||
/**< number of low priority frags that can be outstanding (down counter) */
|
||||
|
||||
opal_list_t pending_frags_hp;
|
||||
/**< list of pending high priority frags */
|
||||
|
||||
opal_list_t pending_frags_lp;
|
||||
/**< list of pending low priority frags */
|
||||
|
||||
|
||||
}; typedef struct mca_btl_mvapi_module_t mca_btl_mvapi_module_t;
|
||||
|
||||
|
||||
|
||||
#define MCA_BTL_MVAPI_POST_SRR_HIGH(post_srr_high_mvapi_btl, \
|
||||
post_srr_high_additional) \
|
||||
#define MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, \
|
||||
additional) \
|
||||
{ \
|
||||
OPAL_THREAD_LOCK(&post_srr_high_mvapi_btl->ib_lock); \
|
||||
if(post_srr_high_mvapi_btl->srr_posted_high <= mca_btl_mvapi_component.ib_rr_buf_min+post_srr_high_additional && \
|
||||
post_srr_high_mvapi_btl->srr_posted_high < mca_btl_mvapi_component.ib_rr_buf_max){ \
|
||||
MCA_BTL_MVAPI_POST_SRR_SUB(mca_btl_mvapi_component.ib_rr_buf_max - \
|
||||
post_srr_high_mvapi_btl->srr_posted_high, \
|
||||
post_srr_high_mvapi_btl, \
|
||||
&post_srr_high_mvapi_btl->recv_free_eager, \
|
||||
&post_srr_high_mvapi_btl->srr_posted_high, \
|
||||
post_srr_high_mvapi_btl->nic, \
|
||||
post_srr_high_mvapi_btl->srq_hndl_high); \
|
||||
do { \
|
||||
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
|
||||
if(mvapi_btl->srr_posted_high <= mvapi_btl->rd_buf_min+additional && \
|
||||
mvapi_btl->srr_posted_high < mvapi_btl->rd_buf_max){ \
|
||||
MCA_BTL_MVAPI_POST_SRR_SUB(mvapi_btl->rd_buf_max - \
|
||||
mvapi_btl->srr_posted_high, \
|
||||
mvapi_btl, \
|
||||
&mvapi_btl->recv_free_eager, \
|
||||
&mvapi_btl->srr_posted_high, \
|
||||
mvapi_btl->nic, \
|
||||
mvapi_btl->srq_hndl_high); \
|
||||
} \
|
||||
OPAL_THREAD_UNLOCK(&post_srr_high_mvapi_btl->ib_lock); \
|
||||
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
|
||||
}while(0);\
|
||||
}
|
||||
|
||||
#define MCA_BTL_MVAPI_POST_SRR_LOW(post_srr_low_mvapi_btl, \
|
||||
post_srr_low_additional) \
|
||||
#define MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, \
|
||||
additional) \
|
||||
{ \
|
||||
OPAL_THREAD_LOCK(&post_srr_low_mvapi_btl->ib_lock); \
|
||||
if(post_srr_low_mvapi_btl->srr_posted_low <= mca_btl_mvapi_component.ib_rr_buf_min+post_srr_low_additional && \
|
||||
post_srr_low_mvapi_btl->srr_posted_low < mca_btl_mvapi_component.ib_rr_buf_max){ \
|
||||
MCA_BTL_MVAPI_POST_SRR_SUB(mca_btl_mvapi_component.ib_rr_buf_max - \
|
||||
post_srr_low_mvapi_btl->srr_posted_low, \
|
||||
post_srr_low_mvapi_btl, \
|
||||
&post_srr_low_mvapi_btl->recv_free_max, \
|
||||
&post_srr_low_mvapi_btl->srr_posted_low, \
|
||||
post_srr_low_mvapi_btl->nic, \
|
||||
post_srr_low_mvapi_btl->srq_hndl_low); \
|
||||
do { \
|
||||
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
|
||||
if(mvapi_btl->srr_posted_low <= mvapi_btl->rd_buf_min+additional && \
|
||||
mvapi_btl->srr_posted_low < mvapi_btl->rd_buf_max){ \
|
||||
MCA_BTL_MVAPI_POST_SRR_SUB(mvapi_btl->rd_buf_max - \
|
||||
mvapi_btl->srr_posted_low, \
|
||||
mvapi_btl, \
|
||||
&mvapi_btl->recv_free_max, \
|
||||
&mvapi_btl->srr_posted_low, \
|
||||
mvapi_btl->nic, \
|
||||
mvapi_btl->srq_hndl_low); \
|
||||
} \
|
||||
OPAL_THREAD_UNLOCK(&post_srr_low_mvapi_btl->ib_lock); \
|
||||
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
|
||||
} while(0); \
|
||||
}
|
||||
|
||||
|
||||
|
@ -187,11 +187,11 @@ int mca_btl_mvapi_component_open(void)
|
||||
mca_btl_mvapi_module.super.btl_exclusivity =
|
||||
mca_btl_mvapi_param_register_int ("exclusivity", MCA_BTL_EXCLUSIVITY_DEFAULT);
|
||||
mca_btl_mvapi_module.super.btl_eager_limit =
|
||||
mca_btl_mvapi_param_register_int ("eager_limit", (64*1024))
|
||||
mca_btl_mvapi_param_register_int ("eager_limit", (32*1024))
|
||||
- sizeof(mca_btl_mvapi_header_t);
|
||||
|
||||
mca_btl_mvapi_module.super.btl_min_send_size =
|
||||
mca_btl_mvapi_param_register_int ("min_send_size", (64*1024))
|
||||
mca_btl_mvapi_param_register_int ("min_send_size", (32*1024))
|
||||
- sizeof(mca_btl_mvapi_header_t);
|
||||
|
||||
mca_btl_mvapi_module.super.btl_max_send_size =
|
||||
@ -221,7 +221,14 @@ int mca_btl_mvapi_component_open(void)
|
||||
16,
|
||||
&(mca_btl_mvapi_component.max_wr_sq_tokens));
|
||||
|
||||
|
||||
mca_base_param_reg_int(&mca_btl_mvapi_component.super.btl_version,
|
||||
"max_total_wr_sq_tokens",
|
||||
"Maximum number of send/rdma work request tokens per btl",
|
||||
false,
|
||||
false,
|
||||
32,
|
||||
&(mca_btl_mvapi_component.max_total_wr_sq_tokens));
|
||||
|
||||
mca_btl_mvapi_component.max_send_size = mca_btl_mvapi_module.super.btl_max_send_size;
|
||||
mca_btl_mvapi_component.eager_limit = mca_btl_mvapi_module.super.btl_eager_limit;
|
||||
|
||||
@ -413,10 +420,17 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
|
||||
|
||||
mvapi_btl = &mca_btl_mvapi_component.mvapi_btls[i];
|
||||
mvapi_btl->rd_buf_max = mca_btl_mvapi_component.ib_rr_buf_max;
|
||||
mvapi_btl->rd_buf_min = mca_btl_mvapi_component.ib_rr_buf_min;
|
||||
mvapi_btl->num_peers = 0;
|
||||
mvapi_btl->wr_sq_tokens_hp =
|
||||
mvapi_btl->wr_sq_tokens_lp = mca_btl_mvapi_component.max_total_wr_sq_tokens;
|
||||
|
||||
/* Initialize module state */
|
||||
|
||||
OBJ_CONSTRUCT(&mvapi_btl->pending_frags_hp, opal_list_t);
|
||||
OBJ_CONSTRUCT(&mvapi_btl->pending_frags_lp, opal_list_t);
|
||||
|
||||
|
||||
OBJ_CONSTRUCT(&mvapi_btl->ib_lock, opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&mvapi_btl->send_free_eager, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&mvapi_btl->send_free_max, ompi_free_list_t);
|
||||
@ -577,8 +591,9 @@ int mca_btl_mvapi_component_progress()
|
||||
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
|
||||
count++;
|
||||
/* check and see if we need to progress pending sends */
|
||||
if(OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_hp, 1) > 0
|
||||
&& !opal_list_is_empty(&(frag->endpoint->pending_frags_hp))) {
|
||||
if( !mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_hp, 1) > 0
|
||||
&& !opal_list_is_empty(&(frag->endpoint->pending_frags_hp))) {
|
||||
opal_list_item_t *frag_item;
|
||||
OPAL_THREAD_LOCK(&frag->endpoint->endpoint_lock);
|
||||
frag_item = opal_list_remove_first(&(frag->endpoint->pending_frags_hp));
|
||||
@ -589,6 +604,17 @@ int mca_btl_mvapi_component_progress()
|
||||
BTL_ERROR(("error in posting pending send\n"));
|
||||
}
|
||||
}
|
||||
if( mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_hp, 1) > 0
|
||||
&& !opal_list_is_empty(&mvapi_btl->pending_frags_hp)) {
|
||||
opal_list_item_t *frag_item;
|
||||
frag_item = opal_list_remove_first(&mvapi_btl->pending_frags_hp);
|
||||
frag = (mca_btl_mvapi_frag_t *) frag_item;
|
||||
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
|
||||
BTL_ERROR(("error in posting pending send\n"));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
break;
|
||||
|
||||
@ -648,8 +674,9 @@ int mca_btl_mvapi_component_progress()
|
||||
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
|
||||
count++;
|
||||
/* check and see if we need to progress pending sends */
|
||||
if(OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_lp, 1) > 0
|
||||
&& !opal_list_is_empty(&(frag->endpoint->pending_frags_lp))) {
|
||||
if(!mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_lp, 1) > 0 &&
|
||||
!opal_list_is_empty(&(frag->endpoint->pending_frags_lp))) {
|
||||
opal_list_item_t *frag_item;
|
||||
OPAL_THREAD_LOCK(&frag->endpoint->endpoint_lock);
|
||||
frag_item = opal_list_remove_first(&(frag->endpoint->pending_frags_lp));
|
||||
@ -679,6 +706,36 @@ int mca_btl_mvapi_component_progress()
|
||||
BTL_ERROR(("error in posting pending operation, invalide opcode %d\n", frag->sr_desc.opcode));
|
||||
}
|
||||
}
|
||||
if(mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp, 1) > 0
|
||||
&& !opal_list_is_empty(&mvapi_btl->pending_frags_lp)) {
|
||||
opal_list_item_t *frag_item;
|
||||
frag_item = opal_list_remove_first(&mvapi_btl->pending_frags_lp);
|
||||
frag = (mca_btl_mvapi_frag_t *) frag_item;
|
||||
switch(frag->sr_desc.opcode){
|
||||
case VAPI_SEND:
|
||||
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
|
||||
BTL_ERROR(("error in posting pending send\n"));
|
||||
}
|
||||
break;
|
||||
case VAPI_RDMA_WRITE:
|
||||
if(OMPI_SUCCESS != mca_btl_mvapi_put((mca_btl_base_module_t*) mvapi_btl,
|
||||
frag->endpoint,
|
||||
(mca_btl_base_descriptor_t*) frag)) {
|
||||
BTL_ERROR(("error in posting pending rdma write\n"));
|
||||
}
|
||||
break;
|
||||
case VAPI_RDMA_READ:
|
||||
if(OMPI_SUCCESS != mca_btl_mvapi_put((mca_btl_base_module_t *) mvapi_btl,
|
||||
frag->endpoint,
|
||||
(mca_btl_base_descriptor_t*) frag)) {
|
||||
BTL_ERROR(("error in posting pending rdma read\n"));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BTL_ERROR(("error in posting pending operation, invalide opcode %d\n", frag->sr_desc.opcode));
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
|
@ -71,12 +71,23 @@ static inline int mca_btl_mvapi_endpoint_post_send(
|
||||
if(frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY && frag->size <= mvapi_btl->super.btl_eager_limit){
|
||||
|
||||
/* atomically test and acquire a token */
|
||||
if(OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_hp,-1) < 0) {
|
||||
if(!mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_hp,-1) < 0) {
|
||||
BTL_VERBOSE(("Queing because no send tokens \n"));
|
||||
opal_list_append(&endpoint->pending_frags_hp, (opal_list_item_t *)frag);
|
||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_hp,1);
|
||||
|
||||
/* OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); */
|
||||
/* mca_btl_mvapi_component_progress(); */
|
||||
/* OPAL_THREAD_LOCK(&endpoint->endpoint_lock); */
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
} else {
|
||||
} else if( mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_hp,-1) < 0) {
|
||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_hp,1);
|
||||
opal_list_append(&mvapi_btl->pending_frags_hp, (opal_list_item_t *)frag);
|
||||
return OMPI_SUCCESS;
|
||||
}else {
|
||||
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_high;
|
||||
qp_hndl = endpoint->lcl_qp_hndl_high;
|
||||
}
|
||||
@ -84,10 +95,18 @@ static inline int mca_btl_mvapi_endpoint_post_send(
|
||||
} else {
|
||||
|
||||
/* atomically test and acquire a token */
|
||||
if(OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
|
||||
if(!mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0 ) {
|
||||
BTL_VERBOSE(("Queing because no send tokens \n"));
|
||||
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
|
||||
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,1);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
} else if(mca_btl_mvapi_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,-1) < 0) {
|
||||
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,1);
|
||||
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
} else {
|
||||
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_low;
|
||||
@ -761,8 +780,8 @@ int mca_btl_mvapi_endpoint_connect(
|
||||
}
|
||||
|
||||
if(mca_btl_mvapi_component.use_srq) {
|
||||
MCA_BTL_MVAPI_POST_SRR_HIGH(endpoint->endpoint_btl, 1);
|
||||
MCA_BTL_MVAPI_POST_SRR_LOW(endpoint->endpoint_btl, 1);
|
||||
MCA_BTL_MVAPI_POST_SRR_HIGH(endpoint->endpoint_btl, 0);
|
||||
MCA_BTL_MVAPI_POST_SRR_LOW(endpoint->endpoint_btl, 0);
|
||||
} else {
|
||||
MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, 0);
|
||||
MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, 0);
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user