
initial checkin of SRQ flow control support for mvapi

This commit was SVN r7796.
This commit is contained in:
Galen Shipman 2005-10-18 14:55:11 +00:00
parent d0cd752e33
commit 4d2d39b0a6
4 changed files with 157 additions and 45 deletions
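
When the shared receive queue (SRQ) is enabled, this change moves send/RDMA work-request flow control from per-endpoint counters onto module-wide counters (mvapi_btl->wr_sq_tokens_hp/_lp): every endpoint shares one receive queue, so tokens must be accounted per BTL, with fragments parked on mvapi_btl->pending_frags_hp/_lp when none are available. Below is a minimal sketch of that token pattern, using C11 atomics in place of OPAL_THREAD_ADD32; the names here are illustrative, not Open MPI identifiers.

#include <stdatomic.h>
#include <stdbool.h>

typedef struct {
    atomic_int tokens;  /* down-counter: work requests still allowed in flight */
} srq_tokens_t;

/* Try to take one token; on failure the fragment must be queued and the
 * token handed back, mirroring OPAL_THREAD_ADD32(&tokens,-1) < 0 followed
 * by OPAL_THREAD_ADD32(&tokens,1) in the diff below. */
static bool srq_token_acquire(srq_tokens_t *t)
{
    if (atomic_fetch_sub(&t->tokens, 1) - 1 < 0) {
        atomic_fetch_add(&t->tokens, 1);
        return false;
    }
    return true;
}

/* On completion the token is returned; a positive result means queued
 * fragments may now be drained, as component_progress() does below. */
static bool srq_token_release(srq_tokens_t *t)
{
    return atomic_fetch_add(&t->tokens, 1) + 1 > 0;
}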

View file

@@ -127,6 +127,9 @@ int mca_btl_mvapi_add_procs(
mvapi_btl->num_peers += nprocs;
if(mca_btl_mvapi_component.use_srq) {
mvapi_btl->rd_buf_max = mca_btl_mvapi_component.ib_rr_buf_max + log2(nprocs) * mca_btl_mvapi_component.rd_per_peer;
free(mvapi_btl->rr_desc_post);
mvapi_btl->rr_desc_post = (VAPI_rr_desc_t*) malloc((mvapi_btl->rd_buf_max * sizeof(VAPI_rr_desc_t)));
mvapi_btl->rd_buf_min = mvapi_btl->rd_buf_max / 2;
}
}
return OMPI_SUCCESS;
@@ -568,18 +571,26 @@ int mca_btl_mvapi_put( mca_btl_base_module_t* btl,
int rc;
mca_btl_mvapi_module_t* mvapi_btl = (mca_btl_mvapi_module_t*) btl;
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*) descriptor;
frag->endpoint = endpoint;
assert(endpoint->endpoint_state == MCA_BTL_IB_CONNECTED ||
endpoint->endpoint_state == MCA_BTL_IB_WAITING_ACK);
frag->sr_desc.opcode = VAPI_RDMA_WRITE;
/* atomically test and acquire a token */
if(OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
if(!mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
BTL_VERBOSE(("Queing because no rdma write tokens \n"));
BTL_MVAPI_INSERT_PENDING(frag, endpoint->pending_frags_lp,
endpoint->wr_sq_tokens_lp, endpoint->endpoint_lock, rc);
rc = OMPI_SUCCESS;
} else if(mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,-1) < 0) {
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,1);
rc = OMPI_SUCCESS;
} else {
frag->endpoint = endpoint;
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_low;
@@ -624,14 +635,22 @@ int mca_btl_mvapi_get( mca_btl_base_module_t* btl,
assert(endpoint->endpoint_state == MCA_BTL_IB_CONNECTED ||
endpoint->endpoint_state == MCA_BTL_IB_WAITING_ACK);
frag->sr_desc.opcode = VAPI_RDMA_READ;
frag->endpoint = endpoint;
/* atomically test and acquire a token */
if(OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
if(!mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
BTL_VERBOSE(("Queing because no rdma write tokens \n"));
BTL_MVAPI_INSERT_PENDING(frag, endpoint->pending_frags_lp,
endpoint->wr_sq_tokens_lp, endpoint->endpoint_lock, rc);
rc = OMPI_SUCCESS;
} else if(mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,-1) < 0) {
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,1);
rc = OMPI_SUCCESS;
} else {
frag->endpoint = endpoint;
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_low;
frag->sr_desc.remote_addr = (VAPI_virt_addr_t) (MT_virt_addr_t) frag->base.des_src->seg_addr.pval;
frag->sr_desc.r_key = frag->base.des_src->seg_key.key32[0];
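
The first hunk above also grows the shared receive queue with the job size: rd_buf_max scales with log2(nprocs) rather than linearly with the peer count, and rd_buf_min, the repost threshold used by the SRR macros in the next file, is half of it. A worked example with hypothetical parameter values (the actual MCA defaults for ib_rr_buf_max and rd_per_peer are not shown in this diff):

#include <math.h>
#include <stdio.h>

int main(void)
{
    /* hypothetical values, for illustration only */
    unsigned ib_rr_buf_max = 16, rd_per_peer = 16, nprocs = 64;

    unsigned rd_buf_max = ib_rr_buf_max + (unsigned) log2(nprocs) * rd_per_peer;
    unsigned rd_buf_min = rd_buf_max / 2;

    /* prints rd_buf_max=112 rd_buf_min=56 */
    printf("rd_buf_max=%u rd_buf_min=%u\n", rd_buf_max, rd_buf_min);
    return 0;
}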

View file

@@ -118,6 +118,7 @@ struct mca_btl_mvapi_component_t {
uint32_t ib_src_path_bits;
/* number of send tokens available */
uint32_t max_wr_sq_tokens;
uint32_t max_total_wr_sq_tokens;
}; typedef struct mca_btl_mvapi_component_t mca_btl_mvapi_component_t;
@@ -174,44 +175,60 @@ struct mca_btl_mvapi_module_t {
uint32_t num_peers;
uint32_t rd_buf_max;
uint32_t rd_buf_min;
int32_t wr_sq_tokens_hp;
/**< number of high priority frags that can be outstanding (down counter) */
int32_t wr_sq_tokens_lp;
/**< number of low priority frags that can be outstanding (down counter) */
opal_list_t pending_frags_hp;
/**< list of pending high priority frags */
opal_list_t pending_frags_lp;
/**< list of pending low priority frags */
}; typedef struct mca_btl_mvapi_module_t mca_btl_mvapi_module_t;
#define MCA_BTL_MVAPI_POST_SRR_HIGH(post_srr_high_mvapi_btl, \
post_srr_high_additional) \
#define MCA_BTL_MVAPI_POST_SRR_HIGH(mvapi_btl, \
additional) \
{ \
OPAL_THREAD_LOCK(&post_srr_high_mvapi_btl->ib_lock); \
if(post_srr_high_mvapi_btl->srr_posted_high <= mca_btl_mvapi_component.ib_rr_buf_min+post_srr_high_additional && \
post_srr_high_mvapi_btl->srr_posted_high < mca_btl_mvapi_component.ib_rr_buf_max){ \
MCA_BTL_MVAPI_POST_SRR_SUB(mca_btl_mvapi_component.ib_rr_buf_max - \
post_srr_high_mvapi_btl->srr_posted_high, \
post_srr_high_mvapi_btl, \
&post_srr_high_mvapi_btl->recv_free_eager, \
&post_srr_high_mvapi_btl->srr_posted_high, \
post_srr_high_mvapi_btl->nic, \
post_srr_high_mvapi_btl->srq_hndl_high); \
do { \
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
if(mvapi_btl->srr_posted_high <= mvapi_btl->rd_buf_min+additional && \
mvapi_btl->srr_posted_high < mvapi_btl->rd_buf_max){ \
MCA_BTL_MVAPI_POST_SRR_SUB(mvapi_btl->rd_buf_max - \
mvapi_btl->srr_posted_high, \
mvapi_btl, \
&mvapi_btl->recv_free_eager, \
&mvapi_btl->srr_posted_high, \
mvapi_btl->nic, \
mvapi_btl->srq_hndl_high); \
} \
OPAL_THREAD_UNLOCK(&post_srr_high_mvapi_btl->ib_lock); \
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
} while(0); \
}
#define MCA_BTL_MVAPI_POST_SRR_LOW(post_srr_low_mvapi_btl, \
post_srr_low_additional) \
#define MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, \
additional) \
{ \
OPAL_THREAD_LOCK(&post_srr_low_mvapi_btl->ib_lock); \
if(post_srr_low_mvapi_btl->srr_posted_low <= mca_btl_mvapi_component.ib_rr_buf_min+post_srr_low_additional && \
post_srr_low_mvapi_btl->srr_posted_low < mca_btl_mvapi_component.ib_rr_buf_max){ \
MCA_BTL_MVAPI_POST_SRR_SUB(mca_btl_mvapi_component.ib_rr_buf_max - \
post_srr_low_mvapi_btl->srr_posted_low, \
post_srr_low_mvapi_btl, \
&post_srr_low_mvapi_btl->recv_free_max, \
&post_srr_low_mvapi_btl->srr_posted_low, \
post_srr_low_mvapi_btl->nic, \
post_srr_low_mvapi_btl->srq_hndl_low); \
do { \
OPAL_THREAD_LOCK(&mvapi_btl->ib_lock); \
if(mvapi_btl->srr_posted_low <= mvapi_btl->rd_buf_min+additional && \
mvapi_btl->srr_posted_low < mvapi_btl->rd_buf_max){ \
MCA_BTL_MVAPI_POST_SRR_SUB(mvapi_btl->rd_buf_max - \
mvapi_btl->srr_posted_low, \
mvapi_btl, \
&mvapi_btl->recv_free_max, \
&mvapi_btl->srr_posted_low, \
mvapi_btl->nic, \
mvapi_btl->srq_hndl_low); \
} \
OPAL_THREAD_UNLOCK(&post_srr_low_mvapi_btl->ib_lock); \
OPAL_THREAD_UNLOCK(&mvapi_btl->ib_lock); \
} while(0); \
}
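
The rewritten macros drop the unwieldy post_srr_* parameter names, wrap their bodies in do { ... } while(0) so they expand as a single statement, and compare against the per-module rd_buf_min/rd_buf_max computed in add_procs rather than the fixed component-wide ib_rr_buf limits. Here is the threshold logic written out as a plain function for readability; srq_post() is a stand-in for the MCA_BTL_MVAPI_POST_SRR_SUB machinery, not a real symbol:

static void post_srr_high(mca_btl_mvapi_module_t *btl, int additional)
{
    OPAL_THREAD_LOCK(&btl->ib_lock);
    if (btl->srr_posted_high <= btl->rd_buf_min + additional &&
        btl->srr_posted_high <  btl->rd_buf_max) {
        /* top the shared receive queue back up to rd_buf_max */
        srq_post(btl, btl->rd_buf_max - btl->srr_posted_high);
    }
    OPAL_THREAD_UNLOCK(&btl->ib_lock);
}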

View file

@@ -187,11 +187,11 @@ int mca_btl_mvapi_component_open(void)
mca_btl_mvapi_module.super.btl_exclusivity =
mca_btl_mvapi_param_register_int ("exclusivity", MCA_BTL_EXCLUSIVITY_DEFAULT);
mca_btl_mvapi_module.super.btl_eager_limit =
mca_btl_mvapi_param_register_int ("eager_limit", (64*1024))
mca_btl_mvapi_param_register_int ("eager_limit", (32*1024))
- sizeof(mca_btl_mvapi_header_t);
mca_btl_mvapi_module.super.btl_min_send_size =
mca_btl_mvapi_param_register_int ("min_send_size", (64*1024))
mca_btl_mvapi_param_register_int ("min_send_size", (32*1024))
- sizeof(mca_btl_mvapi_header_t);
mca_btl_mvapi_module.super.btl_max_send_size =
@@ -221,7 +221,14 @@ int mca_btl_mvapi_component_open(void)
16,
&(mca_btl_mvapi_component.max_wr_sq_tokens));
mca_base_param_reg_int(&mca_btl_mvapi_component.super.btl_version,
"max_total_wr_sq_tokens",
"Maximum number of send/rdma work request tokens peer btl",
false,
false,
32,
&(mca_btl_mvapi_component.max_total_wr_sq_tokens));
mca_btl_mvapi_component.max_send_size = mca_btl_mvapi_module.super.btl_max_send_size;
mca_btl_mvapi_component.eager_limit = mca_btl_mvapi_module.super.btl_eager_limit;
@@ -413,10 +420,17 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
mvapi_btl = &mca_btl_mvapi_component.mvapi_btls[i];
mvapi_btl->rd_buf_max = mca_btl_mvapi_component.ib_rr_buf_max;
mvapi_btl->rd_buf_min = mca_btl_mvapi_component.ib_rr_buf_min;
mvapi_btl->num_peers = 0;
mvapi_btl->wr_sq_tokens_hp =
mvapi_btl->wr_sq_tokens_lp = mca_btl_mvapi_component.max_total_wr_sq_tokens;
/* Initialize module state */
OBJ_CONSTRUCT(&mvapi_btl->pending_frags_hp, opal_list_t);
OBJ_CONSTRUCT(&mvapi_btl->pending_frags_lp, opal_list_t);
OBJ_CONSTRUCT(&mvapi_btl->ib_lock, opal_mutex_t);
OBJ_CONSTRUCT(&mvapi_btl->send_free_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&mvapi_btl->send_free_max, ompi_free_list_t);
@@ -577,8 +591,9 @@ int mca_btl_mvapi_component_progress()
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
count++;
/* check and see if we need to progress pending sends */
if(OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_hp, 1) > 0
&& !opal_list_is_empty(&(frag->endpoint->pending_frags_hp))) {
if( !mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_hp, 1) > 0
&& !opal_list_is_empty(&(frag->endpoint->pending_frags_hp))) {
opal_list_item_t *frag_item;
OPAL_THREAD_LOCK(&frag->endpoint->endpoint_lock);
frag_item = opal_list_remove_first(&(frag->endpoint->pending_frags_hp));
@@ -589,6 +604,17 @@ int mca_btl_mvapi_component_progress()
BTL_ERROR(("error in posting pending send\n"));
}
}
if( mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_hp, 1) > 0
&& !opal_list_is_empty(&mvapi_btl->pending_frags_hp)) {
opal_list_item_t *frag_item;
frag_item = opal_list_remove_first(&mvapi_btl->pending_frags_hp);
frag = (mca_btl_mvapi_frag_t *) frag_item;
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
BTL_ERROR(("error in posting pending send\n"));
}
}
break;
@@ -648,8 +674,9 @@ int mca_btl_mvapi_component_progress()
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
count++;
/* check and see if we need to progress pending sends */
if(OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_lp, 1) > 0
&& !opal_list_is_empty(&(frag->endpoint->pending_frags_lp))) {
if(!mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&frag->endpoint->wr_sq_tokens_lp, 1) > 0 &&
!opal_list_is_empty(&(frag->endpoint->pending_frags_lp))) {
opal_list_item_t *frag_item;
OPAL_THREAD_LOCK(&frag->endpoint->endpoint_lock);
frag_item = opal_list_remove_first(&(frag->endpoint->pending_frags_lp));
@@ -679,6 +706,36 @@ int mca_btl_mvapi_component_progress()
BTL_ERROR(("error in posting pending operation, invalide opcode %d\n", frag->sr_desc.opcode));
}
}
if(mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp, 1) > 0
&& !opal_list_is_empty(&mvapi_btl->pending_frags_lp)) {
opal_list_item_t *frag_item;
frag_item = opal_list_remove_first(&mvapi_btl->pending_frags_lp);
frag = (mca_btl_mvapi_frag_t *) frag_item;
switch(frag->sr_desc.opcode){
case VAPI_SEND:
if(OMPI_SUCCESS != mca_btl_mvapi_endpoint_send(frag->endpoint, frag)) {
BTL_ERROR(("error in posting pending send\n"));
}
break;
case VAPI_RDMA_WRITE:
if(OMPI_SUCCESS != mca_btl_mvapi_put((mca_btl_base_module_t*) mvapi_btl,
frag->endpoint,
(mca_btl_base_descriptor_t*) frag)) {
BTL_ERROR(("error in posting pending rdma write\n"));
}
break;
case VAPI_RDMA_READ:
if(OMPI_SUCCESS != mca_btl_mvapi_get((mca_btl_base_module_t *) mvapi_btl,
frag->endpoint,
(mca_btl_base_descriptor_t*) frag)) {
BTL_ERROR(("error in posting pending rdma read\n"));
}
break;
default:
BTL_ERROR(("error in posting pending operation, invalide opcode %d\n", frag->sr_desc.opcode));
}
}
break;

View file

@@ -71,12 +71,23 @@ static inline int mca_btl_mvapi_endpoint_post_send(
if(frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY && frag->size <= mvapi_btl->super.btl_eager_limit){
/* atomically test and acquire a token */
if(OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_hp,-1) < 0) {
if(!mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_hp,-1) < 0) {
BTL_VERBOSE(("Queing because no send tokens \n"));
opal_list_append(&endpoint->pending_frags_hp, (opal_list_item_t *)frag);
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_hp,1);
/* OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); */
/* mca_btl_mvapi_component_progress(); */
/* OPAL_THREAD_LOCK(&endpoint->endpoint_lock); */
return OMPI_SUCCESS;
} else {
} else if( mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_hp,-1) < 0) {
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_hp,1);
opal_list_append(&mvapi_btl->pending_frags_hp, (opal_list_item_t *)frag);
return OMPI_SUCCESS;
} else {
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_high;
qp_hndl = endpoint->lcl_qp_hndl_high;
}
@@ -84,10 +95,18 @@ static inline int mca_btl_mvapi_endpoint_post_send(
} else {
/* atomically test and acquire a token */
if(OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0) {
if(!mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,-1) < 0 ) {
BTL_VERBOSE(("Queing because no send tokens \n"));
opal_list_append(&endpoint->pending_frags_lp, (opal_list_item_t *)frag);
OPAL_THREAD_ADD32(&endpoint->wr_sq_tokens_lp,1);
return OMPI_SUCCESS;
} else if(mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,-1) < 0) {
OPAL_THREAD_ADD32(&mvapi_btl->wr_sq_tokens_lp,1);
opal_list_append(&mvapi_btl->pending_frags_lp, (opal_list_item_t *)frag);
return OMPI_SUCCESS;
} else {
frag->sr_desc.remote_qp = endpoint->rem_info.rem_qp_num_low;
@@ -761,8 +780,8 @@ int mca_btl_mvapi_endpoint_connect(
}
if(mca_btl_mvapi_component.use_srq) {
MCA_BTL_MVAPI_POST_SRR_HIGH(endpoint->endpoint_btl, 1);
MCA_BTL_MVAPI_POST_SRR_LOW(endpoint->endpoint_btl, 1);
MCA_BTL_MVAPI_POST_SRR_HIGH(endpoint->endpoint_btl, 0);
MCA_BTL_MVAPI_POST_SRR_LOW(endpoint->endpoint_btl, 0);
} else {
MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, 0);
MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, 0);