1
1
This commit was SVN r8104.
Этот коммит содержится в:
Tim Woodall 2005-11-10 20:51:56 +00:00
родитель 4a06e8463c
Коммит 2013104d1a
6 изменённых файлов: 56 добавлений и 73 удалений

Просмотреть файл

@ -128,10 +128,12 @@ int mca_btl_mvapi_add_procs(
if( 0 == mvapi_btl->num_peers ) {
mvapi_btl->num_peers += nprocs;
if(mca_btl_mvapi_component.use_srq) {
mvapi_btl->rd_num = mca_btl_mvapi_component.rd_num + log2(nprocs) * mca_btl_mvapi_component.rd_per_peer;
mvapi_btl->rd_num = mca_btl_mvapi_component.rd_num + log2(nprocs) * mca_btl_mvapi_component.srq_rd_per_peer;
if(mvapi_btl->rd_num > mca_btl_mvapi_component.srq_rd_max)
mvapi_btl->rd_num = mca_btl_mvapi_component.srq_rd_max;
mvapi_btl->rd_low = mvapi_btl->rd_num - 1;
free(mvapi_btl->rr_desc_post);
mvapi_btl->rr_desc_post = (VAPI_rr_desc_t*) malloc((mvapi_btl->rd_num * sizeof(VAPI_rr_desc_t)));
mvapi_btl->rd_low = mvapi_btl->rd_num / 2;
}
}
return OMPI_SUCCESS;
@ -560,8 +562,7 @@ int mca_btl_mvapi_send(
mca_btl_mvapi_frag_t* frag = (mca_btl_mvapi_frag_t*)descriptor;
frag->endpoint = endpoint;
frag->hdr->tag = tag;
frag->rc = mca_btl_mvapi_endpoint_send(endpoint, frag);
return frag->rc;
return mca_btl_mvapi_endpoint_send(endpoint, frag);
}
/*
@ -785,10 +786,10 @@ int mca_btl_mvapi_module_init(mca_btl_mvapi_module_t *mvapi_btl)
mvapi_btl->srd_posted_hp = 0;
mvapi_btl->srd_posted_lp = 0;
srq_attr.pd_hndl = mvapi_btl->ptag;
srq_attr.max_outs_wr = mca_btl_mvapi_component.ib_wq_size;
srq_attr.max_outs_wr = mca_btl_mvapi_component.srq_rd_max;
srq_attr.max_sentries = mca_btl_mvapi_component.ib_sg_list_size;
srq_attr_mod.srq_limit = 16;/* mca_btl_mvapi_component.ib_wq_size; */
srq_attr_mod.srq_limit = mvapi_btl->rd_num * 0.9;
ret = VAPI_create_srq(mvapi_btl->nic,
&srq_attr,
&mvapi_btl->srq_hndl_hp,

Просмотреть файл

@ -92,13 +92,15 @@ struct mca_btl_mvapi_component_t {
int32_t rd_win; /**< ack credits when window size exceeded */
int32_t rd_rsv; /**< descriptors held in reserve for control messages */
int32_t rd_per_peer;
/* number of srq send tokes available */
int32_t srq_sd_max;
int32_t srq_rd_max;
int32_t srq_rd_per_peer;
/**< the number of recv desc posted per log(peer) in SRQ mode */
size_t eager_limit;
size_t max_send_size;
uint32_t leave_pinned;
uint32_t reg_mru_len;
uint32_t use_srq;
@ -117,9 +119,6 @@ struct mca_btl_mvapi_component_t {
uint32_t ib_service_level;
uint32_t ib_static_rate;
uint32_t ib_src_path_bits;
/* number of send tokes available */
uint32_t max_wr_sq_tokens;
uint32_t max_total_wr_sq_tokens;
}; typedef struct mca_btl_mvapi_component_t mca_btl_mvapi_component_t;
@ -156,10 +155,6 @@ struct mca_btl_mvapi_module_t {
ompi_free_list_t recv_free_eager; /**< High priority free list of buffer descriptors */
ompi_free_list_t recv_free_max; /**< Low priority free list of buffer descriptors */
opal_list_t reg_mru_list; /**< a most recently used list of mca_mpool_mvapi_registration_t
entries, this allows us to keep a working set of memory pinned */
opal_list_t repost; /**< list of buffers to repost */
opal_mutex_t ib_lock; /**< module level lock */
VAPI_rr_desc_t* rr_desc_post; /**< an array to allow posting of rr in one swoop */
@ -251,10 +246,10 @@ struct mca_btl_mvapi_module_t {
desc_post[i] = frag->rr_desc; \
}\
ret = VAPI_post_srq( nic, \
srq_hndl, \
cnt, \
desc_post, \
&rwqe_posted); \
srq_hndl, \
cnt, \
desc_post, \
&rwqe_posted); \
if(VAPI_OK != ret) { \
BTL_ERROR(("error posting receive descriptors to shared receive queue: %s",\
VAPI_strerror(ret))); \

Просмотреть файл

@ -118,9 +118,6 @@ static inline void mca_btl_mvapi_param_register_int(
int mca_btl_mvapi_component_open(void)
{
int param, value;
/* initialize state */
mca_btl_mvapi_component.ib_num_btls=0;
mca_btl_mvapi_component.mvapi_btls=NULL;
@ -143,8 +140,6 @@ int mca_btl_mvapi_component_open(void)
0, (int*) &mca_btl_mvapi_component.use_srq);
mca_btl_mvapi_param_register_int("ib_cq_size", "size of the IB completion queue",
10000, (int*) &mca_btl_mvapi_component.ib_cq_size);
mca_btl_mvapi_param_register_int("ib_wq_size", "size of the IB work queue",
10000, (int*) &mca_btl_mvapi_component.ib_wq_size);
mca_btl_mvapi_param_register_int("ib_sg_list_size", "size of IB segment list",
1, (int*) &mca_btl_mvapi_component.ib_sg_list_size);
mca_btl_mvapi_param_register_int("ib_pkey_ix", "IB pkey index",
@ -181,8 +176,13 @@ int mca_btl_mvapi_component_open(void)
8, (int*) &mca_btl_mvapi_component.rd_win);
mca_btl_mvapi_component.rd_rsv = ((mca_btl_mvapi_component.rd_num<<1)-1) / mca_btl_mvapi_component.rd_win;
mca_btl_mvapi_param_register_int("rd_per_peer", "receive descriptors posted per peer, SRQ mode only",
16, (int*) &mca_btl_mvapi_component.rd_per_peer);
mca_btl_mvapi_param_register_int("srq_rd_max", "Maximum number of receive descriptors posted per SRQ.\n",
1000, (int*) &mca_btl_mvapi_component.srq_rd_max);
mca_btl_mvapi_param_register_int("srq_rd_per_peer", "receive descriptors posted per peer, SRQ mode only",
16, (int*) &mca_btl_mvapi_component.srq_rd_per_peer);
mca_btl_mvapi_param_register_int("srq_sd_max", "Maximum number of send descriptors posted per process",
8, &mca_btl_mvapi_component.srq_sd_max);
mca_btl_mvapi_param_register_int ("exclusivity", "BTL exclusivity",
MCA_BTL_EXCLUSIVITY_DEFAULT, (int*) &mca_btl_mvapi_module.super.btl_exclusivity);
mca_btl_mvapi_param_register_int ("eager_limit", "eager send limit",
@ -202,14 +202,6 @@ int mca_btl_mvapi_component_open(void)
MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET, (int*) &mca_btl_mvapi_module.super.btl_flags);
mca_btl_mvapi_param_register_int("bandwidth", "Approximate maximum bandwidth of interconnect",
800, (int*) &mca_btl_mvapi_module.super.btl_bandwidth);
mca_btl_mvapi_param_register_int("max_wr_sq_tokens", "Maximum number of send/rdma work request tokens",
16, &mca_btl_mvapi_component.max_wr_sq_tokens);
mca_btl_mvapi_param_register_int("max_total_wr_sq_tokens", "Maximum number of send/rdma work request tokens peer btl",
32, &mca_btl_mvapi_component.max_total_wr_sq_tokens);
param = mca_base_param_find("mpi", NULL, "leave_pinned");
mca_base_param_lookup_int(param, &value);
mca_btl_mvapi_component.leave_pinned = value;
mca_btl_mvapi_component.max_send_size = mca_btl_mvapi_module.super.btl_max_send_size;
mca_btl_mvapi_component.eager_limit = mca_btl_mvapi_module.super.btl_eager_limit;
@ -430,7 +422,7 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
mvapi_btl->rd_num = mca_btl_mvapi_component.rd_num + mca_btl_mvapi_component.rd_rsv;
mvapi_btl->rd_low = mca_btl_mvapi_component.rd_low;
mvapi_btl->num_peers = 0;
mvapi_btl->sd_tokens_hp = mvapi_btl->sd_tokens_lp = mca_btl_mvapi_component.max_wr_sq_tokens;
mvapi_btl->sd_tokens_hp = mvapi_btl->sd_tokens_lp = mca_btl_mvapi_component.srq_sd_max;
/* Initialize module state */
@ -446,11 +438,6 @@ mca_btl_base_module_t** mca_btl_mvapi_component_init(int *num_btl_modules,
OBJ_CONSTRUCT(&mvapi_btl->recv_free_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&mvapi_btl->recv_free_max, ompi_free_list_t);
OBJ_CONSTRUCT(&mvapi_btl->repost, opal_list_t);
OBJ_CONSTRUCT(&mvapi_btl->reg_mru_list, opal_list_t);
if(mca_btl_mvapi_module_init(mvapi_btl) != OMPI_SUCCESS) {
free(hca_ids);
return NULL;
@ -596,10 +583,7 @@ int mca_btl_mvapi_component_progress()
/* Process a completed send */
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
frag->rc = OMPI_SUCCESS;
frag->base.des_cbfunc(&mvapi_btl->super, endpoint, &frag->base, frag->rc);
count++;
frag->base.des_cbfunc(&mvapi_btl->super, endpoint, &frag->base, OMPI_SUCCESS);
/* check and see if we need to progress pending sends */
if( mca_btl_mvapi_component.use_srq &&
@ -612,6 +596,7 @@ int mca_btl_mvapi_component_progress()
BTL_ERROR(("error in posting pending send\n"));
}
}
count++;
break;
case VAPI_CQE_RQ_SEND_DATA:
@ -621,6 +606,12 @@ int mca_btl_mvapi_component_progress()
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
credits = frag->hdr->credits;
/* advance the segment address past the header and subtract from the length..*/
frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
/* call registered callback */
mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);
OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_eager), (opal_list_item_t*) frag);
/* repost receive descriptors */
if(mca_btl_mvapi_component.use_srq) {
OPAL_THREAD_ADD32(&mvapi_btl->srd_posted_hp, -1);
@ -630,13 +621,6 @@ int mca_btl_mvapi_component_progress()
MCA_BTL_MVAPI_ENDPOINT_POST_RR_HIGH(endpoint, 0);
}
/* advance the segment address past the header and subtract from the length..*/
frag->rc=OMPI_SUCCESS;
frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
/* call registered callback */
mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);
OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_eager), (opal_list_item_t*) frag);
/* check to see if we need to progress any pending desciptors */
if( !mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp, credits) > 0
@ -659,7 +643,11 @@ int mca_btl_mvapi_component_progress()
/* check to see if we need to return credits */
if( !mca_btl_mvapi_component.use_srq &&
endpoint->rd_credits_hp >= mca_btl_mvapi_component.rd_win) {
mca_btl_mvapi_endpoint_send_credits(endpoint, endpoint->lcl_qp_hndl_hp, endpoint->rem_info.rem_qp_num_hp, &endpoint->rd_credits_hp);
mca_btl_mvapi_endpoint_send_credits(
endpoint,
endpoint->lcl_qp_hndl_hp,
endpoint->rem_info.rem_qp_num_hp,
&endpoint->rd_credits_hp);
}
count++;
break;
@ -691,8 +679,7 @@ int mca_btl_mvapi_component_progress()
/* Process a completed send - receiver must return tokens */
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
frag->rc = OMPI_SUCCESS;
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, OMPI_SUCCESS);
count++;
/* if we have tokens, process pending sends */
@ -714,12 +701,10 @@ int mca_btl_mvapi_component_progress()
case VAPI_CQE_SQ_RDMA_WRITE:
/* Process a completed write - returns send tokens immediately */
/* Process a completed write - returns tokens immediately */
frag = (mca_btl_mvapi_frag_t*) (unsigned long) comp.id;
endpoint = frag->endpoint;
frag->rc = OMPI_SUCCESS;
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, frag->rc);
count++;
frag->base.des_cbfunc(&mvapi_btl->super, frag->endpoint, &frag->base, OMPI_SUCCESS);
if(mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&mvapi_btl->sd_tokens_lp, 1) > 0
@ -739,6 +724,7 @@ int mca_btl_mvapi_component_progress()
frag = (mca_btl_mvapi_frag_t *) frag_item;
MCA_BTL_IB_FRAG_PROGRESS(frag);
}
count++;
break;
case VAPI_CQE_RQ_SEND_DATA:
@ -747,7 +733,14 @@ int mca_btl_mvapi_component_progress()
endpoint = (mca_btl_mvapi_endpoint_t*) frag->endpoint;
credits = frag->hdr->credits;
/* post descriptors before processing receive */
/* process received frag */
frag->rc=OMPI_SUCCESS;
frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
/* call registered callback */
mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);
OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_max), (opal_list_item_t*) frag);
/* post descriptors */
if(mca_btl_mvapi_component.use_srq) {
OPAL_THREAD_ADD32(&mvapi_btl->srd_posted_lp, -1);
MCA_BTL_MVAPI_POST_SRR_LOW(mvapi_btl, 0);
@ -756,13 +749,6 @@ int mca_btl_mvapi_component_progress()
MCA_BTL_MVAPI_ENDPOINT_POST_RR_LOW(endpoint, 0);
}
/* process received frag */
frag->rc=OMPI_SUCCESS;
frag->segment.seg_len = comp.byte_len-((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
/* advance the segment address past the header and subtract from the length..*/
mvapi_btl->ib_reg[frag->hdr->tag].cbfunc(&mvapi_btl->super, frag->hdr->tag, &frag->base, mvapi_btl->ib_reg[frag->hdr->tag].cbdata);
OMPI_FREE_LIST_RETURN(&(mvapi_btl->recv_free_max), (opal_list_item_t*) frag);
/* check to see if we need to progress pending descriptors */
if(!mca_btl_mvapi_component.use_srq &&
OPAL_THREAD_ADD32(&endpoint->sd_tokens_lp, credits) > 0 &&
@ -781,7 +767,11 @@ int mca_btl_mvapi_component_progress()
/* check to see if we need to return credits */
if( !mca_btl_mvapi_component.use_srq &&
endpoint->rd_credits_lp >= mca_btl_mvapi_component.rd_win) {
mca_btl_mvapi_endpoint_send_credits(endpoint, endpoint->lcl_qp_hndl_lp, endpoint->rem_info.rem_qp_num_lp, &endpoint->rd_credits_lp);
mca_btl_mvapi_endpoint_send_credits(
endpoint,
endpoint->lcl_qp_hndl_lp,
endpoint->rem_info.rem_qp_num_lp,
&endpoint->rd_credits_lp);
}
count++;
break;

Просмотреть файл

@ -176,6 +176,7 @@ static void mca_btl_mvapi_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
endpoint->sd_tokens_hp = mca_btl_mvapi_component.rd_num;
endpoint->sd_tokens_lp = mca_btl_mvapi_component.rd_num;
endpoint->get_tokens = mca_btl_mvapi_component.ib_qp_ous_rd_atom;
endpoint->rem_info.rem_qp_num_hp = 0;
endpoint->rem_info.rem_qp_num_lp = 0;
endpoint->rem_info.rem_lid = 0;
@ -1037,7 +1038,7 @@ static void mca_btl_mvapi_endpoint_control_cb(
}
/**
*
* Return credits to peer
*/
void mca_btl_mvapi_endpoint_send_credits(
@ -1050,7 +1051,6 @@ void mca_btl_mvapi_endpoint_send_credits(
mca_btl_mvapi_frag_t* frag;
int rc;
/* fprintf(stderr, "sending credits %d\n", *credits); */
MCA_BTL_IB_FRAG_ALLOC_EAGER(btl, frag, rc);
if(NULL == frag) {
BTL_ERROR(("error allocating fragment"));

Просмотреть файл

@ -237,9 +237,7 @@ do { \
BTL_ERROR(("error posting receive descriptors: %s",\
VAPI_strerror(rc))); \
} else { \
/* fprintf(stderr, "posting: %d to %d\n", num_post, rd_posted); */ \
OPAL_THREAD_ADD32(&(rd_posted), num_post); \
/* fprintf(stderr, "credits: %d to %d\n", num_post, rd_credits); */ \
OPAL_THREAD_ADD32(&(rd_credits), num_post); \
}\
} while(0); \

Просмотреть файл

@ -25,11 +25,10 @@
static void mca_btl_mvapi_frag_common_constructor( mca_btl_mvapi_frag_t* frag)
{
mca_mpool_mvapi_registration_t* mem_hndl = (mca_mpool_mvapi_registration_t*) frag->base.super.user_data;
frag->hdr = (mca_btl_mvapi_header_t*) (frag+1); /* initialize btl header to point to start at end of frag */
frag->hdr = (mca_btl_mvapi_header_t*) (frag+1); /* initialize btl header to start at end of frag */
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_mvapi_header_t);
/* init the segment address to start after the btl header */
/* frag->mem_hndl = mem_hndl->hndl; */
frag->segment.seg_len = frag->size;
frag->segment.seg_key.key32[0] = (uint32_t) mem_hndl->l_key;
frag->sg_entry.lkey = mem_hndl->l_key;