1
1
Moved a lot of the module-specific init from the component init to the module init.

Try keeping a pointer to reduce indexing, didn't seem to help - leaving in place
for now.

This commit was SVN r10485.
Этот коммит содержится в:
Andrew Friedley 2006-06-22 22:12:13 +00:00
родитель 9766c01e50
Коммит 046f4cd4ae
4 изменённых файлов: 157 добавлений и 210 удалений

Просмотреть файл

@@ -132,7 +132,6 @@ int mca_btl_ud_add_procs(
ud_btl->rd_num = mca_btl_ud_component.rd_num + log2(nprocs) * mca_btl_ud_component.srq_rd_per_peer; ud_btl->rd_num = mca_btl_ud_component.rd_num + log2(nprocs) * mca_btl_ud_component.srq_rd_per_peer;
if(ud_btl->rd_num > mca_btl_ud_component.srq_rd_max) if(ud_btl->rd_num > mca_btl_ud_component.srq_rd_max)
ud_btl->rd_num = mca_btl_ud_component.srq_rd_max; ud_btl->rd_num = mca_btl_ud_component.srq_rd_max;
ud_btl->rd_low = ud_btl->rd_num - 1;
} }
#endif #endif
return OMPI_SUCCESS; return OMPI_SUCCESS;
@@ -154,8 +153,7 @@ int mca_btl_ud_del_procs(struct mca_btl_base_module_t* btl,
/* /*
*Register callback function to support send/recv semantics *Register callback function to support send/recv semantics
*/ */
int mca_btl_ud_register( int mca_btl_ud_register(struct mca_btl_base_module_t* btl,
struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag, mca_btl_base_tag_t tag,
mca_btl_base_module_recv_cb_fn_t cbfunc, mca_btl_base_module_recv_cb_fn_t cbfunc,
void* cbdata) void* cbdata)
@@ -197,12 +195,6 @@ mca_btl_base_descriptor_t* mca_btl_ud_alloc(
return NULL; return NULL;
} }
/* TODO - how much of this is needed? */
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
return (mca_btl_base_descriptor_t*)frag; return (mca_btl_base_descriptor_t*)frag;
} }
@@ -279,7 +271,8 @@ mca_btl_base_descriptor_t* mca_btl_ud_prepare_src(
ud_btl = (mca_btl_ud_module_t*) btl; ud_btl = (mca_btl_ud_module_t*) btl;
ud_reg = (mca_mpool_openib_registration_t*) registration; ud_reg = (mca_mpool_openib_registration_t*) registration;
if(OPAL_UNLIKELY(NULL != ud_reg && 0 == ompi_convertor_need_buffers(convertor))) { if(OPAL_UNLIKELY(NULL != ud_reg &&
0 == ompi_convertor_need_buffers(convertor))) {
/* the memory is already pinned and we have contiguous user data */ /* the memory is already pinned and we have contiguous user data */
MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc); MCA_BTL_IB_FRAG_ALLOC_FRAG(btl, frag, rc);
@@ -450,10 +443,13 @@ int mca_btl_ud_send(
*/ */
int mca_btl_ud_module_init(mca_btl_ud_module_t *ud_btl) int mca_btl_ud_module_init(mca_btl_ud_module_t *ud_btl)
{ {
/* Allocate Protection Domain */ struct mca_mpool_base_resources_t mpool_resources;
struct ibv_context *ctx; struct ibv_context *ctx = ud_btl->ib_dev_context;
struct ibv_recv_wr* bad_wr;
ctx = ud_btl->ib_dev_context; mca_btl_ud_frag_t* frag;
ompi_free_list_item_t* item;
uint32_t length;
int32_t rc, i;
ud_btl->ib_pd = ibv_alloc_pd(ctx); ud_btl->ib_pd = ibv_alloc_pd(ctx);
if(NULL == ud_btl->ib_pd) { if(NULL == ud_btl->ib_pd) {
@@ -463,6 +459,17 @@ int mca_btl_ud_module_init(mca_btl_ud_module_t *ud_btl)
return OMPI_ERROR; return OMPI_ERROR;
} }
mpool_resources.ib_pd = ud_btl->ib_pd;
ud_btl->super.btl_mpool =
mca_mpool_base_module_create(mca_btl_ud_component.ib_mpool_name,
&ud_btl->super, &mpool_resources);
if(NULL == ud_btl->super.btl_mpool) {
BTL_ERROR(("error creating openib memory pool! aborting ud btl initialization"));
return OMPI_ERROR;
}
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ #ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
if(mca_btl_ud_component.use_srq) { if(mca_btl_ud_component.use_srq) {
struct ibv_srq_init_attr attr; struct ibv_srq_init_attr attr;
@@ -494,32 +501,26 @@ int mca_btl_ud_module_init(mca_btl_ud_module_t *ud_btl)
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3 #if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
ud_btl->ib_cq_lp = ud_btl->ib_cq_lp =
ibv_create_cq(ctx, mca_btl_ud_component.ib_cq_size, NULL); ibv_create_cq(ctx, mca_btl_ud_component.ib_cq_size, NULL);
ud_btl->ib_cq_hp =
ibv_create_cq(ctx, mca_btl_ud_component.ib_cq_size, NULL);
#else #else
ud_btl->ib_cq_lp = ud_btl->ib_cq_lp = ibv_create_cq(ctx,
ibv_create_cq(ctx, mca_btl_ud_component.ib_cq_size, mca_btl_ud_component.ib_cq_size, NULL, NULL, 0);
NULL, NULL, 0);
ud_btl->ib_cq_hp = ibv_create_cq(ctx,
mca_btl_ud_component.ib_cq_size, NULL, NULL, 0);
#endif #endif
if(NULL == ud_btl->ib_cq_lp) { if(NULL == ud_btl->ib_cq_lp) {
BTL_ERROR(("error creating low priority cq for %s errno says %s\n", BTL_ERROR(("error creating low priority cq for %s errno says %s\n",
ibv_get_device_name(ud_btl->ib_dev), ibv_get_device_name(ud_btl->ib_dev), strerror(errno)));
strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
#if OMPI_MCA_BTL_OPENIB_IBV_CREATE_CQ_ARGS == 3
ud_btl->ib_cq_hp =
ibv_create_cq(ctx, mca_btl_ud_component.ib_cq_size, NULL);
#else
ud_btl->ib_cq_hp =
ibv_create_cq(ctx, mca_btl_ud_component.ib_cq_size,
NULL, NULL, 0);
#endif
if(NULL == ud_btl->ib_cq_hp) { if(NULL == ud_btl->ib_cq_hp) {
BTL_ERROR(("error creating high priority cq for %s errno says %s\n", BTL_ERROR(("error creating high priority cq for %s errno says %s\n",
ibv_get_device_name(ud_btl->ib_dev), ibv_get_device_name(ud_btl->ib_dev), strerror(errno)));
strerror(errno)));
return OMPI_ERROR; return OMPI_ERROR;
} }
@@ -544,6 +545,95 @@ int mca_btl_ud_module_init(mca_btl_ud_module_t *ud_btl)
return OMPI_ERROR; return OMPI_ERROR;
} }
OBJ_CONSTRUCT(&ud_btl->ib_lock, opal_mutex_t);
OBJ_CONSTRUCT(&ud_btl->send_free_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&ud_btl->send_free_max, ompi_free_list_t);
OBJ_CONSTRUCT(&ud_btl->send_free_frag, ompi_free_list_t);
OBJ_CONSTRUCT(&ud_btl->recv_free_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&ud_btl->recv_free_max, ompi_free_list_t);
OBJ_CONSTRUCT(&ud_btl->pending_frags_hp, opal_list_t);
OBJ_CONSTRUCT(&ud_btl->pending_frags_lp, opal_list_t);
/* Initialize pool of send fragments */
length = sizeof(mca_btl_ud_frag_t) + sizeof(mca_btl_ud_header_t) +
ud_btl->super.btl_eager_limit + 2*MCA_BTL_IB_FRAG_ALIGN;
ompi_free_list_init(&ud_btl->send_free_eager,
length,
OBJ_CLASS(mca_btl_ud_send_frag_eager_t),
mca_btl_ud_component.ib_free_list_num,
mca_btl_ud_component.ib_free_list_max,
mca_btl_ud_component.ib_free_list_inc,
ud_btl->super.btl_mpool);
ompi_free_list_init(&ud_btl->recv_free_eager,
length + sizeof(mca_btl_ud_ib_header_t),
OBJ_CLASS(mca_btl_ud_recv_frag_eager_t),
mca_btl_ud_component.ib_free_list_num,
mca_btl_ud_component.ib_free_list_max,
mca_btl_ud_component.ib_free_list_inc,
ud_btl->super.btl_mpool);
length = sizeof(mca_btl_ud_frag_t) + sizeof(mca_btl_ud_header_t) +
ud_btl->super.btl_max_send_size + 2*MCA_BTL_IB_FRAG_ALIGN;
ompi_free_list_init(&ud_btl->send_free_max,
length,
OBJ_CLASS(mca_btl_ud_send_frag_max_t),
mca_btl_ud_component.ib_free_list_num,
mca_btl_ud_component.ib_free_list_max,
mca_btl_ud_component.ib_free_list_inc,
ud_btl->super.btl_mpool);
/* Initialize pool of receive fragments */
ompi_free_list_init (&ud_btl->recv_free_max,
length + sizeof(mca_btl_ud_ib_header_t),
OBJ_CLASS (mca_btl_ud_recv_frag_max_t),
mca_btl_ud_component.ib_free_list_num,
mca_btl_ud_component.ib_free_list_max,
mca_btl_ud_component.ib_free_list_inc,
ud_btl->super.btl_mpool);
length = sizeof(mca_btl_ud_frag_t) +
sizeof(mca_btl_ud_header_t) + 2*MCA_BTL_IB_FRAG_ALIGN;
ompi_free_list_init(&ud_btl->send_free_frag,
length,
OBJ_CLASS(mca_btl_ud_send_frag_frag_t),
mca_btl_ud_component.ib_free_list_num,
mca_btl_ud_component.ib_free_list_max,
mca_btl_ud_component.ib_free_list_inc,
ud_btl->super.btl_mpool);
/* Post receive descriptors */
for(i = 0; i < ud_btl->rd_num; i++) {
OMPI_FREE_LIST_WAIT(&ud_btl->recv_free_eager, item, rc);
frag = (mca_btl_ud_frag_t*)item;
frag->sg_entry.length = frag->size +
sizeof(mca_btl_ud_header_t) +
sizeof(mca_btl_ud_ib_header_t);
if(ibv_post_recv(ud_btl->qp_hp,
&frag->wr_desc.rd_desc, &bad_wr)) {
BTL_ERROR(("error posting recv, errno %s\n", strerror(errno)));
return OMPI_ERROR;
}
OMPI_FREE_LIST_WAIT(&ud_btl->recv_free_max, item, rc);
frag = (mca_btl_ud_frag_t*)item;
frag->sg_entry.length = frag->size +
sizeof(mca_btl_ud_header_t) +
sizeof(mca_btl_ud_ib_header_t);
if(ibv_post_recv(ud_btl->qp_lp,
&frag->wr_desc.rd_desc, &bad_wr)) {
BTL_ERROR(("error posting recv, errno %s\n", strerror(errno)));
return OMPI_ERROR;
}
}
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@@ -87,7 +87,6 @@ struct mca_btl_ud_component_t {
int32_t sd_num; /**< maximum number of send descriptors to post to a QP */ int32_t sd_num; /**< maximum number of send descriptors to post to a QP */
int32_t rd_num; /**< number of receive descriptors to post to each QP */ int32_t rd_num; /**< number of receive descriptors to post to each QP */
int32_t rd_low; /**< low water mark to reach before re-posting receive descriptors */
int32_t srq_rd_max; /* maximum number of receive descriptors posted */ int32_t srq_rd_max; /* maximum number of receive descriptors posted */
int32_t srq_rd_per_peer; /* number of receive descriptors to post per log2(peers) in SRQ mode */ int32_t srq_rd_per_peer; /* number of receive descriptors to post per log2(peers) in SRQ mode */
@@ -96,7 +95,9 @@ struct mca_btl_ud_component_t {
size_t eager_limit; size_t eager_limit;
size_t max_send_size; size_t max_send_size;
uint32_t reg_mru_len; uint32_t reg_mru_len;
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
uint32_t use_srq; uint32_t use_srq;
#endif
uint32_t ib_cq_size; /**< Max outstanding CQE on the CQ */ uint32_t ib_cq_size; /**< Max outstanding CQE on the CQ */
uint32_t ib_sg_list_size; /**< Max scatter/gather descriptor entries on the WQ*/ uint32_t ib_sg_list_size; /**< Max scatter/gather descriptor entries on the WQ*/
@@ -118,7 +119,7 @@ typedef mca_btl_base_recv_reg_t mca_btl_ud_recv_reg_t;
*/ */
#if OMPI_ENABLE_DEBUG #if OMPI_ENABLE_DEBUG
#define MCA_BTL_UD_ENABLE_PROFILE 1 #define MCA_BTL_UD_ENABLE_PROFILE 0
#else #else
#define MCA_BTL_UD_ENABLE_PROFILE 0 #define MCA_BTL_UD_ENABLE_PROFILE 0
#endif #endif
@@ -184,10 +185,11 @@ struct mca_btl_ud_module_t {
#endif #endif
int32_t rd_num; int32_t rd_num;
int32_t rd_low;
#if 0
int32_t rd_posted_hp; /**< number of high priority descriptors posted */ int32_t rd_posted_hp; /**< number of high priority descriptors posted */
int32_t rd_posted_lp; /**< number of low priority descriptors posted */ int32_t rd_posted_lp; /**< number of low priority descriptors posted */
#endif
int32_t sd_wqe_hp; /**< number of available send wqe entries */ int32_t sd_wqe_hp; /**< number of available send wqe entries */
int32_t sd_wqe_lp; /**< number of available send wqe entries */ int32_t sd_wqe_lp; /**< number of available send wqe entries */

Просмотреть файл

@@ -151,8 +151,10 @@ int mca_btl_ud_component_open(void)
"openib", &mca_btl_ud_component.ib_mpool_name); "openib", &mca_btl_ud_component.ib_mpool_name);
mca_btl_ud_param_register_int("reg_mru_len", "length of the registration cache most recently used list", mca_btl_ud_param_register_int("reg_mru_len", "length of the registration cache most recently used list",
16, (int*) &mca_btl_ud_component.reg_mru_len); 16, (int*) &mca_btl_ud_component.reg_mru_len);
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
mca_btl_ud_param_register_int("use_srq", "if 1 use the IB shared receive queue to post receive descriptors", mca_btl_ud_param_register_int("use_srq", "if 1 use the IB shared receive queue to post receive descriptors",
0, (int*) &mca_btl_ud_component.use_srq); 0, (int*) &mca_btl_ud_component.use_srq);
#endif
mca_btl_ud_param_register_int("ib_cq_size", "size of the IB completion queue", mca_btl_ud_param_register_int("ib_cq_size", "size of the IB completion queue",
2000, (int*) &mca_btl_ud_component.ib_cq_size); 2000, (int*) &mca_btl_ud_component.ib_cq_size);
mca_btl_ud_param_register_int("ib_sg_list_size", "size of IB segment list", mca_btl_ud_param_register_int("ib_sg_list_size", "size of IB segment list",
@@ -173,8 +175,6 @@ int mca_btl_ud_component_open(void)
16, (int*) &mca_btl_ud_component.sd_num); 16, (int*) &mca_btl_ud_component.sd_num);
mca_btl_ud_param_register_int("rd_num", "number of receive descriptors to post to a QP", mca_btl_ud_param_register_int("rd_num", "number of receive descriptors to post to a QP",
500, (int*) &mca_btl_ud_component.rd_num); 500, (int*) &mca_btl_ud_component.rd_num);
mca_btl_ud_param_register_int("rd_low", "low water mark before reposting occurs",
300, (int*) &mca_btl_ud_component.rd_low);
mca_btl_ud_param_register_int("srq_rd_max", "Max number of receive descriptors posted per SRQ.", mca_btl_ud_param_register_int("srq_rd_max", "Max number of receive descriptors posted per SRQ.",
1000, (int*) &mca_btl_ud_component.srq_rd_max); 1000, (int*) &mca_btl_ud_component.srq_rd_max);
mca_btl_ud_param_register_int("srq_rd_per_peer", "Number of receive descriptors posted per peer. (SRQ)", mca_btl_ud_param_register_int("srq_rd_per_peer", "Number of receive descriptors posted per peer. (SRQ)",
@@ -262,10 +262,9 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int *num_btl_modules,
{ {
struct ibv_device **ib_devs; struct ibv_device **ib_devs;
struct ibv_device* ib_dev; struct ibv_device* ib_dev;
int32_t num_devs, rc; int32_t num_devs;
mca_btl_base_module_t** btls; mca_btl_base_module_t** btls;
uint32_t i,j, length; uint32_t i, j;
struct mca_mpool_base_resources_t mpool_resources;
opal_list_t btl_list; opal_list_t btl_list;
mca_btl_ud_module_t* ud_btl; mca_btl_ud_module_t* ud_btl;
mca_btl_base_selected_module_t* ib_selected; mca_btl_base_selected_module_t* ib_selected;
@@ -319,7 +318,7 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int *num_btl_modules,
ib_devs[i++] = ib_dev; ib_devs[i++] = ib_dev;
#endif #endif
/** We must loop through all the hca id's, get there handles and /** We must loop through all the hca id's, get their handles and
for each hca we query the number of ports on the hca and set up for each hca we query the number of ports on the hca and set up
a distinct btl module for each hca port */ a distinct btl module for each hca port */
@@ -347,7 +346,6 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int *num_btl_modules,
/* Note ports are 1 based hence j = 1 */ /* Note ports are 1 based hence j = 1 */
for(j = 1; j <= ib_dev_attr.phys_port_cnt; j++){ for(j = 1; j <= ib_dev_attr.phys_port_cnt; j++){
struct ibv_port_attr* ib_port_attr; struct ibv_port_attr* ib_port_attr;
ib_port_attr = (struct ibv_port_attr*) malloc(sizeof(struct ibv_port_attr)); ib_port_attr = (struct ibv_port_attr*) malloc(sizeof(struct ibv_port_attr));
@@ -383,11 +381,11 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int *num_btl_modules,
/* Allocate space for btl modules */ /* Allocate space for btl modules */
mca_btl_ud_component.ud_btls = (mca_btl_ud_module_t*) mca_btl_ud_component.ud_btls = (mca_btl_ud_module_t*)
malloc(sizeof(mca_btl_ud_module_t) * mca_btl_ud_component.ib_num_btls); malloc(sizeof(mca_btl_ud_module_t) * mca_btl_ud_component.ib_num_btls);
if(NULL == mca_btl_ud_component.ud_btls) { if(NULL == mca_btl_ud_component.ud_btls) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return NULL; return NULL;
} }
btls = (struct mca_btl_base_module_t**) btls = (struct mca_btl_base_module_t**)
malloc(mca_btl_ud_component.ib_num_btls * sizeof(mca_btl_ud_module_t*)); malloc(mca_btl_ud_component.ib_num_btls * sizeof(mca_btl_ud_module_t*));
if(NULL == btls) { if(NULL == btls) {
@@ -406,23 +404,11 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int *num_btl_modules,
ud_btl = &mca_btl_ud_component.ud_btls[i]; ud_btl = &mca_btl_ud_component.ud_btls[i];
ud_btl->rd_num = mca_btl_ud_component.rd_num; ud_btl->rd_num = mca_btl_ud_component.rd_num;
ud_btl->rd_low = mca_btl_ud_component.rd_low;
ud_btl->sd_wqe_lp = mca_btl_ud_component.sd_num; ud_btl->sd_wqe_lp = mca_btl_ud_component.sd_num;
ud_btl->sd_wqe_hp = mca_btl_ud_component.sd_num; ud_btl->sd_wqe_hp = mca_btl_ud_component.sd_num;
/* Initialize module state */ /* Initialize module state */
OBJ_CONSTRUCT(&ud_btl->ib_lock, opal_mutex_t);
OBJ_CONSTRUCT(&ud_btl->send_free_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&ud_btl->send_free_max, ompi_free_list_t);
OBJ_CONSTRUCT(&ud_btl->send_free_frag, ompi_free_list_t);
OBJ_CONSTRUCT(&ud_btl->recv_free_eager, ompi_free_list_t);
OBJ_CONSTRUCT(&ud_btl->recv_free_max, ompi_free_list_t);
OBJ_CONSTRUCT(&ud_btl->pending_frags_hp, opal_list_t);
OBJ_CONSTRUCT(&ud_btl->pending_frags_lp, opal_list_t);
if(mca_btl_ud_module_init(ud_btl) != OMPI_SUCCESS) { if(mca_btl_ud_module_init(ud_btl) != OMPI_SUCCESS) {
#if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST #if OMPI_MCA_BTL_OPENIB_HAVE_DEVICE_LIST
ibv_free_device_list(ib_devs); ibv_free_device_list(ib_devs);
@@ -432,125 +418,6 @@ mca_btl_base_module_t** mca_btl_ud_component_init(int *num_btl_modules,
return NULL; return NULL;
} }
mpool_resources.ib_pd = ud_btl->ib_pd;
/* initialize the memory pool using the hca */
ud_btl->super.btl_mpool =
mca_mpool_base_module_create(mca_btl_ud_component.ib_mpool_name,
&ud_btl->super,
&mpool_resources);
if(NULL == ud_btl->super.btl_mpool) {
BTL_ERROR(("error creating openib memory pool! aborting ud btl initialization"));
return NULL;
}
/* Initialize pool of send fragments */
length = sizeof(mca_btl_ud_frag_t) +
sizeof(mca_btl_ud_header_t) +
ud_btl->super.btl_eager_limit +
2*MCA_BTL_IB_FRAG_ALIGN;
ompi_free_list_init(&ud_btl->send_free_eager,
length,
OBJ_CLASS(mca_btl_ud_send_frag_eager_t),
mca_btl_ud_component.ib_free_list_num,
mca_btl_ud_component.ib_free_list_max,
mca_btl_ud_component.ib_free_list_inc,
ud_btl->super.btl_mpool);
ompi_free_list_init(&ud_btl->recv_free_eager,
length + sizeof(mca_btl_ud_ib_header_t),
OBJ_CLASS(mca_btl_ud_recv_frag_eager_t),
mca_btl_ud_component.ib_free_list_num,
mca_btl_ud_component.ib_free_list_max,
mca_btl_ud_component.ib_free_list_inc,
ud_btl->super.btl_mpool);
length = sizeof(mca_btl_ud_frag_t) +
sizeof(mca_btl_ud_header_t) +
ud_btl->super.btl_max_send_size +
2*MCA_BTL_IB_FRAG_ALIGN;
ompi_free_list_init(&ud_btl->send_free_max,
length,
OBJ_CLASS(mca_btl_ud_send_frag_max_t),
mca_btl_ud_component.ib_free_list_num,
mca_btl_ud_component.ib_free_list_max,
mca_btl_ud_component.ib_free_list_inc,
ud_btl->super.btl_mpool);
/* Initialize pool of receive fragments */
ompi_free_list_init (&ud_btl->recv_free_max,
length + sizeof(mca_btl_ud_ib_header_t),
OBJ_CLASS (mca_btl_ud_recv_frag_max_t),
mca_btl_ud_component.ib_free_list_num,
mca_btl_ud_component.ib_free_list_max,
mca_btl_ud_component.ib_free_list_inc,
ud_btl->super.btl_mpool);
length = sizeof(mca_btl_ud_frag_t) +
sizeof(mca_btl_ud_header_t)+
2*MCA_BTL_IB_FRAG_ALIGN;
ompi_free_list_init(&ud_btl->send_free_frag,
length,
OBJ_CLASS(mca_btl_ud_send_frag_frag_t),
mca_btl_ud_component.ib_free_list_num,
mca_btl_ud_component.ib_free_list_max,
mca_btl_ud_component.ib_free_list_inc,
ud_btl->super.btl_mpool);
/* Post receive descriptors */
do {
struct ibv_recv_wr* bad_wr;
for(j = 0; j < (uint32_t)ud_btl->rd_num; j++) {
mca_btl_ud_frag_t* frag;
ompi_free_list_item_t* item;
OMPI_FREE_LIST_WAIT(&ud_btl->recv_free_eager, item, rc);
frag = (mca_btl_ud_frag_t*)item;
frag->sg_entry.length = frag->size +
sizeof(mca_btl_ud_header_t) +
sizeof(mca_btl_ud_ib_header_t);
if(ibv_post_recv(ud_btl->qp_hp,
&frag->wr_desc.rd_desc, &bad_wr)) {
BTL_ERROR(("error posting recv, errno %s\n",
strerror(errno)));
return NULL;
}
OMPI_FREE_LIST_WAIT(&ud_btl->recv_free_max, item, rc);
frag = (mca_btl_ud_frag_t*)item;
frag->sg_entry.length = frag->size +
sizeof(mca_btl_ud_header_t) +
sizeof(mca_btl_ud_ib_header_t);
if(ibv_post_recv(ud_btl->qp_lp,
&frag->wr_desc.rd_desc, &bad_wr)) {
BTL_ERROR(("error posting recv, errno %s\n",
strerror(errno)));
return NULL;
}
}
} while(0);
/* TODO - Put this somewhere else or clean up our macros */
#if 0
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
if(mca_btl_ud_component.use_srq) {
MCA_BTL_UD_POST_SRR_HIGH(ud_btl, 1);
MCA_BTL_UD_POST_SRR_LOW(ud_btl, 1);
} else {
#endif
MCA_BTL_UD_ENDPOINT_POST_RR_HIGH(ud_btl, 0);
MCA_BTL_UD_ENDPOINT_POST_RR_LOW(ud_btl, 0);
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
}
#endif
#endif
btls[i] = &ud_btl->super; btls[i] = &ud_btl->super;
} }
@@ -584,6 +451,7 @@ int mca_btl_ud_component_progress()
mca_btl_ud_module_t* ud_btl; mca_btl_ud_module_t* ud_btl;
mca_btl_base_recv_reg_t* reg; mca_btl_base_recv_reg_t* reg;
struct ibv_wc wc[MCA_BTL_UD_NUM_WC]; struct ibv_wc wc[MCA_BTL_UD_NUM_WC];
struct ibv_wc* cwc;
/* Poll for completions */ /* Poll for completions */
for(i = 0; i < mca_btl_ud_component.ib_num_btls; i++) { for(i = 0; i < mca_btl_ud_component.ib_num_btls; i++) {
@@ -599,16 +467,17 @@ int mca_btl_ud_component_progress()
head_wr = NULL; head_wr = NULL;
for(j = 0; j < ne; j++) { for(j = 0; j < ne; j++) {
if(OPAL_UNLIKELY(wc[j].status != IBV_WC_SUCCESS)) { cwc = &wc[j];
if(OPAL_UNLIKELY(cwc->status != IBV_WC_SUCCESS)) {
BTL_ERROR(("error polling HP CQ with status %d for wr_id %llu opcode %d\n", BTL_ERROR(("error polling HP CQ with status %d for wr_id %llu opcode %d\n",
wc[j].status, wc[j].wr_id, wc[j].opcode)); cwc->status, cwc->wr_id, cwc->opcode));
return OMPI_ERROR; return OMPI_ERROR;
} }
/* Handle work completions */ /* Handle work completions */
switch(wc[j].opcode) { switch(cwc->opcode) {
case IBV_WC_SEND : case IBV_WC_SEND :
frag = (mca_btl_ud_frag_t*)(unsigned long)wc[j].wr_id; frag = (mca_btl_ud_frag_t*)(unsigned long)cwc->wr_id;
frag->base.des_cbfunc(&ud_btl->super, frag->base.des_cbfunc(&ud_btl->super,
frag->endpoint, &frag->base, OMPI_SUCCESS); frag->endpoint, &frag->base, OMPI_SUCCESS);
@@ -624,11 +493,11 @@ int mca_btl_ud_component_progress()
break; break;
case IBV_WC_RECV: case IBV_WC_RECV:
frag = (mca_btl_ud_frag_t*)(unsigned long) wc[j].wr_id; frag = (mca_btl_ud_frag_t*)(unsigned long)cwc->wr_id;
reg = &ud_btl->ib_reg[frag->hdr->tag]; reg = &ud_btl->ib_reg[frag->hdr->tag];
frag->segment.seg_addr.pval = frag->hdr + 1; frag->segment.seg_addr.pval = frag->hdr + 1;
frag->segment.seg_len = wc[j].byte_len - frag->segment.seg_len = cwc->byte_len -
sizeof(mca_btl_ud_header_t) - sizeof(mca_btl_ud_header_t) -
sizeof(mca_btl_ud_ib_header_t); sizeof(mca_btl_ud_ib_header_t);
@@ -641,7 +510,7 @@ int mca_btl_ud_component_progress()
break; break;
default: default:
BTL_ERROR(("Unhandled work completion opcode is %d", wc[j].opcode)); BTL_ERROR(("Unhandled work completion opcode is %d", cwc->opcode));
break; break;
} }
} }
@@ -666,16 +535,17 @@ int mca_btl_ud_component_progress()
} }
for(j = 0; j < ne; j++) { for(j = 0; j < ne; j++) {
if(OPAL_UNLIKELY(wc[j].status != IBV_WC_SUCCESS)) { cwc = &wc[j];
if(OPAL_UNLIKELY(cwc->status != IBV_WC_SUCCESS)) {
BTL_ERROR(("error polling LP CQ with status %d for wr_id %llu opcode %d", BTL_ERROR(("error polling LP CQ with status %d for wr_id %llu opcode %d",
wc[j].status, wc[j].wr_id, wc[j].opcode)); cwc->status, cwc->wr_id, cwc->opcode));
return OMPI_ERROR; return OMPI_ERROR;
} }
/* Handle n/w completions */ /* Handle n/w completions */
switch(wc[j].opcode) { switch(cwc->opcode) {
case IBV_WC_SEND: case IBV_WC_SEND:
frag = (mca_btl_ud_frag_t*) (unsigned long) wc[j].wr_id; frag = (mca_btl_ud_frag_t*) (unsigned long) cwc->wr_id;
frag->base.des_cbfunc(&ud_btl->super, frag->base.des_cbfunc(&ud_btl->super,
frag->endpoint, &frag->base, OMPI_SUCCESS); frag->endpoint, &frag->base, OMPI_SUCCESS);
@@ -692,12 +562,12 @@ int mca_btl_ud_component_progress()
case IBV_WC_RECV: case IBV_WC_RECV:
/* Process a RECV */ /* Process a RECV */
frag = (mca_btl_ud_frag_t*) (unsigned long) wc[j].wr_id; frag = (mca_btl_ud_frag_t*) (unsigned long) cwc->wr_id;
reg = &ud_btl->ib_reg[frag->hdr->tag]; reg = &ud_btl->ib_reg[frag->hdr->tag];
frag->segment.seg_addr.pval = frag->hdr + 1; frag->segment.seg_addr.pval = frag->hdr + 1;
frag->segment.seg_len = frag->segment.seg_len =
wc[j].byte_len - sizeof(mca_btl_ud_header_t) - cwc->byte_len - sizeof(mca_btl_ud_header_t) -
sizeof(mca_btl_ud_ib_header_t); sizeof(mca_btl_ud_ib_header_t);
/* call registered callback */ /* call registered callback */
@@ -710,7 +580,7 @@ int mca_btl_ud_component_progress()
break; break;
default: default:
BTL_ERROR(("Unhandled work completion opcode %d", wc[j].opcode)); BTL_ERROR(("Unhandled work completion opcode %d", cwc->opcode));
break; break;
} }
} }

Просмотреть файл

@@ -52,7 +52,6 @@ inline int mca_btl_ud_endpoint_post_send(mca_btl_ud_module_t* ud_btl,
{ {
struct ibv_qp* ib_qp; struct ibv_qp* ib_qp;
struct ibv_send_wr* bad_wr; struct ibv_send_wr* bad_wr;
int rc;
/* Have to be careful here - UD adds a 40 byte header, but it is not /* Have to be careful here - UD adds a 40 byte header, but it is not
included on the sending side. */ included on the sending side. */
@@ -107,19 +106,6 @@ inline int mca_btl_ud_endpoint_post_send(mca_btl_ud_module_t* ud_btl,
} }
MCA_BTL_UD_END_TIME(ibv_post_send); MCA_BTL_UD_END_TIME(ibv_post_send);
#if 0
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
if(mca_btl_ud_component.use_srq) {
MCA_BTL_UD_POST_SRR_HIGH(ud_btl, 1);
MCA_BTL_UD_POST_SRR_LOW(ud_btl, 1);
} else {
#endif
MCA_BTL_UD_ENDPOINT_POST_RR_HIGH(ud_btl, 1);
MCA_BTL_UD_ENDPOINT_POST_RR_LOW(ud_btl, 1);
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
}
#endif
#endif
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@@ -479,7 +465,6 @@ int mca_btl_ud_endpoint_send(mca_btl_base_endpoint_t* endpoint,
* queue pair creation and we need to get the remote queue pair * queue pair creation and we need to get the remote queue pair
* info from the peer before the qp is usable, * info from the peer before the qp is usable,
*/ */
/* TODO - maybe start to push this off into its own file? */
int mca_btl_ud_endpoint_init_qp( int mca_btl_ud_endpoint_init_qp(
mca_btl_base_module_t* btl, mca_btl_base_module_t* btl,