1
1

Use the OMPI object system to make the fragment hierarchy more object-oriented.
The main idea (apart from cleanup) is to avoid initialising unneeded fields
and to use the C type-checking system to catch obvious errors.

This commit was SVN r16779.
Этот коммит содержится в:
Gleb Natapov 2007-11-28 07:11:14 +00:00
родитель 267cd2342a
Коммит 6a2d210b7d
8 изменённых файлов: 559 добавлений и 510 удалений

Просмотреть файл

@ -466,7 +466,7 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
uint8_t order, uint8_t order,
size_t size) size_t size)
{ {
mca_btl_openib_frag_t* frag = NULL; mca_btl_openib_com_frag_t* frag = NULL;
mca_btl_openib_module_t* openib_btl; mca_btl_openib_module_t* openib_btl;
int rc; int rc;
openib_btl = (mca_btl_openib_module_t*) btl; openib_btl = (mca_btl_openib_module_t*) btl;
@ -475,13 +475,12 @@ mca_btl_base_descriptor_t* mca_btl_openib_alloc(
if(NULL == frag) if(NULL == frag)
return NULL; return NULL;
/* GMS is this necessary anymore ? */ /* not all upper layer users set this */
frag->segment.seg_len = size; to_base_frag(frag)->segment.seg_len = size;
frag->base.order = order; to_base_frag(frag)->base.order = order;
frag->base.des_flags = 0;
assert(frag->qp_idx <= order); assert(to_send_frag(frag)->qp_idx <= order);
return (mca_btl_base_descriptor_t*)frag; return &to_base_frag(frag)->base;
} }
/** /**
@ -494,19 +493,32 @@ int mca_btl_openib_free(
struct mca_btl_base_module_t* btl, struct mca_btl_base_module_t* btl,
mca_btl_base_descriptor_t* des) mca_btl_base_descriptor_t* des)
{ {
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)des;
/* is this fragment pointing at user memory? */ /* is this fragment pointing at user memory? */
if(((MCA_BTL_OPENIB_FRAG_SEND_USER == frag->type) || if(MCA_BTL_OPENIB_FRAG_SEND_USER == openib_frag_type(des) ||
(MCA_BTL_OPENIB_FRAG_RECV_USER == frag->type)) MCA_BTL_OPENIB_FRAG_RECV_USER == openib_frag_type(des)) {
&& frag->registration != NULL) { mca_btl_openib_com_frag_t* frag = to_com_frag(des);
btl->btl_mpool->mpool_deregister(btl->btl_mpool,
(mca_mpool_base_registration_t*) if(frag->registration != NULL) {
frag->registration); btl->btl_mpool->mpool_deregister(btl->btl_mpool,
frag->registration = NULL; (mca_mpool_base_registration_t*)frag->registration);
frag->registration = NULL;
}
} }
MCA_BTL_IB_FRAG_RETURN(((mca_btl_openib_module_t*) btl), frag); /* reset those field on free so we will not have to do it on alloc */
to_base_frag(des)->base.des_flags = 0;
if(MCA_BTL_OPENIB_FRAG_RECV == openib_frag_type(des) ||
MCA_BTL_OPENIB_FRAG_RECV_USER == openib_frag_type(des)) {
to_base_frag(des)->base.des_src = NULL;
to_base_frag(des)->base.des_src_cnt = 0;
} else if(MCA_BTL_OPENIB_FRAG_SEND == openib_frag_type(des) ||
MCA_BTL_OPENIB_FRAG_SEND_USER == openib_frag_type(des)) {
to_base_frag(des)->base.des_dst = NULL;
to_base_frag(des)->base.des_dst_cnt = 0;
if(MCA_BTL_OPENIB_FRAG_SEND == openib_frag_type(des))
to_com_frag(des)->sg_entry.addr = (uint64_t)to_send_frag(des)->hdr;
}
MCA_BTL_IB_FRAG_RETURN(des);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }
@ -546,8 +558,8 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
) )
{ {
mca_btl_openib_module_t *openib_btl; mca_btl_openib_module_t *openib_btl;
mca_btl_openib_frag_t *frag = NULL;
mca_btl_openib_reg_t *openib_reg; mca_btl_openib_reg_t *openib_reg;
mca_btl_openib_com_frag_t *frag = NULL;
struct iovec iov; struct iovec iov;
uint32_t iov_count = 1; uint32_t iov_count = 1;
size_t max_data = *size; size_t max_data = *size;
@ -574,38 +586,35 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
iov.iov_base, max_data, 0, &registration); iov.iov_base, max_data, 0, &registration);
if(OMPI_SUCCESS != rc || NULL == registration) { if(OMPI_SUCCESS != rc || NULL == registration) {
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag); MCA_BTL_IB_FRAG_RETURN(frag);
return NULL; return NULL;
} }
/* keep track of the registration we did */ /* keep track of the registration we did */
frag->registration = (mca_btl_openib_reg_t*)registration; to_com_frag(frag)->registration =
(mca_btl_openib_reg_t*)registration;
} }
openib_reg = (mca_btl_openib_reg_t*)registration; openib_reg = (mca_btl_openib_reg_t*)registration;
frag->base.order = order;
frag->base.des_flags = 0;
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
frag->sg_entry.length = max_data; frag->sg_entry.length = max_data;
frag->sg_entry.lkey = openib_reg->mr->lkey; frag->sg_entry.lkey = openib_reg->mr->lkey;
frag->sg_entry.addr = (unsigned long)iov.iov_base; frag->sg_entry.addr = (uint64_t)iov.iov_base;
frag->segment.seg_len = max_data; to_base_frag(frag)->base.order = order;
frag->segment.seg_addr.pval = iov.iov_base; to_base_frag(frag)->segment.seg_len = max_data;
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey; to_base_frag(frag)->segment.seg_addr.pval = iov.iov_base;
to_base_frag(frag)->segment.seg_key.key32[0] =
(uint32_t)frag->sg_entry.lkey;
assert(MCA_BTL_NO_ORDER == order); assert(MCA_BTL_NO_ORDER == order);
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu " BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
"frag->segment.seg_key.key32[0] = %lu", "frag->segment.seg_key.key32[0] = %lu",
frag->sg_entry.lkey, frag->sg_entry.addr, frag->sg_entry.lkey, frag->sg_entry.addr,
frag->segment.seg_key.key32[0])); frag->sg_entry.lkey));
return &frag->base;
return &to_base_frag(frag)->base;
} }
} }
@ -621,20 +630,15 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
return NULL; return NULL;
iov.iov_len = max_data; iov.iov_len = max_data;
iov.iov_base = (unsigned char*)frag->segment.seg_addr.pval + reserve; iov.iov_base = (unsigned char*)
to_base_frag(frag)->segment.seg_addr.pval + reserve;
rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data); rc = ompi_convertor_pack(convertor, &iov, &iov_count, &max_data);
*size = max_data; *size = max_data;
frag->segment.seg_len = max_data + reserve; to_base_frag(frag)->segment.seg_len = max_data + reserve;
frag->segment.seg_key.key32[0] = (uint32_t)frag->sg_entry.lkey; to_base_frag(frag)->base.order = order;
/* frag->base.order = order; */
frag->base.des_src = &frag->segment; return &to_base_frag(frag)->base;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->base.des_flags = 0;
frag->base.order = order;
return &frag->base;
} }
/** /**
@ -661,9 +665,10 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
size_t* size) size_t* size)
{ {
mca_btl_openib_module_t *openib_btl; mca_btl_openib_module_t *openib_btl;
mca_btl_openib_frag_t *frag; mca_btl_openib_com_frag_t *frag;
mca_btl_openib_reg_t *openib_reg; mca_btl_openib_reg_t *openib_reg;
int rc; int rc;
void *buffer;
openib_btl = (mca_btl_openib_module_t*)btl; openib_btl = (mca_btl_openib_module_t*)btl;
@ -672,16 +677,16 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
return NULL; return NULL;
} }
ompi_convertor_get_current_pointer( convertor, (void**)&(frag->segment.seg_addr.pval) ); ompi_convertor_get_current_pointer(convertor, &buffer);
if(NULL == registration){ if(NULL == registration){
/* we didn't get a memory registration passed in, so we have to /* we didn't get a memory registration passed in, so we have to
* register the region ourselves * register the region ourselves
*/ */
rc = btl->btl_mpool->mpool_register(btl->btl_mpool, rc = btl->btl_mpool->mpool_register(btl->btl_mpool, buffer, *size, 0,
frag->segment.seg_addr.pval, *size, 0, &registration); &registration);
if(OMPI_SUCCESS != rc || NULL == registration) { if(OMPI_SUCCESS != rc || NULL == registration) {
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag); MCA_BTL_IB_FRAG_RETURN(frag);
return NULL; return NULL;
} }
/* keep track of the registration we did */ /* keep track of the registration we did */
@ -691,24 +696,19 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
frag->sg_entry.length = *size; frag->sg_entry.length = *size;
frag->sg_entry.lkey = openib_reg->mr->lkey; frag->sg_entry.lkey = openib_reg->mr->lkey;
frag->sg_entry.addr = (unsigned long) frag->segment.seg_addr.pval; frag->sg_entry.addr = (uint64_t)buffer;
frag->segment.seg_len = *size; to_base_frag(frag)->segment.seg_addr.pval = buffer;
frag->segment.seg_key.key32[0] = openib_reg->mr->rkey; to_base_frag(frag)->segment.seg_len = *size;
to_base_frag(frag)->segment.seg_key.key32[0] = openib_reg->mr->rkey;
frag->base.order = order; to_base_frag(frag)->base.order = order;
frag->base.des_dst = &frag->segment;
frag->base.des_dst_cnt = 1;
frag->base.des_src = NULL;
frag->base.des_src_cnt = 0;
frag->base.des_flags = 0;
BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu " BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
"frag->segment.seg_key.key32[0] = %lu", "frag->segment.seg_key.key32[0] = %lu",
frag->sg_entry.lkey, frag->sg_entry.addr, frag->sg_entry.lkey, frag->sg_entry.addr,
frag->segment.seg_key.key32[0])); openib_reg->mr->rkey));
return &frag->base; return &to_base_frag(frag)->base;
} }
static int mca_btl_finalize_hca(struct mca_btl_openib_hca_t *hca) static int mca_btl_finalize_hca(struct mca_btl_openib_hca_t *hca)
@ -826,7 +826,7 @@ int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
if(BTL_OPENIB_QP_TYPE_SRQ(qp)){ if(BTL_OPENIB_QP_TYPE_SRQ(qp)){
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(openib_btl, MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(
&openib_btl->qps[qp].u.srq_qp.pending_frags); &openib_btl->qps[qp].u.srq_qp.pending_frags);
if (ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)){ if (ibv_destroy_srq(openib_btl->qps[qp].u.srq_qp.srq)){
@ -895,13 +895,13 @@ int mca_btl_openib_send(
mca_btl_base_tag_t tag) mca_btl_base_tag_t tag)
{ {
mca_btl_openib_send_frag_t* frag = to_send_frag(descriptor);
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)descriptor;
assert(frag->type == MCA_BTL_OPENIB_FRAG_SEND); assert(openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_SEND);
frag->endpoint = endpoint; to_com_frag(frag)->endpoint = endpoint;
frag->hdr->tag = tag; frag->hdr->tag = tag;
frag->wr_desc.sr_desc.opcode = IBV_WR_SEND;
return mca_btl_openib_endpoint_send(endpoint, frag); return mca_btl_openib_endpoint_send(endpoint, frag);
} }
@ -913,52 +913,50 @@ int mca_btl_openib_put( mca_btl_base_module_t* btl,
mca_btl_base_endpoint_t* endpoint, mca_btl_base_endpoint_t* endpoint,
mca_btl_base_descriptor_t* descriptor) mca_btl_base_descriptor_t* descriptor)
{ {
int rc = OMPI_SUCCESS;
struct ibv_send_wr* bad_wr; struct ibv_send_wr* bad_wr;
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*) descriptor; mca_btl_openib_out_frag_t* frag = to_out_frag(descriptor);
/* mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*) btl; */ int qp = descriptor->order;
int qp = frag->base.order; uint64_t rem_addr = descriptor->des_dst->seg_addr.lval;
uint32_t rkey = descriptor->des_dst->seg_key.key32[0];
assert(openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_SEND_USER ||
openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_SEND);
if(MCA_BTL_NO_ORDER == qp) if(MCA_BTL_NO_ORDER == qp)
qp = mca_btl_openib_component.rdma_qp; qp = mca_btl_openib_component.rdma_qp;
/* setup for queued requests */
frag->endpoint = endpoint;
frag->wr_desc.sr_desc.opcode = IBV_WR_RDMA_WRITE;
/* check for a send wqe */ /* check for a send wqe */
if (OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe,-1) < 0) { if (OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe,-1) < 0) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe,1); OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe,1);
OPAL_THREAD_LOCK(&endpoint->endpoint_lock); OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
opal_list_append(&endpoint->pending_put_frags, (opal_list_item_t *)frag); opal_list_append(&endpoint->pending_put_frags, (opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
return rc; return OMPI_SUCCESS;
/* post descriptor */
} else {
int ib_rc;
frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
if ((endpoint->endpoint_proc->proc_ompi->proc_arch & OMPI_ARCH_ISBIGENDIAN) !=
(ompi_proc_local()->proc_arch & OMPI_ARCH_ISBIGENDIAN)) {
frag->wr_desc.sr_desc.wr.rdma.remote_addr = opal_swap_bytes8(frag->base.des_dst->seg_addr.lval);
frag->wr_desc.sr_desc.wr.rdma.rkey = opal_swap_bytes4(frag->base.des_dst->seg_key.key32[0]);
} else
#endif
{
frag->wr_desc.sr_desc.wr.rdma.remote_addr = frag->base.des_dst->seg_addr.lval;
frag->wr_desc.sr_desc.wr.rdma.rkey = frag->base.des_dst->seg_key.key32[0];
}
frag->sg_entry.addr = (unsigned long) frag->base.des_src->seg_addr.pval;
frag->sg_entry.length = frag->base.des_src->seg_len;
frag->base.order = qp;
ib_rc = ibv_post_send(endpoint->qps[qp].lcl_qp, &frag->wr_desc.sr_desc, &bad_wr);
if(ib_rc)
rc = OMPI_ERROR;
} }
return rc; /* post descriptor */
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
if((endpoint->endpoint_proc->proc_ompi->proc_arch & OMPI_ARCH_ISBIGENDIAN)
!= (ompi_proc_local()->proc_arch & OMPI_ARCH_ISBIGENDIAN)) {
rem_addr = opal_swap_bytes8(rem_addr);
rkey = opal_swap_bytes4(rkey);
}
#endif
frag->sr_desc.wr.rdma.remote_addr = rem_addr;
frag->sr_desc.wr.rdma.rkey = rkey;
to_com_frag(frag)->sg_entry.addr =
(uint64_t)descriptor->des_src->seg_addr.pval;
to_com_frag(frag)->sg_entry.length = descriptor->des_src->seg_len;
to_com_frag(frag)->endpoint = endpoint;
descriptor->order = qp;
/* Setting opcode on a frag constructor isn't enough since prepare_src
* may return send_frag instead of put_frag */
frag->sr_desc.opcode = IBV_WR_RDMA_WRITE;
if(ibv_post_send(endpoint->qps[qp].lcl_qp, &frag->sr_desc, &bad_wr))
return OMPI_ERROR;
return OMPI_SUCCESS;
} }
@ -970,69 +968,58 @@ int mca_btl_openib_get( mca_btl_base_module_t* btl,
mca_btl_base_endpoint_t* endpoint, mca_btl_base_endpoint_t* endpoint,
mca_btl_base_descriptor_t* descriptor) mca_btl_base_descriptor_t* descriptor)
{ {
int rc;
struct ibv_send_wr* bad_wr; struct ibv_send_wr* bad_wr;
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*) descriptor; mca_btl_openib_get_frag_t* frag = to_get_frag(descriptor);
/* mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*) btl; */ int qp = descriptor->order;
int qp = frag->base.order; uint64_t rem_addr = descriptor->des_src->seg_addr.lval;
frag->endpoint = endpoint; uint32_t rkey = descriptor->des_src->seg_key.key32[0];
frag->wr_desc.sr_desc.opcode = IBV_WR_RDMA_READ;
assert(openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_RECV_USER);
if(MCA_BTL_NO_ORDER == qp) if(MCA_BTL_NO_ORDER == qp)
qp = mca_btl_openib_component.rdma_qp; qp = mca_btl_openib_component.rdma_qp;
/* check for a send wqe */ /* check for a send wqe */
if (OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe,-1) < 0) { if (OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe,-1) < 0) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe,1); OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe,1);
OPAL_THREAD_LOCK(&endpoint->endpoint_lock); OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
opal_list_append(&endpoint->pending_get_frags, (opal_list_item_t*)frag); opal_list_append(&endpoint->pending_get_frags, (opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
return OMPI_SUCCESS; return OMPI_SUCCESS;
}
/* check for a get token */ /* check for a get token */
} else if(OPAL_THREAD_ADD32(&endpoint->get_tokens,-1) < 0) { if(OPAL_THREAD_ADD32(&endpoint->get_tokens,-1) < 0) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe,1); OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe,1);
OPAL_THREAD_ADD32(&endpoint->get_tokens,1); OPAL_THREAD_ADD32(&endpoint->get_tokens,1);
OPAL_THREAD_LOCK(&endpoint->endpoint_lock); OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
opal_list_append(&endpoint->pending_get_frags, (opal_list_item_t*)frag); opal_list_append(&endpoint->pending_get_frags, (opal_list_item_t*)frag);
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} else {
frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
if ((endpoint->endpoint_proc->proc_ompi->proc_arch & OMPI_ARCH_ISBIGENDIAN) !=
(ompi_proc_local()->proc_arch & OMPI_ARCH_ISBIGENDIAN)) {
frag->wr_desc.sr_desc.wr.rdma.remote_addr = opal_swap_bytes8(frag->base.des_src->seg_addr.lval);
frag->wr_desc.sr_desc.wr.rdma.rkey = opal_swap_bytes4(frag->base.des_src->seg_key.key32[0]);
} else
#endif
{
frag->wr_desc.sr_desc.wr.rdma.remote_addr = frag->base.des_src->seg_addr.lval;
frag->wr_desc.sr_desc.wr.rdma.rkey = frag->base.des_src->seg_key.key32[0];
}
frag->sg_entry.addr = (unsigned long) frag->base.des_dst->seg_addr.pval;
frag->sg_entry.length = frag->base.des_dst->seg_len;
frag->base.order = qp;
if(ibv_post_send(endpoint->qps[qp].lcl_qp, &frag->wr_desc.sr_desc, &bad_wr)){
BTL_ERROR(("error posting send request errno (%d) says %s",
errno, strerror(errno)));
rc = ORTE_ERROR;
} else {
rc = ORTE_SUCCESS;
}
} }
#if OMPI_ENABLE_HETEROGENEOUS_SUPPORT
return rc; if((endpoint->endpoint_proc->proc_ompi->proc_arch & OMPI_ARCH_ISBIGENDIAN)
!= (ompi_proc_local()->proc_arch & OMPI_ARCH_ISBIGENDIAN)) {
rem_addr = opal_swap_bytes8(rem_addr);
rkey = opal_swap_bytes4(rkey);
}
#endif
frag->sr_desc.wr.rdma.remote_addr = rem_addr;
frag->sr_desc.wr.rdma.rkey = rkey;
to_com_frag(frag)->sg_entry.addr =
(uint64_t)descriptor->des_dst->seg_addr.pval;
to_com_frag(frag)->sg_entry.length = descriptor->des_dst->seg_len;
to_com_frag(frag)->endpoint = endpoint;
descriptor->order = qp;
if(ibv_post_send(endpoint->qps[qp].lcl_qp, &frag->sr_desc, &bad_wr))
return OMPI_ERROR;
return OMPI_SUCCESS;
} }
int mca_btl_openib_ft_event(int state) { int mca_btl_openib_ft_event(int state) {
if(OPAL_CRS_CHECKPOINT == state) { if(OPAL_CRS_CHECKPOINT == state) {
; ;

Просмотреть файл

@ -546,13 +546,11 @@ static inline int mca_btl_openib_post_srr(mca_btl_openib_module_t* openib_btl,
for(i = 0; i < num_post; i++) { for(i = 0; i < num_post; i++) {
ompi_free_list_item_t* item; ompi_free_list_item_t* item;
mca_btl_openib_frag_t* frag;
OMPI_FREE_LIST_WAIT(free_list, item, rc); OMPI_FREE_LIST_WAIT(free_list, item, rc);
frag = (mca_btl_openib_frag_t*)item; to_base_frag(item)->base.order = qp;
frag->base.order = qp; to_com_frag(item)->endpoint = NULL;
frag->endpoint = NULL;
if(ibv_post_srq_recv(openib_btl->qps[qp].u.srq_qp.srq, if(ibv_post_srq_recv(openib_btl->qps[qp].u.srq_qp.srq,
&frag->wr_desc.rd_desc, &to_recv_frag(item)->rd_desc,
&bad_wr)) { &bad_wr)) {
BTL_ERROR(("error posting receive descriptors to shared " BTL_ERROR(("error posting receive descriptors to shared "
"receive queue: %s", strerror(errno))); "receive queue: %s", strerror(errno)));

Просмотреть файл

@ -86,7 +86,7 @@ static void merge_values(ompi_btl_openib_ini_values_t *target,
ompi_btl_openib_ini_values_t *src); ompi_btl_openib_ini_values_t *src);
static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_endpoint_t *endpoint,
mca_btl_openib_frag_t *frag, mca_btl_openib_recv_frag_t *frag,
size_t byte_len, const int prio); size_t byte_len, const int prio);
static char* btl_openib_component_status_to_string(enum ibv_wc_status status); static char* btl_openib_component_status_to_string(enum ibv_wc_status status);
static int btl_openib_component_progress(void); static int btl_openib_component_progress(void);
@ -95,9 +95,7 @@ static void btl_openib_frag_progress_pending_pp(
mca_btl_base_endpoint_t *endpoint, mca_btl_base_endpoint_t *endpoint,
const int qp); const int qp);
static void btl_openib_frag_progress_pending_srq( static void btl_openib_frag_progress_pending_srq(
mca_btl_openib_module_t* openib_btl, mca_btl_openib_module_t* openib_btl, const int qp);
mca_btl_base_endpoint_t *endpoint,
const int qp);
static void btl_openib_frag_progress_pending_put_get( static void btl_openib_frag_progress_pending_put_get(
mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint, mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint,
const int qp); const int qp);
@ -226,20 +224,20 @@ static int btl_openib_modex_send(void)
static void btl_openib_control(struct mca_btl_base_module_t* btl, static void btl_openib_control(struct mca_btl_base_module_t* btl,
mca_btl_base_tag_t tag, mca_btl_base_tag_t tag,
mca_btl_base_descriptor_t* descriptor, mca_btl_base_descriptor_t* des,
void* cbdata) void* cbdata)
{ {
/* dont return credits used for control messages */ /* don't return credits used for control messages */
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)descriptor; mca_btl_openib_endpoint_t* endpoint = to_com_frag(des)->endpoint;
mca_btl_openib_endpoint_t* endpoint = frag->endpoint; mca_btl_openib_control_header_t *ctl_hdr =
mca_btl_openib_control_header_t *ctl_hdr = frag->segment.seg_addr.pval; to_base_frag(des)->segment.seg_addr.pval;
mca_btl_openib_eager_rdma_header_t *rdma_hdr; mca_btl_openib_eager_rdma_header_t *rdma_hdr;
mca_btl_openib_rdma_credits_header_t *credits_hdr; mca_btl_openib_rdma_credits_header_t *credits_hdr;
int qp = frag->qp_idx; int qp = to_recv_frag(des)->qp_idx;
if(BTL_OPENIB_EAGER_RDMA_QP(qp)) { if(BTL_OPENIB_EAGER_RDMA_QP(qp)) {
/* if not sent via rdma */ /* if not sent via rdma */
if(!MCA_BTL_OPENIB_RDMA_FRAG(frag) && if(!MCA_BTL_OPENIB_RDMA_FRAG(des) &&
ctl_hdr->type == MCA_BTL_OPENIB_CONTROL_CREDITS) { ctl_hdr->type == MCA_BTL_OPENIB_CONTROL_CREDITS) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_received, 1); OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_received, 1);
/* rd_posted don't account for rsv preposts for credit message but /* rd_posted don't account for rsv preposts for credit message but
@ -733,15 +731,13 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
openib_btl->super.btl_mpool = openib_btl->hca->mpool; openib_btl->super.btl_mpool = openib_btl->hca->mpool;
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t)); init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
length = sizeof(mca_btl_openib_send_user_frag_t);
init_data->length = length;
init_data->type = MCA_BTL_OPENIB_FRAG_SEND_USER;
init_data->order = mca_btl_openib_component.rdma_qp; init_data->order = mca_btl_openib_component.rdma_qp;
init_data->list = &openib_btl->send_user_free; init_data->list = &openib_btl->send_user_free;
if(OMPI_SUCCESS != ompi_free_list_init_ex(&openib_btl->send_user_free, if(OMPI_SUCCESS != ompi_free_list_init_ex(&openib_btl->send_user_free,
length, 2, OBJ_CLASS(mca_btl_openib_send_user_frag_t), sizeof(mca_btl_openib_put_frag_t), 2,
OBJ_CLASS(mca_btl_openib_put_frag_t),
mca_btl_openib_component.ib_free_list_num, mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max, mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc, mca_btl_openib_component.ib_free_list_inc,
@ -750,15 +746,13 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
} }
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t)); init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
length = sizeof(mca_btl_openib_recv_user_frag_t);
init_data->length = length;
init_data->type = MCA_BTL_OPENIB_FRAG_RECV_USER;
init_data->order = mca_btl_openib_component.rdma_qp; init_data->order = mca_btl_openib_component.rdma_qp;
init_data->list = &openib_btl->recv_user_free; init_data->list = &openib_btl->recv_user_free;
if(OMPI_SUCCESS != ompi_free_list_init_ex(&openib_btl->recv_user_free, if(OMPI_SUCCESS != ompi_free_list_init_ex(&openib_btl->recv_user_free,
length, 2, OBJ_CLASS(mca_btl_openib_recv_user_frag_t), sizeof(mca_btl_openib_get_frag_t), 2,
OBJ_CLASS(mca_btl_openib_get_frag_t),
mca_btl_openib_component.ib_free_list_num, mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max, mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc, mca_btl_openib_component.ib_free_list_inc,
@ -767,19 +761,17 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
} }
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t)); init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
length = sizeof(mca_btl_openib_send_frag_control_t) + length = sizeof(mca_btl_openib_send_control_frag_t) +
sizeof(mca_btl_openib_header_t) + sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_footer_t) + sizeof(mca_btl_openib_footer_t) +
sizeof(mca_btl_openib_eager_rdma_header_t); sizeof(mca_btl_openib_eager_rdma_header_t);
init_data->length = sizeof(mca_btl_openib_eager_rdma_header_t);
init_data->type = MCA_BTL_OPENIB_FRAG_CONTROL;
init_data->order = mca_btl_openib_component.eager_rdma_qp; init_data->order = mca_btl_openib_component.eager_rdma_qp;
init_data->list = &openib_btl->send_free_control; init_data->list = &openib_btl->send_free_control;
if(OMPI_SUCCESS != ompi_free_list_init_ex(&openib_btl->send_free_control, if(OMPI_SUCCESS != ompi_free_list_init_ex(&openib_btl->send_free_control,
length, mca_btl_openib_component.buffer_alignment, length, mca_btl_openib_component.buffer_alignment,
OBJ_CLASS(mca_btl_openib_send_frag_control_t), OBJ_CLASS(mca_btl_openib_send_control_frag_t),
mca_btl_openib_component.ib_free_list_num, -1, mca_btl_openib_component.ib_free_list_num, -1,
mca_btl_openib_component.ib_free_list_inc, mca_btl_openib_component.ib_free_list_inc,
openib_btl->super.btl_mpool, mca_btl_openib_frag_init, openib_btl->super.btl_mpool, mca_btl_openib_frag_init,
@ -805,8 +797,6 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
sizeof(mca_btl_openib_footer_t) + sizeof(mca_btl_openib_footer_t) +
mca_btl_openib_component.qp_infos[qp].size; mca_btl_openib_component.qp_infos[qp].size;
init_data->length = mca_btl_openib_component.qp_infos[qp].size;
init_data->type = MCA_BTL_OPENIB_FRAG_SEND;
init_data->order = qp; init_data->order = qp;
init_data->list = &openib_btl->qps[qp].send_free; init_data->list = &openib_btl->qps[qp].send_free;
@ -827,8 +817,6 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
sizeof(mca_btl_openib_footer_t) + sizeof(mca_btl_openib_footer_t) +
mca_btl_openib_component.qp_infos[qp].size; mca_btl_openib_component.qp_infos[qp].size;
init_data->length = mca_btl_openib_component.qp_infos[qp].size;
init_data->type = MCA_BTL_OPENIB_FRAG_RECV;
init_data->order = qp; init_data->order = qp;
init_data->list = &openib_btl->qps[qp].recv_free; init_data->list = &openib_btl->qps[qp].recv_free;
@ -1135,36 +1123,37 @@ static void merge_values(ompi_btl_openib_ini_values_t *target,
static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl, static int btl_openib_handle_incoming(mca_btl_openib_module_t *openib_btl,
mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_endpoint_t *endpoint,
mca_btl_openib_frag_t *frag, mca_btl_openib_recv_frag_t *frag,
size_t byte_len, const int qp) size_t byte_len, const int qp)
{ {
mca_btl_base_descriptor_t *des = &to_base_frag(frag)->base;
mca_btl_openib_header_t *hdr = frag->hdr;
if(endpoint->nbo) { if(endpoint->nbo) {
BTL_OPENIB_HEADER_NTOH((*(frag->hdr))); BTL_OPENIB_HEADER_NTOH(*hdr);
} }
/* advance the segment address past the header and subtract from the /* advance the segment address past the header and subtract from the
* length..*/ * length..*/
frag->segment.seg_len = byte_len - sizeof(mca_btl_openib_header_t); des->des_dst->seg_len = byte_len - sizeof(mca_btl_openib_header_t);
/* call registered callback */ /* call registered callback */
openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super, openib_btl->ib_reg[hdr->tag].cbfunc(&openib_btl->super, hdr->tag, des,
frag->hdr->tag, &frag->base, openib_btl->ib_reg[hdr->tag].cbdata);
openib_btl->ib_reg[frag->hdr->tag].cbdata);
if(BTL_OPENIB_IS_RDMA_CREDITS(frag->hdr->credits) && if(BTL_OPENIB_IS_RDMA_CREDITS(hdr->credits) &&
BTL_OPENIB_CREDITS(frag->hdr->credits) > 0) { BTL_OPENIB_CREDITS(hdr->credits) > 0) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,
BTL_OPENIB_CREDITS(frag->hdr->credits)); BTL_OPENIB_CREDITS(hdr->credits));
} else { } else {
if(BTL_OPENIB_QP_TYPE_PP(qp) && frag->hdr->credits > 0) { if(BTL_OPENIB_QP_TYPE_PP(qp) && hdr->credits > 0) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits,
frag->hdr->credits); hdr->credits);
} }
} }
if(frag->hdr->cm_seen) { if(hdr->cm_seen) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -hdr->cm_seen);
-frag->hdr->cm_seen);
} }
/* We may receive credits here so try to progress only things that /* We may receive credits here so try to progress only things that
@ -1258,21 +1247,19 @@ static void btl_openib_frag_progress_pending_pp(
mca_btl_base_endpoint_t *endpoint, const int qp) mca_btl_base_endpoint_t *endpoint, const int qp)
{ {
opal_list_item_t *frag_item; opal_list_item_t *frag;
mca_btl_openib_frag_t* frag;
size_t i, len = opal_list_get_size(&endpoint->qps[qp].pending_frags); size_t i, len = opal_list_get_size(&endpoint->qps[qp].pending_frags);
/* check to see if we need to progress any pending descriptors */ /* check to see if we need to progress any pending descriptors */
for(i = 0; i < len && endpoint->qps[qp].sd_wqe > 0 && for(i = 0; i < len && endpoint->qps[qp].sd_wqe > 0 &&
BTL_OPENIB_TOKENS(endpoint, qp) > 0; i++) { BTL_OPENIB_TOKENS(endpoint, qp) > 0; i++) {
OPAL_THREAD_LOCK(&endpoint->endpoint_lock); OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
frag_item = frag = opal_list_remove_first(&(endpoint->qps[qp].pending_frags));
opal_list_remove_first(&(endpoint->qps[qp].pending_frags));
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if(NULL == (frag = (mca_btl_openib_frag_t *) frag_item)) if(NULL == frag)
break; break;
if(mca_btl_openib_endpoint_send(frag->endpoint, frag) == if(mca_btl_openib_endpoint_send(endpoint, to_send_frag(frag)) ==
OMPI_ERR_OUT_OF_RESOURCE) OMPI_ERR_OUT_OF_RESOURCE)
break; break;
} }
} }
@ -1280,43 +1267,39 @@ static void btl_openib_frag_progress_pending_pp(
static void btl_openib_frag_progress_pending_put_get( static void btl_openib_frag_progress_pending_put_get(
mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint, mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint,
const int qp) { const int qp) {
opal_list_item_t *frag_item; opal_list_item_t *frag;
mca_btl_openib_frag_t* frag;
size_t i, len = opal_list_get_size(&endpoint->pending_get_frags); size_t i, len = opal_list_get_size(&endpoint->pending_get_frags);
for(i = 0; i < len && endpoint->qps[qp].sd_wqe > 0 && for(i = 0; i < len && endpoint->qps[qp].sd_wqe > 0 &&
endpoint->get_tokens > 0; i++) { endpoint->get_tokens > 0; i++) {
OPAL_THREAD_LOCK(&endpoint->endpoint_lock); OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
frag_item = opal_list_remove_first(&(endpoint->pending_get_frags)); frag = opal_list_remove_first(&(endpoint->pending_get_frags));
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if(NULL == (frag = (mca_btl_openib_frag_t *) frag_item)) if(NULL == frag)
break; break;
if(mca_btl_openib_get((mca_btl_base_module_t *)openib_btl, if(mca_btl_openib_get((mca_btl_base_module_t *)openib_btl, endpoint,
frag->endpoint, (mca_btl_base_descriptor_t*)frag) == &to_base_frag(frag)->base) == OMPI_ERR_OUT_OF_RESOURCE)
OMPI_ERR_OUT_OF_RESOURCE)
break; break;
} }
len = opal_list_get_size(&endpoint->pending_put_frags); len = opal_list_get_size(&endpoint->pending_put_frags);
for(i = 0; i < len && endpoint->qps[qp].sd_wqe > 0; i++) { for(i = 0; i < len && endpoint->qps[qp].sd_wqe > 0; i++) {
OPAL_THREAD_LOCK(&endpoint->endpoint_lock); OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
frag_item = opal_list_remove_first(&(endpoint->pending_put_frags)); frag = opal_list_remove_first(&(endpoint->pending_put_frags));
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock); OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
if(NULL == (frag = (mca_btl_openib_frag_t *) frag_item)) if(NULL == frag)
break; break;
if(mca_btl_openib_put((mca_btl_base_module_t*)openib_btl, if(mca_btl_openib_put((mca_btl_base_module_t*)openib_btl, endpoint,
frag->endpoint, (mca_btl_base_descriptor_t*)frag) == &to_base_frag(frag)->base) == OMPI_ERR_OUT_OF_RESOURCE)
OMPI_ERR_OUT_OF_RESOURCE)
break; break;
} }
} }
static void btl_openib_frag_progress_pending_srq( static void btl_openib_frag_progress_pending_srq(
mca_btl_openib_module_t* openib_btl, mca_btl_base_endpoint_t *endpoint, mca_btl_openib_module_t* openib_btl, const int qp)
const int qp)
{ {
opal_list_item_t *frag_item; opal_list_item_t *frag;
mca_btl_openib_frag_t* frag;
size_t i, len; size_t i, len;
assert(BTL_OPENIB_QP_TYPE_SRQ(qp)); assert(BTL_OPENIB_QP_TYPE_SRQ(qp));
@ -1325,13 +1308,13 @@ static void btl_openib_frag_progress_pending_srq(
for(i = 0; i < len && openib_btl->qps[qp].u.srq_qp.sd_credits > 0; i++) { for(i = 0; i < len && openib_btl->qps[qp].u.srq_qp.sd_credits > 0; i++) {
/* dequeue resources due to global flow control */ /* dequeue resources due to global flow control */
OPAL_THREAD_LOCK(&openib_btl->ib_lock); OPAL_THREAD_LOCK(&openib_btl->ib_lock);
frag_item = frag =
opal_list_remove_first(&openib_btl->qps[qp].u.srq_qp.pending_frags); opal_list_remove_first(&openib_btl->qps[qp].u.srq_qp.pending_frags);
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock); OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
if(NULL == (frag = (mca_btl_openib_frag_t *) frag_item)) if(NULL == frag)
break; break;
if(mca_btl_openib_endpoint_send(frag->endpoint, frag) == if(mca_btl_openib_endpoint_send(to_com_frag(frag)->endpoint,
OMPI_ERR_OUT_OF_RESOURCE) to_send_frag(frag)) == OMPI_ERR_OUT_OF_RESOURCE)
break; break;
} }
} }
@ -1381,7 +1364,6 @@ static int btl_openib_component_progress(void)
{ {
int i, j, c; int i, j, c;
int count = 0, ret; int count = 0, ret;
mca_btl_openib_frag_t* frag;
mca_btl_openib_endpoint_t* endpoint; mca_btl_openib_endpoint_t* endpoint;
#if OMPI_HAVE_THREADS #if OMPI_HAVE_THREADS
@ -1395,6 +1377,7 @@ static int btl_openib_component_progress(void)
* queues. * queues.
*/ */
for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) { for(i = 0; i < mca_btl_openib_component.ib_num_btls; i++) {
mca_btl_openib_recv_frag_t* frag;
mca_btl_openib_module_t* openib_btl = mca_btl_openib_component.openib_btls[i]; mca_btl_openib_module_t* openib_btl = mca_btl_openib_component.openib_btls[i];
c = openib_btl->eager_rdma_buffers_count; c = openib_btl->eager_rdma_buffers_count;
@ -1414,7 +1397,7 @@ static int btl_openib_component_progress(void)
int qp; int qp;
opal_atomic_rmb(); opal_atomic_rmb();
if(endpoint->nbo) { if(endpoint->nbo) {
BTL_OPENIB_FOOTER_NTOH((*frag->ftr)); BTL_OPENIB_FOOTER_NTOH(*frag->ftr);
} }
size = MCA_BTL_OPENIB_RDMA_FRAG_GET_SIZE(frag->ftr); size = MCA_BTL_OPENIB_RDMA_FRAG_GET_SIZE(frag->ftr);
#if OMPI_ENABLE_DEBUG #if OMPI_ENABLE_DEBUG
@ -1429,11 +1412,12 @@ static int btl_openib_component_progress(void)
OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock); OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock);
frag->hdr = (mca_btl_openib_header_t*)(((char*)frag->ftr) - frag->hdr = (mca_btl_openib_header_t*)(((char*)frag->ftr) -
size + sizeof(mca_btl_openib_footer_t)); size + sizeof(mca_btl_openib_footer_t));
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + to_base_frag(frag)->segment.seg_addr.pval =
((unsigned char* )frag->hdr) +
sizeof(mca_btl_openib_header_t); sizeof(mca_btl_openib_header_t);
ret = btl_openib_handle_incoming(openib_btl, ret = btl_openib_handle_incoming(openib_btl,
frag->endpoint, frag, to_com_frag(frag)->endpoint, frag,
size - sizeof(mca_btl_openib_footer_t), size - sizeof(mca_btl_openib_footer_t),
frag->qp_idx); frag->qp_idx);
if (ret != MPI_SUCCESS) { if (ret != MPI_SUCCESS) {
@ -1446,7 +1430,7 @@ static int btl_openib_component_progress(void)
MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr); MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr);
while (endpoint->eager_rdma_local.tail != while (endpoint->eager_rdma_local.tail !=
endpoint->eager_rdma_local.head) { endpoint->eager_rdma_local.head) {
mca_btl_openib_frag_t *tf; mca_btl_openib_recv_frag_t *tf;
tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(endpoint, tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(endpoint,
endpoint->eager_rdma_local.tail); endpoint->eager_rdma_local.tail);
if (MCA_BTL_OPENIB_RDMA_FRAG_LOCAL (tf)) if (MCA_BTL_OPENIB_RDMA_FRAG_LOCAL (tf))
@ -1492,8 +1476,9 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
{ {
static char *cq_name[] = {"HP CQ", "LP CQ"}; static char *cq_name[] = {"HP CQ", "LP CQ"};
int cq, qp; int cq, qp;
int count = 0,ne = 0, ret; int count = 0,ne = 0;
mca_btl_openib_frag_t* frag; mca_btl_openib_com_frag_t* frag;
mca_btl_base_descriptor_t *des;
mca_btl_openib_endpoint_t* endpoint; mca_btl_openib_endpoint_t* endpoint;
mca_btl_openib_module_t *openib_btl = NULL; mca_btl_openib_module_t *openib_btl = NULL;
struct ibv_wc wc; struct ibv_wc wc;
@ -1504,15 +1489,24 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
ne = ibv_poll_cq(hca->ib_cq[cq], 1, &wc); ne = ibv_poll_cq(hca->ib_cq[cq], 1, &wc);
if(0 == ne) if(0 == ne)
continue; continue;
if(ne < 0 || wc.status != IBV_WC_SUCCESS) if(ne < 0)
goto error; goto error;
frag = (mca_btl_openib_frag_t*) (unsigned long) wc.wr_id; des = (mca_btl_base_descriptor_t*)(uintptr_t)wc.wr_id;
qp = frag->base.order; frag = to_com_frag(des);
/* For receive fragments "order" contains QP idx the fragment was posted
* to. For send fragments "order" contains QP idx the fragment was sent
* through */
qp = des->order;
endpoint = frag->endpoint; endpoint = frag->endpoint;
if(endpoint) if(endpoint)
openib_btl = endpoint->endpoint_btl; openib_btl = endpoint->endpoint_btl;
if(wc.status != IBV_WC_SUCCESS)
goto error;
/* Handle work completions */ /* Handle work completions */
switch(wc.opcode) { switch(wc.opcode) {
case IBV_WC_RDMA_READ: case IBV_WC_RDMA_READ:
@ -1522,8 +1516,7 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
case IBV_WC_RDMA_WRITE: case IBV_WC_RDMA_WRITE:
case IBV_WC_SEND: case IBV_WC_SEND:
/* Process a completed send/put/get */ /* Process a completed send/put/get */
frag->base.des_cbfunc(&openib_btl->super, endpoint, &frag->base, des->des_cbfunc(&openib_btl->super, endpoint, des, OMPI_SUCCESS);
OMPI_SUCCESS);
/* return send wqe */ /* return send wqe */
OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe, 1); OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe, 1);
@ -1531,7 +1524,7 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
if(IBV_WC_SEND == wc.opcode && BTL_OPENIB_QP_TYPE_SRQ(qp)) { if(IBV_WC_SEND == wc.opcode && BTL_OPENIB_QP_TYPE_SRQ(qp)) {
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
/* new SRQ credit available. Try to progress pending frags*/ /* new SRQ credit available. Try to progress pending frags*/
btl_openib_frag_progress_pending_srq(openib_btl, endpoint, qp); btl_openib_frag_progress_pending_srq(openib_btl, qp);
} }
/* new wqe and/or get token available. Try to progress pending frags */ /* new wqe and/or get token available. Try to progress pending frags */
btl_openib_frag_progress_pending_pp(endpoint, qp); btl_openib_frag_progress_pending_pp(endpoint, qp);
@ -1548,13 +1541,15 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
} }
/* Process a RECV */ /* Process a RECV */
ret = btl_openib_handle_incoming(openib_btl, endpoint, frag, wc.byte_len, qp); if(btl_openib_handle_incoming(openib_btl, endpoint,
if (ret != OMPI_SUCCESS) { to_recv_frag(frag), wc.byte_len, qp) != OMPI_SUCCESS) {
openib_btl->error_cb(&openib_btl->super, MCA_BTL_ERROR_FLAGS_FATAL); openib_btl->error_cb(&openib_btl->super,
MCA_BTL_ERROR_FLAGS_FATAL);
return 0; return 0;
} }
OMPI_FREE_LIST_RETURN(frag->list, (ompi_free_list_item_t*) frag); MCA_BTL_IB_FRAG_RETURN(frag);
if(BTL_OPENIB_QP_TYPE_SRQ(qp)) { if(BTL_OPENIB_QP_TYPE_SRQ(qp)) {
OPAL_THREAD_ADD32((int32_t*) OPAL_THREAD_ADD32((int32_t*)
&openib_btl->qps[qp].u.srq_qp.rd_posted, -1); &openib_btl->qps[qp].u.srq_qp.rd_posted, -1);
@ -1596,21 +1591,16 @@ error:
} else { } else {
static int flush_err_printed[] = {0, 0}; static int flush_err_printed[] = {0, 0};
ompi_proc_t* remote_proc = NULL; ompi_proc_t* remote_proc = NULL;
frag = (mca_btl_openib_frag_t*) (unsigned long) wc.wr_id; if(frag && endpoint && endpoint->endpoint_proc &&
if(frag) { endpoint->endpoint_proc->proc_ompi) {
endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint; remote_proc = endpoint->endpoint_proc->proc_ompi;
if(endpoint &&
endpoint->endpoint_proc &&
endpoint->endpoint_proc->proc_ompi) {
remote_proc = endpoint->endpoint_proc->proc_ompi;
}
} }
if(wc.status != IBV_WC_WR_FLUSH_ERR || !flush_err_printed[cq]++) { if(wc.status != IBV_WC_WR_FLUSH_ERR || !flush_err_printed[cq]++) {
BTL_PEER_ERROR(remote_proc, ("error polling %s with status %s " BTL_PEER_ERROR(remote_proc, ("error polling %s with status %s "
"status number %d for wr_id %llu opcode %d qp_idx %d", "status number %d for wr_id %llu opcode %d qp_idx %d",
cq_name[cq], cq_name[cq],
btl_openib_component_status_to_string(wc.status), btl_openib_component_status_to_string(wc.status),
wc.status, wc.wr_id, wc.opcode, frag->qp_idx)); wc.status, wc.wr_id, wc.opcode, qp));
} }
if(wc.status == IBV_WC_RETRY_EXC_ERR) { if(wc.status == IBV_WC_RETRY_EXC_ERR) {
opal_show_help("help-mpi-btl-openib.txt", opal_show_help("help-mpi-btl-openib.txt",

Просмотреть файл

@ -45,7 +45,7 @@ struct mca_btl_openib_eager_rdma_remote_t {
typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remote_t; typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remote_t;
#define MCA_BTL_OPENIB_RDMA_FRAG(F) \ #define MCA_BTL_OPENIB_RDMA_FRAG(F) \
((F)->type == MCA_BTL_OPENIB_FRAG_EAGER_RDMA) (openib_frag_type(F) == MCA_BTL_OPENIB_FRAG_EAGER_RDMA)
#define EAGER_RDMA_BUFFER_REMOTE (0) #define EAGER_RDMA_BUFFER_REMOTE (0)
#define EAGER_RDMA_BUFFER_LOCAL (0xff) #define EAGER_RDMA_BUFFER_LOCAL (0xff)

Просмотреть файл

@ -48,62 +48,65 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint); static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint);
static int post_send(mca_btl_openib_module_t *openib_btl, static int post_send(mca_btl_openib_module_t *openib_btl,
mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_frag_t *frag, mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_send_frag_t *frag,
const int qp, const int do_rdma) const int qp, const int do_rdma)
{ {
struct ibv_send_wr *bad_wr; struct ibv_send_wr *bad_wr;
mca_btl_base_segment_t *seg = &to_base_frag(frag)->segment;
struct ibv_sge *sg = &to_com_frag(frag)->sg_entry;
struct ibv_send_wr *sr_desc = &to_out_frag(frag)->sr_desc;
assert(!do_rdma || BTL_OPENIB_EAGER_RDMA_QP(qp)); assert(!do_rdma || BTL_OPENIB_EAGER_RDMA_QP(qp));
frag->sg_entry.length = frag->segment.seg_len + sg->length = seg->seg_len + sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_header_t) +
(do_rdma ? sizeof(mca_btl_openib_footer_t) : 0); (do_rdma ? sizeof(mca_btl_openib_footer_t) : 0);
if(frag->sg_entry.length <= openib_btl->ib_inline_max) { if(sg->length <= openib_btl->ib_inline_max) {
frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED|IBV_SEND_INLINE; sr_desc->send_flags = IBV_SEND_SIGNALED|IBV_SEND_INLINE;
} else { } else {
frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED; sr_desc->send_flags = IBV_SEND_SIGNALED;
} }
if(endpoint->nbo) if(endpoint->nbo)
BTL_OPENIB_HEADER_HTON((*(frag->hdr))); BTL_OPENIB_HEADER_HTON(*frag->hdr);
if(do_rdma) { if(do_rdma) {
int32_t head; int32_t head;
mca_btl_openib_footer_t* ftr = mca_btl_openib_footer_t* ftr = (mca_btl_openib_footer_t*)
(mca_btl_openib_footer_t*)(((char*)frag->segment.seg_addr.pval) + (((char*)seg->seg_addr.pval) + seg->seg_len);
frag->segment.seg_len); sr_desc->opcode = IBV_WR_RDMA_WRITE;
frag->wr_desc.sr_desc.opcode = IBV_WR_RDMA_WRITE; MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, sg->length);
MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, frag->sg_entry.length);
MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr); MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr);
#if OMPI_ENABLE_DEBUG #if OMPI_ENABLE_DEBUG
((mca_btl_openib_footer_t*)(((char*)frag->segment.seg_addr.pval) + ((mca_btl_openib_footer_t*)(((char*)seg->seg_addr.pval) +
frag->segment.seg_len))->seq = seg->seg_len))->seq = endpoint->eager_rdma_remote.seq++;
endpoint->eager_rdma_remote.seq++;
#endif #endif
if(endpoint->nbo) if(endpoint->nbo)
BTL_OPENIB_FOOTER_HTON((*ftr)); BTL_OPENIB_FOOTER_HTON((*ftr));
frag->wr_desc.sr_desc.wr.rdma.rkey = endpoint->eager_rdma_remote.rkey; sr_desc->wr.rdma.rkey = endpoint->eager_rdma_remote.rkey;
MCA_BTL_OPENIB_RDMA_MOVE_INDEX(endpoint->eager_rdma_remote.head, head); MCA_BTL_OPENIB_RDMA_MOVE_INDEX(endpoint->eager_rdma_remote.head, head);
frag->wr_desc.sr_desc.wr.rdma.remote_addr = sr_desc->wr.rdma.remote_addr =
endpoint->eager_rdma_remote.base.lval + endpoint->eager_rdma_remote.base.lval +
head * openib_btl->eager_rdma_frag_size + head * openib_btl->eager_rdma_frag_size +
sizeof(mca_btl_openib_header_t) + sizeof(mca_btl_openib_header_t) +
mca_btl_openib_component.eager_limit + mca_btl_openib_component.eager_limit +
sizeof(mca_btl_openib_footer_t); sizeof(mca_btl_openib_footer_t);
frag->wr_desc.sr_desc.wr.rdma.remote_addr -= frag->sg_entry.length; sr_desc->wr.rdma.remote_addr -= sg->length;
} else { } else {
if(BTL_OPENIB_QP_TYPE_SRQ(qp)) { if(BTL_OPENIB_QP_TYPE_SRQ(qp)) {
frag->wr_desc.sr_desc.opcode = IBV_WR_SEND_WITH_IMM; sr_desc->opcode = IBV_WR_SEND_WITH_IMM;
frag->wr_desc.sr_desc.imm_data = endpoint->rem_info.rem_index; sr_desc->imm_data = endpoint->rem_info.rem_index;
} else { } else {
frag->wr_desc.sr_desc.opcode = IBV_WR_SEND; sr_desc->opcode = IBV_WR_SEND;
} }
} }
frag->base.order = qp; to_base_frag(frag)->base.order = qp;
return ibv_post_send(endpoint->qps[qp].lcl_qp, &frag->wr_desc.sr_desc, &bad_wr);
assert(sg->addr == (uint64_t)frag->hdr);
return ibv_post_send(endpoint->qps[qp].lcl_qp, sr_desc, &bad_wr);
} }
/* /*
@ -112,7 +115,7 @@ static int post_send(mca_btl_openib_module_t *openib_btl,
static int btl_openib_acquire_send_resources( static int btl_openib_acquire_send_resources(
mca_btl_openib_module_t *openib_btl, mca_btl_openib_module_t *openib_btl,
mca_btl_openib_endpoint_t *endpoint, mca_btl_openib_endpoint_t *endpoint,
mca_btl_openib_frag_t *frag, int *qp, int *do_rdma) mca_btl_openib_send_frag_t *frag, int *qp, int *do_rdma)
{ {
if(*do_rdma) { if(*do_rdma) {
if(OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, -1) < 0) { if(OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, -1) < 0) {
@ -164,22 +167,23 @@ static int btl_openib_acquire_send_resources(
} while(0 == OPAL_ATOMIC_CMPSET_32(&FROM, TO, 0)) } while(0 == OPAL_ATOMIC_CMPSET_32(&FROM, TO, 0))
/* this function is called with endpoint->endpoint_lock held */ /* this function is called with endpoint->endpoint_lock held */
static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* openib_btl, static inline int mca_btl_openib_endpoint_post_send(
mca_btl_openib_endpoint_t * endpoint, mca_btl_openib_module_t* openib_btl,
mca_btl_openib_frag_t * frag) mca_btl_openib_endpoint_t *endpoint,
mca_btl_openib_send_frag_t *frag)
{ {
mca_btl_openib_header_t *hdr = frag->hdr;
mca_btl_base_descriptor_t *des = &to_base_frag(frag)->base;
int do_rdma = 0, qp, ib_rc; int do_rdma = 0, qp, ib_rc;
int32_t cm_return; int32_t cm_return;
frag->sg_entry.addr = (unsigned long) frag->hdr; if(des->order != MCA_BTL_NO_ORDER) {
qp = des->order; /* if order is provided use it */
if(frag->base.order != MCA_BTL_NO_ORDER) {
qp = frag->base.order; /* if order is provided use it */
} else { } else {
qp = frag->qp_idx; qp = frag->qp_idx;
if(frag->segment.seg_len <= mca_btl_openib_component.eager_limit && if(des->des_src->seg_len <= mca_btl_openib_component.eager_limit &&
(frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY)) (des->des_flags & MCA_BTL_DES_FLAGS_PRIORITY))
do_rdma = 1; /* High priority frag. Try to send over eager RDMA */ do_rdma = 1; /* High priority frag. Try to send over eager RDMA */
} }
@ -187,42 +191,41 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
&do_rdma) == OMPI_ERR_OUT_OF_RESOURCE) &do_rdma) == OMPI_ERR_OUT_OF_RESOURCE)
return OMPI_SUCCESS; return OMPI_SUCCESS;
frag->hdr->credits = 0; hdr->credits = 0;
if(BTL_OPENIB_EAGER_RDMA_QP(qp)) { if(BTL_OPENIB_EAGER_RDMA_QP(qp)) {
GET_CREDITS(endpoint->eager_rdma_local.credits, frag->hdr->credits); GET_CREDITS(endpoint->eager_rdma_local.credits, hdr->credits);
if(frag->hdr->credits) if(hdr->credits)
frag->hdr->credits |= BTL_OPENIB_RDMA_CREDITS_FLAG; hdr->credits |= BTL_OPENIB_RDMA_CREDITS_FLAG;
} }
if(BTL_OPENIB_QP_TYPE_PP(qp) && if(BTL_OPENIB_QP_TYPE_PP(qp) && 0 == hdr->credits) {
0 == frag->hdr->credits) { GET_CREDITS(endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits);
GET_CREDITS(endpoint->qps[qp].u.pp_qp.rd_credits, frag->hdr->credits);
} }
GET_CREDITS(endpoint->qps[qp].u.pp_qp.cm_return, cm_return); GET_CREDITS(endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
/* cm_seen is only 8 bits, but cm_return is 32 bits */ /* cm_seen is only 8 bits, but cm_return is 32 bits */
if(cm_return > 255) { if(cm_return > 255) {
frag->hdr->cm_seen = 255; hdr->cm_seen = 255;
cm_return -= 255; cm_return -= 255;
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return); OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
} else { } else {
frag->hdr->cm_seen = cm_return; hdr->cm_seen = cm_return;
} }
ib_rc = post_send(openib_btl, endpoint, frag, qp, do_rdma); ib_rc = post_send(openib_btl, endpoint, frag, qp, do_rdma);
if(ib_rc) { if(ib_rc) {
if(endpoint->nbo) { if(endpoint->nbo) {
BTL_OPENIB_HEADER_NTOH((*(frag->hdr))); BTL_OPENIB_HEADER_NTOH(*hdr);
} }
if(BTL_OPENIB_IS_RDMA_CREDITS(frag->hdr->credits)) { if(BTL_OPENIB_IS_RDMA_CREDITS(hdr->credits)) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits,
BTL_OPENIB_CREDITS(frag->hdr->credits)); BTL_OPENIB_CREDITS(hdr->credits));
} }
OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe, 1); OPAL_THREAD_ADD32(&endpoint->qps[qp].sd_wqe, 1);
if(do_rdma) { if(do_rdma) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
} else { } else {
if(BTL_OPENIB_QP_TYPE_PP(qp)) { if(BTL_OPENIB_QP_TYPE_PP(qp)) {
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits, frag->hdr->credits); OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits, hdr->credits);
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1); OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.sd_credits, 1);
} else { } else {
OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1); OPAL_THREAD_ADD32(&openib_btl->qps[qp].u.srq_qp.sd_credits, 1);
@ -371,8 +374,7 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) { for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
OBJ_DESTRUCT(&endpoint->qps[qp].pending_frags); OBJ_DESTRUCT(&endpoint->qps[qp].pending_frags);
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(endpoint->endpoint_btl, MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->qps[qp].pending_frags);
&endpoint->qps[qp].pending_frags);
if(ibv_destroy_qp(endpoint->qps[qp].lcl_qp)) { if(ibv_destroy_qp(endpoint->qps[qp].lcl_qp)) {
BTL_ERROR(("Failed to destroy QP:%d\n", qp)); BTL_ERROR(("Failed to destroy QP:%d\n", qp));
} }
@ -382,16 +384,13 @@ static void mca_btl_openib_endpoint_destruct(mca_btl_base_endpoint_t* endpoint)
} }
OBJ_DESTRUCT(&endpoint->endpoint_lock); OBJ_DESTRUCT(&endpoint->endpoint_lock);
/* Clean pending lists */ /* Clean pending lists */
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(endpoint->endpoint_btl, MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->pending_lazy_frags);
&endpoint->pending_lazy_frags);
OBJ_DESTRUCT(&endpoint->pending_lazy_frags); OBJ_DESTRUCT(&endpoint->pending_lazy_frags);
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(endpoint->endpoint_btl, MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->pending_get_frags);
&endpoint->pending_get_frags);
OBJ_DESTRUCT(&endpoint->pending_get_frags); OBJ_DESTRUCT(&endpoint->pending_get_frags);
MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(endpoint->endpoint_btl, MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(&endpoint->pending_put_frags);
&endpoint->pending_put_frags);
OBJ_DESTRUCT(&endpoint->pending_put_frags); OBJ_DESTRUCT(&endpoint->pending_put_frags);
} }
@ -422,8 +421,8 @@ int mca_btl_openib_endpoint_post_recvs(mca_btl_openib_endpoint_t *endpoint)
void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint) void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
{ {
opal_list_item_t *frag_item; opal_list_item_t *frag_item;
mca_btl_openib_frag_t *frag; mca_btl_openib_send_frag_t *frag;
mca_btl_openib_module_t* openib_btl; mca_btl_openib_module_t *openib_btl;
endpoint->endpoint_state = MCA_BTL_IB_CONNECTED; endpoint->endpoint_state = MCA_BTL_IB_CONNECTED;
@ -434,22 +433,22 @@ void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoint)
/* While there are frags in the list, process them */ /* While there are frags in the list, process them */
while (!opal_list_is_empty(&(endpoint->pending_lazy_frags))) { while (!opal_list_is_empty(&(endpoint->pending_lazy_frags))) {
frag_item = opal_list_remove_first(&(endpoint->pending_lazy_frags)); frag_item = opal_list_remove_first(&(endpoint->pending_lazy_frags));
frag = (mca_btl_openib_frag_t *) frag_item; frag = to_send_frag(frag_item);
openib_btl = endpoint->endpoint_btl; openib_btl = endpoint->endpoint_btl;
/* We need to post this one */ /* We need to post this one */
if(OMPI_SUCCESS != mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag)) if(OMPI_SUCCESS != mca_btl_openib_endpoint_post_send(openib_btl,
endpoint, frag))
BTL_ERROR(("Error posting send")); BTL_ERROR(("Error posting send"));
} }
} }
/* /*
* Attempt to send a fragment using a given endpoint. If the endpoint is not * Attempt to send a fragment using a given endpoint. If the endpoint is not
* connected, queue the fragment and start the connection as required. * connected, queue the fragment and start the connection as required.
*/ */
int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t* endpoint, int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t* endpoint,
mca_btl_openib_frag_t* frag) mca_btl_openib_send_frag_t* frag)
{ {
int rc; int rc;
bool call_progress = false; bool call_progress = false;
@ -530,7 +529,7 @@ static void mca_btl_openib_endpoint_credits(
int qp; int qp;
mca_btl_openib_frag_t *frag = (mca_btl_openib_frag_t*)descriptor; mca_btl_openib_send_control_frag_t *frag = to_send_control_frag(descriptor);
qp = frag->qp_idx; qp = frag->qp_idx;
@ -554,7 +553,7 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
const int qp) const int qp)
{ {
mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl; mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
mca_btl_openib_frag_t* frag; mca_btl_openib_send_control_frag_t* frag;
mca_btl_openib_rdma_credits_header_t *credits_hdr; mca_btl_openib_rdma_credits_header_t *credits_hdr;
int do_rdma = 0, ib_rc; int do_rdma = 0, ib_rc;
int32_t cm_return; int32_t cm_return;
@ -565,11 +564,19 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
MCA_BTL_IB_FRAG_ALLOC_CREDIT_WAIT(openib_btl, frag, ib_rc); MCA_BTL_IB_FRAG_ALLOC_CREDIT_WAIT(openib_btl, frag, ib_rc);
frag->qp_idx = qp; frag->qp_idx = qp;
endpoint->qps[qp].credit_frag = frag; endpoint->qps[qp].credit_frag = frag;
/* set those once and forever */
to_base_frag(frag)->base.des_cbfunc = mca_btl_openib_endpoint_credits;
to_base_frag(frag)->base.des_cbdata = NULL;
to_com_frag(frag)->endpoint = endpoint;
frag->hdr->tag = MCA_BTL_TAG_BTL;
to_base_frag(frag)->segment.seg_len =
sizeof(mca_btl_openib_rdma_credits_header_t);
} }
assert(frag->qp_idx == qp); assert(frag->qp_idx == qp);
credits_hdr = credits_hdr =
(mca_btl_openib_rdma_credits_header_t*)frag->segment.seg_addr.pval; (mca_btl_openib_rdma_credits_header_t*)
to_base_frag(frag)->segment.seg_addr.pval;
if(BTL_OPENIB_EAGER_RDMA_QP(qp)) { if(BTL_OPENIB_EAGER_RDMA_QP(qp)) {
if(OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, -1) < 0) { if(OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, -1) < 0) {
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1); OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
@ -587,11 +594,6 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
} }
} }
frag->base.des_cbfunc = mca_btl_openib_endpoint_credits;
frag->base.des_cbdata = NULL;
frag->endpoint = endpoint;
frag->hdr->tag = MCA_BTL_TAG_BTL;
GET_CREDITS(endpoint->qps[qp].u.pp_qp.rd_credits, frag->hdr->credits); GET_CREDITS(endpoint->qps[qp].u.pp_qp.rd_credits, frag->hdr->credits);
GET_CREDITS(endpoint->qps[qp].u.pp_qp.cm_return, cm_return); GET_CREDITS(endpoint->qps[qp].u.pp_qp.cm_return, cm_return);
@ -612,24 +614,26 @@ void mca_btl_openib_endpoint_send_credits(mca_btl_openib_endpoint_t* endpoint,
if(endpoint->nbo) if(endpoint->nbo)
BTL_OPENIB_RDMA_CREDITS_HEADER_HTON((*credits_hdr)); BTL_OPENIB_RDMA_CREDITS_HEADER_HTON((*credits_hdr));
frag->segment.seg_len = sizeof(mca_btl_openib_rdma_credits_header_t);
frag->sg_entry.addr = (unsigned long)frag->hdr;
if((ib_rc = post_send(openib_btl, endpoint, frag, qp, do_rdma))) { if(!(ib_rc = post_send(openib_btl, endpoint, frag, qp, do_rdma)))
if(endpoint->nbo) { return;
BTL_OPENIB_HEADER_NTOH((*frag->hdr));
BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH((*credits_hdr)); if(endpoint->nbo) {
} BTL_OPENIB_HEADER_NTOH(*frag->hdr);
BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp); BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(*credits_hdr);
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits, frag->hdr->credits);
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, credits_hdr->rdma_credits);
if(do_rdma)
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
else
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
BTL_ERROR(("error posting send request errno %d says %s", ib_rc,
strerror(errno)));
} }
BTL_OPENIB_CREDITS_SEND_UNLOCK(endpoint, qp);
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.rd_credits,
frag->hdr->credits);
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits,
credits_hdr->rdma_credits);
if(do_rdma)
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens, 1);
else
OPAL_THREAD_ADD32(&endpoint->qps[qp].u.pp_qp.cm_sent, -1);
BTL_ERROR(("error posting send request errno %d says %s", ib_rc,
strerror(errno)));
} }
/* local callback function for completion of eager rdma connect */ /* local callback function for completion of eager rdma connect */
@ -639,17 +643,16 @@ static void mca_btl_openib_endpoint_eager_rdma_connect_cb(
struct mca_btl_base_descriptor_t* descriptor, struct mca_btl_base_descriptor_t* descriptor,
int status) int status)
{ {
MCA_BTL_IB_FRAG_RETURN(((mca_btl_openib_module_t*)btl), MCA_BTL_IB_FRAG_RETURN(descriptor);
((mca_btl_openib_frag_t*)descriptor));
} }
/* send the eager rdma conect message to the remote endpoint */ /* send the eager rdma connect message to the remote endpoint */
static int mca_btl_openib_endpoint_send_eager_rdma( static int mca_btl_openib_endpoint_send_eager_rdma(
mca_btl_base_endpoint_t* endpoint) mca_btl_base_endpoint_t* endpoint)
{ {
mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl; mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
mca_btl_openib_eager_rdma_header_t *rdma_hdr; mca_btl_openib_eager_rdma_header_t *rdma_hdr;
mca_btl_openib_frag_t* frag; mca_btl_openib_send_control_frag_t* frag;
int rc; int rc;
MCA_BTL_IB_FRAG_ALLOC_CREDIT_WAIT(openib_btl, frag, rc); MCA_BTL_IB_FRAG_ALLOC_CREDIT_WAIT(openib_btl, frag, rc);
@ -657,13 +660,17 @@ static int mca_btl_openib_endpoint_send_eager_rdma(
return -1; return -1;
} }
frag->base.des_cbfunc = mca_btl_openib_endpoint_eager_rdma_connect_cb; to_base_frag(frag)->base.des_cbfunc =
frag->base.des_cbdata = NULL; mca_btl_openib_endpoint_eager_rdma_connect_cb;
frag->endpoint = endpoint; to_base_frag(frag)->base.des_cbdata = NULL;
frag->base.des_flags |= MCA_BTL_DES_FLAGS_PRIORITY; to_base_frag(frag)->base.des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
to_send_frag(frag)->qp_idx = 0;
to_base_frag(frag)->segment.seg_len =
sizeof(mca_btl_openib_eager_rdma_header_t);
to_com_frag(frag)->endpoint = endpoint;
frag->hdr->tag = MCA_BTL_TAG_BTL; frag->hdr->tag = MCA_BTL_TAG_BTL;
rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)frag->segment.seg_addr.pval; rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)to_base_frag(frag)->segment.seg_addr.pval;
rdma_hdr->control.type = MCA_BTL_OPENIB_CONTROL_RDMA; rdma_hdr->control.type = MCA_BTL_OPENIB_CONTROL_RDMA;
rdma_hdr->rkey = endpoint->eager_rdma_local.reg->mr->rkey; rdma_hdr->rkey = endpoint->eager_rdma_local.reg->mr->rkey;
rdma_hdr->rdma_start.lval = ompi_ptr_ptol(endpoint->eager_rdma_local.base.pval); rdma_hdr->rdma_start.lval = ompi_ptr_ptol(endpoint->eager_rdma_local.base.pval);
@ -675,7 +682,6 @@ static int mca_btl_openib_endpoint_send_eager_rdma(
rdma_hdr->control.type, rdma_hdr->control.type,
sizeof(mca_btl_openib_eager_rdma_header_t) sizeof(mca_btl_openib_eager_rdma_header_t)
)); ));
frag->segment.seg_len = sizeof(mca_btl_openib_eager_rdma_header_t);
if(endpoint->nbo) { if(endpoint->nbo) {
BTL_OPENIB_EAGER_RDMA_CONTROL_HEADER_HTON((*rdma_hdr)); BTL_OPENIB_EAGER_RDMA_CONTROL_HEADER_HTON((*rdma_hdr));
@ -688,7 +694,7 @@ static int mca_btl_openib_endpoint_send_eager_rdma(
)); ));
} }
if (mca_btl_openib_endpoint_send(endpoint, frag) != OMPI_SUCCESS) { if (mca_btl_openib_endpoint_send(endpoint, frag) != OMPI_SUCCESS) {
MCA_BTL_IB_FRAG_RETURN(openib_btl, frag); MCA_BTL_IB_FRAG_RETURN(frag);
BTL_ERROR(("Error sending RDMA buffer", strerror(errno))); BTL_ERROR(("Error sending RDMA buffer", strerror(errno)));
return -1; return -1;
} }
@ -742,18 +748,18 @@ void mca_btl_openib_endpoint_connect_eager_rdma(
item->ptr = buf + i * openib_btl->eager_rdma_frag_size; item->ptr = buf + i * openib_btl->eager_rdma_frag_size;
OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_t); OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_t);
init_data.length = mca_btl_openib_component.eager_limit;
init_data.order = mca_btl_openib_component.eager_rdma_qp; init_data.order = mca_btl_openib_component.eager_rdma_qp;
init_data.type = MCA_BTL_OPENIB_FRAG_EAGER_RDMA;
init_data.list = NULL; init_data.list = NULL;
mca_btl_openib_frag_init(item, &init_data); mca_btl_openib_frag_init(item, &init_data);
frag = (mca_btl_openib_recv_frag_t*) item; frag = to_recv_frag(item);
frag->ftr = (mca_btl_openib_footer_t*)((char*)frag->segment.seg_addr.pval to_base_frag(frag)->type = MCA_BTL_OPENIB_FRAG_EAGER_RDMA;
+ frag->size); to_com_frag(frag)->endpoint = endpoint;
frag->ftr = (mca_btl_openib_footer_t*)
((char*)to_base_frag(frag)->segment.seg_addr.pval +
mca_btl_openib_component.eager_limit);
MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr); MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr);
((mca_btl_openib_frag_t*)item)->endpoint = endpoint;
} }
endpoint->eager_rdma_local.frags = headers_buf; endpoint->eager_rdma_local.frags = headers_buf;

Просмотреть файл

@ -121,7 +121,7 @@ struct mca_btl_openib_endpoint_qp_t {
case of PP QP, if there is case of PP QP, if there is
no credit available */ no credit available */
int32_t rd_credit_send_lock; /**< Lock credit send fragment */ int32_t rd_credit_send_lock; /**< Lock credit send fragment */
struct mca_btl_openib_frag_t *credit_frag; mca_btl_openib_send_control_frag_t *credit_frag;
union { union {
mca_btl_openib_endpoint_srq_qp_t srq_qp; mca_btl_openib_endpoint_srq_qp_t srq_qp;
mca_btl_openib_endpoint_pp_qp_t pp_qp; mca_btl_openib_endpoint_pp_qp_t pp_qp;
@ -199,7 +199,7 @@ typedef mca_btl_base_endpoint_t mca_btl_openib_endpoint_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t); OBJ_CLASS_DECLARATION(mca_btl_openib_endpoint_t);
int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t* endpoint, int mca_btl_openib_endpoint_send(mca_btl_base_endpoint_t* endpoint,
struct mca_btl_openib_frag_t* frag); struct mca_btl_openib_send_frag_t* frag);
void mca_btl_openib_endpoint_send_credits(mca_btl_base_endpoint_t*, const int); void mca_btl_openib_endpoint_send_credits(mca_btl_base_endpoint_t*, const int);
void mca_btl_openib_endpoint_connect_eager_rdma(mca_btl_openib_endpoint_t*); void mca_btl_openib_endpoint_connect_eager_rdma(mca_btl_openib_endpoint_t*);
int mca_btl_openib_endpoint_post_recvs(mca_btl_openib_endpoint_t *endpoint); int mca_btl_openib_endpoint_post_recvs(mca_btl_openib_endpoint_t *endpoint);
@ -233,13 +233,11 @@ static inline int mca_btl_openib_endpoint_post_rr(mca_btl_base_endpoint_t *endpo
for(i = 0; i < (num_post + cm_received); i++) { for(i = 0; i < (num_post + cm_received); i++) {
ompi_free_list_item_t* item; ompi_free_list_item_t* item;
mca_btl_openib_frag_t* frag;
OMPI_FREE_LIST_WAIT(free_list, item, rc); OMPI_FREE_LIST_WAIT(free_list, item, rc);
frag = (mca_btl_openib_frag_t*)item; to_base_frag(item)->base.order = qp;
frag->endpoint = endpoint; to_com_frag(item)->endpoint = endpoint;
frag->base.order = qp;
if(ibv_post_recv(endpoint->qps[qp].lcl_qp, if(ibv_post_recv(endpoint->qps[qp].lcl_qp,
&frag->wr_desc.rd_desc, &to_recv_frag(item)->rd_desc,
&bad_wr)) { &bad_wr)) {
BTL_ERROR(("error posting receive errno says %s\n", BTL_ERROR(("error posting receive errno says %s\n",
strerror(errno))); strerror(errno)));

Просмотреть файл

@ -22,136 +22,172 @@
#include "btl_openib_frag.h" #include "btl_openib_frag.h"
#include "btl_openib_eager_rdma.h" #include "btl_openib_eager_rdma.h"
void mca_btl_openib_frag_init(ompi_free_list_item_t* item, void* ctx) { void mca_btl_openib_frag_init(ompi_free_list_item_t* item, void* ctx)
{
mca_btl_openib_frag_init_data_t* init_data = mca_btl_openib_frag_init_data_t* init_data = ctx;
(mca_btl_openib_frag_init_data_t*) ctx; mca_btl_openib_frag_t *frag = to_base_frag(item);
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*) item;
mca_btl_openib_reg_t* registration = if(MCA_BTL_OPENIB_FRAG_RECV == frag->type) {
(mca_btl_openib_reg_t*)frag->base.super.registration; to_recv_frag(frag)->qp_idx = init_data->order;
to_com_frag(frag)->sg_entry.length =
frag->size = init_data->length; mca_btl_openib_component.qp_infos[init_data->order].size +
assert(init_data->order != 255); sizeof(mca_btl_openib_header_t);
frag->base.order = MCA_BTL_NO_ORDER;
frag->type = init_data->type;
frag->list = init_data->list;
frag->qp_idx = init_data->order;
frag->hdr = (mca_btl_openib_header_t*)frag->base.super.ptr;
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
if(registration) {
frag->registration = registration;
frag->sg_entry.lkey = registration->mr->lkey;
frag->segment.seg_key.key32[0] = frag->sg_entry.lkey;
} }
/* init the segment address to start after the btl header */
frag->segment.seg_len = frag->size;
frag->sg_entry.addr = (unsigned long) frag->hdr;
frag->sg_entry.length = frag->size + sizeof(mca_btl_openib_header_t);
frag->base.des_flags = 0;
return; if(MCA_BTL_OPENIB_FRAG_SEND == frag->type)
to_send_frag(frag)->qp_idx = init_data->order;
frag->list = init_data->list;
} }
static void base_constructor(mca_btl_openib_frag_t *frag)
{
static void mca_btl_openib_send_frag_common_constructor(mca_btl_openib_frag_t* frag) frag->base.order = MCA_BTL_NO_ORDER;
{
frag->base.des_src = &frag->segment;
frag->base.des_src_cnt = 1;
frag->base.des_dst = NULL;
frag->base.des_dst_cnt = 0;
frag->wr_desc.sr_desc.wr_id = (unsigned long) frag;
frag->wr_desc.sr_desc.sg_list = &frag->sg_entry;
frag->wr_desc.sr_desc.num_sge = 1;
frag->wr_desc.sr_desc.opcode = IBV_WR_SEND;
frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;
frag->wr_desc.sr_desc.next = NULL;
} }
static void mca_btl_openib_recv_frag_common_constructor(mca_btl_openib_frag_t* frag) static void com_constructor(mca_btl_openib_com_frag_t *frag)
{ {
frag->base.des_dst = &frag->segment; mca_btl_openib_frag_t *base_frag = to_base_frag(frag);
frag->base.des_dst_cnt = 1; mca_btl_openib_reg_t* reg =
frag->base.des_src = NULL; (mca_btl_openib_reg_t*)base_frag->base.super.registration;
frag->base.des_src_cnt = 0;
frag->registration = reg;
frag->wr_desc.rd_desc.wr_id = (unsigned long) frag;
frag->wr_desc.rd_desc.sg_list = &frag->sg_entry; if(reg) {
frag->wr_desc.rd_desc.num_sge = 1; frag->sg_entry.lkey = reg->mr->lkey;
frag->wr_desc.rd_desc.next = NULL; base_frag->segment.seg_key.key32[0] = reg->mr->lkey;
}
} }
static void out_constructor(mca_btl_openib_out_frag_t *frag)
{
mca_btl_openib_frag_t *base_frag = to_base_frag(frag);
static void mca_btl_openib_recv_user_frag_constructor(mca_btl_openib_frag_t* frag) base_frag->base.des_src = &base_frag->segment;
{ base_frag->base.des_src_cnt = 1;
frag->registration = NULL; base_frag->base.des_dst = NULL;
frag->hdr = (mca_btl_openib_header_t*)frag->base.super.ptr; base_frag->base.des_dst_cnt = 0;
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
/* init the segment address to start after the btl header */
frag->segment.seg_len = frag->size;
frag->sg_entry.addr = (unsigned long) frag->hdr;
frag->sg_entry.length = frag->size + sizeof(mca_btl_openib_header_t);
frag->base.des_flags = 0;
mca_btl_openib_recv_frag_common_constructor(frag); frag->sr_desc.wr_id = (uint64_t)frag;
frag->sr_desc.sg_list = &to_com_frag(frag)->sg_entry;
frag->sr_desc.num_sge = 1;
frag->sr_desc.opcode = IBV_WR_SEND;
frag->sr_desc.send_flags = IBV_SEND_SIGNALED;
frag->sr_desc.next = NULL;
} }
static void in_constructor(mca_btl_openib_in_frag_t *frag)
{
mca_btl_openib_frag_t *base_frag = to_base_frag(frag);
static void mca_btl_openib_send_user_frag_constructor(mca_btl_openib_frag_t* frag) base_frag->base.des_dst = &base_frag->segment;
{ base_frag->base.des_dst_cnt = 1;
frag->registration = NULL; base_frag->base.des_src = NULL;
frag->hdr = (mca_btl_openib_header_t*)frag->base.super.ptr; base_frag->base.des_src_cnt = 0;
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t); }
/* init the segment address to start after the btl header */
frag->segment.seg_len = frag->size;
frag->sg_entry.addr = (unsigned long) frag->hdr;
frag->sg_entry.length = frag->size + sizeof(mca_btl_openib_header_t);
frag->base.des_flags = 0;
mca_btl_openib_send_frag_common_constructor(frag); static void send_constructor(mca_btl_openib_send_frag_t *frag)
{
mca_btl_openib_frag_t *base_frag = to_base_frag(frag);
base_frag->type = MCA_BTL_OPENIB_FRAG_SEND;
frag->hdr = (mca_btl_openib_header_t*)base_frag->base.super.ptr;
base_frag->segment.seg_addr.pval =
((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
to_com_frag(frag)->sg_entry.addr = (uint64_t)frag->hdr;
}
/*
 * Constructor for a receive fragment.
 *
 * Tags the fragment as MCA_BTL_OPENIB_FRAG_RECV, treats the pointer stored
 * in base.super.ptr as the BTL header, makes the payload segment start
 * immediately after that header, and fills in the receive work request so
 * that it references the fragment's single scatter/gather entry.
 *
 * Pointers are converted through uintptr_t before being stored as uint64_t
 * values: a direct pointer -> uint64_t cast changes size when pointers are
 * narrower than 64 bits and draws a "cast from pointer to integer of
 * different size" diagnostic.
 */
static void recv_constructor(mca_btl_openib_recv_frag_t *frag)
{
    mca_btl_openib_frag_t *base_frag = to_base_frag(frag);

    base_frag->type = MCA_BTL_OPENIB_FRAG_RECV;

    /* header sits at the start of the buffer, payload right behind it */
    frag->hdr = (mca_btl_openib_header_t*)base_frag->base.super.ptr;
    base_frag->segment.seg_addr.pval =
        ((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
    to_com_frag(frag)->sg_entry.addr = (uint64_t)(uintptr_t)frag->hdr;

    /* wr_id carries the fragment address itself */
    frag->rd_desc.wr_id = (uint64_t)(uintptr_t)frag;
    frag->rd_desc.sg_list = &to_com_frag(frag)->sg_entry;
    frag->rd_desc.num_sge = 1;
    frag->rd_desc.next = NULL;
}
/*
 * Constructor for a control send fragment: the only thing it does is
 * retag the fragment type as MCA_BTL_OPENIB_FRAG_CONTROL.
 */
static void send_control_constructor(mca_btl_openib_send_control_frag_t *frag)
{
    mca_btl_openib_frag_t *base_frag = to_base_frag(frag);

    base_frag->type = MCA_BTL_OPENIB_FRAG_CONTROL;
}
/*
 * Constructor for a put fragment: marks the fragment as
 * MCA_BTL_OPENIB_FRAG_SEND_USER and sets the opcode of its send work
 * request to IBV_WR_RDMA_WRITE.
 */
static void put_constructor(mca_btl_openib_put_frag_t *frag)
{
    mca_btl_openib_frag_t *base_frag = to_base_frag(frag);
    mca_btl_openib_out_frag_t *out_frag = to_out_frag(frag);

    out_frag->sr_desc.opcode = IBV_WR_RDMA_WRITE;
    base_frag->type = MCA_BTL_OPENIB_FRAG_SEND_USER;
}
static void get_constructor(mca_btl_openib_get_frag_t *frag)
{
to_base_frag(frag)->type = MCA_BTL_OPENIB_FRAG_RECV_USER;
frag->sr_desc.wr_id = (uint64_t)frag;
frag->sr_desc.sg_list = &to_com_frag(frag)->sg_entry;
frag->sr_desc.num_sge = 1;
frag->sr_desc.opcode = IBV_WR_RDMA_READ;
frag->sr_desc.send_flags = IBV_SEND_SIGNALED;
frag->sr_desc.next = NULL;
} }
OBJ_CLASS_INSTANCE( OBJ_CLASS_INSTANCE(
mca_btl_openib_frag_t, mca_btl_openib_frag_t,
mca_btl_base_descriptor_t, mca_btl_base_descriptor_t,
NULL, base_constructor,
NULL); NULL);
OBJ_CLASS_INSTANCE( OBJ_CLASS_INSTANCE(
mca_btl_openib_send_frag_t, mca_btl_openib_com_frag_t,
mca_btl_base_descriptor_t, mca_btl_openib_frag_t,
mca_btl_openib_send_frag_common_constructor, com_constructor,
NULL); NULL);
OBJ_CLASS_INSTANCE( OBJ_CLASS_INSTANCE(
mca_btl_openib_send_frag_control_t, mca_btl_openib_out_frag_t,
mca_btl_base_descriptor_t, mca_btl_openib_com_frag_t,
mca_btl_openib_send_frag_common_constructor, out_constructor,
NULL);
/*
 * Class registrations for the fragment types below.
 * NOTE(review): the argument order is presumed to be
 * (class, parent class, constructor, destructor) -- confirm against the
 * OBJ_CLASS_INSTANCE definition. No destructor is supplied for any of them.
 */

/* in_frag: registered with com_frag and in_constructor */
OBJ_CLASS_INSTANCE(
mca_btl_openib_in_frag_t,
mca_btl_openib_com_frag_t,
in_constructor,
NULL);
/* send_frag: registered with out_frag and send_constructor */
OBJ_CLASS_INSTANCE(
mca_btl_openib_send_frag_t,
mca_btl_openib_out_frag_t,
send_constructor,
NULL);
/* recv_frag: registered with in_frag and recv_constructor */
OBJ_CLASS_INSTANCE(
mca_btl_openib_recv_frag_t,
mca_btl_openib_in_frag_t,
recv_constructor,
NULL);
/* send_control_frag: registered with send_frag and send_control_constructor */
OBJ_CLASS_INSTANCE(
mca_btl_openib_send_control_frag_t,
mca_btl_openib_send_frag_t,
send_control_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_openib_put_frag_t,
mca_btl_openib_out_frag_t,
put_constructor,
NULL); NULL);
OBJ_CLASS_INSTANCE( OBJ_CLASS_INSTANCE(
mca_btl_openib_send_user_frag_t, mca_btl_openib_get_frag_t,
mca_btl_base_descriptor_t, mca_btl_openib_in_frag_t,
mca_btl_openib_send_user_frag_constructor, get_constructor,
NULL); NULL);
OBJ_CLASS_INSTANCE(
mca_btl_openib_recv_user_frag_t,
mca_btl_base_descriptor_t,
mca_btl_openib_recv_user_frag_constructor,
NULL);
OBJ_CLASS_INSTANCE(
mca_btl_openib_recv_frag_t,
mca_btl_base_descriptor_t,
mca_btl_openib_recv_frag_common_constructor,
NULL);

Просмотреть файл

@ -43,14 +43,14 @@ typedef struct mca_btl_openib_header_t mca_btl_openib_header_t;
#define BTL_OPENIB_IS_RDMA_CREDITS(I) ((I)&BTL_OPENIB_RDMA_CREDITS_FLAG) #define BTL_OPENIB_IS_RDMA_CREDITS(I) ((I)&BTL_OPENIB_RDMA_CREDITS_FLAG)
#define BTL_OPENIB_CREDITS(I) ((I)&~BTL_OPENIB_RDMA_CREDITS_FLAG) #define BTL_OPENIB_CREDITS(I) ((I)&~BTL_OPENIB_RDMA_CREDITS_FLAG)
#define BTL_OPENIB_HEADER_HTON(h) \ #define BTL_OPENIB_HEADER_HTON(h) \
do { \ do { \
h.credits = htons(h.credits); \ (h).credits = htons((h).credits); \
} while (0) } while (0)
#define BTL_OPENIB_HEADER_NTOH(h) \ #define BTL_OPENIB_HEADER_NTOH(h) \
do { \ do { \
h.credits = ntohs(h.credits); \ (h).credits = ntohs((h).credits); \
} while (0) } while (0)
@ -79,13 +79,13 @@ typedef struct mca_btl_openib_footer_t mca_btl_openib_footer_t;
#if OMPI_ENABLE_DEBUG #if OMPI_ENABLE_DEBUG
#define BTL_OPENIB_FOOTER_HTON(h) \ #define BTL_OPENIB_FOOTER_HTON(h) \
do { \ do { \
h.seq = htonl(h.seq); \ (h).seq = htonl((h).seq); \
MCA_BTL_OPENIB_FTR_SIZE_REVERSE(h); \ MCA_BTL_OPENIB_FTR_SIZE_REVERSE(h); \
} while (0) } while (0)
#define BTL_OPENIB_FOOTER_NTOH(h) \ #define BTL_OPENIB_FOOTER_NTOH(h) \
do { \ do { \
h.seq = ntohl(h.seq); \ (h).seq = ntohl((h).seq); \
MCA_BTL_OPENIB_FTR_SIZE_REVERSE(h); \ MCA_BTL_OPENIB_FTR_SIZE_REVERSE(h); \
} while (0) } while (0)
#else #else
@ -144,7 +144,7 @@ do { \
#define BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(h) \ #define BTL_OPENIB_RDMA_CREDITS_HEADER_NTOH(h) \
do { \ do { \
h.rdma_credits = ntohs(h.rdma_credits); \ (h).rdma_credits = ntohs((h).rdma_credits); \
} while (0) } while (0)
enum mca_btl_openib_frag_type_t { enum mca_btl_openib_frag_type_t {
@ -157,48 +157,84 @@ enum mca_btl_openib_frag_type_t {
}; };
typedef enum mca_btl_openib_frag_type_t mca_btl_openib_frag_type_t; typedef enum mca_btl_openib_frag_type_t mca_btl_openib_frag_type_t;
/* Read the type tag of any fragment; f is cast to the base fragment type
 * first, so any pointer that starts with mca_btl_openib_frag_t may be passed. */
#define openib_frag_type(f) (to_base_frag(f)->type)
/** /**
* IB send fragment derived type. * IB fragment derived type.
*/ */
struct mca_btl_openib_frag_t {
/* base openib frag */
typedef struct mca_btl_openib_frag_t {
mca_btl_base_descriptor_t base; mca_btl_base_descriptor_t base;
struct mca_btl_base_endpoint_t *endpoint;
mca_btl_openib_footer_t *ftr;
mca_btl_openib_header_t *hdr;
mca_btl_base_segment_t segment; mca_btl_base_segment_t segment;
size_t size;
mca_btl_openib_frag_type_t type; mca_btl_openib_frag_type_t type;
union{
struct ibv_recv_wr rd_desc;
struct ibv_send_wr sr_desc;
} wr_desc;
struct ibv_sge sg_entry;
struct mca_btl_openib_reg_t *registration;
ompi_free_list_t* list; ompi_free_list_t* list;
uint8_t qp_idx; } mca_btl_openib_frag_t;
};
typedef struct mca_btl_openib_frag_t mca_btl_openib_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_frag_t); OBJ_CLASS_DECLARATION(mca_btl_openib_frag_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_send_frag_t; #define to_base_frag(f) ((mca_btl_openib_frag_t*)(f))
/* frag used for communication */
typedef struct mca_btl_openib_com_frag_t {
/* must remain the first member: to_base_frag() is a plain pointer cast */
mca_btl_openib_frag_t super;
/* the scatter/gather entry that the send and receive work requests of
 * derived fragments point their sg_list at */
struct ibv_sge sg_entry;
/* taken from base.super.registration by com_constructor; when non-NULL
 * its mr->lkey is copied into sg_entry.lkey */
struct mca_btl_openib_reg_t *registration;
/* not set by the constructors; assigned by users of the fragment
 * (e.g. when a receive is posted) */
struct mca_btl_base_endpoint_t *endpoint;
} mca_btl_openib_com_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_com_frag_t);
/* unchecked cast of any fragment pointer to the communication fragment type */
#define to_com_frag(f) ((mca_btl_openib_com_frag_t*)(f))
/* outgoing fragment: a communication fragment plus a send work request */
typedef struct mca_btl_openib_out_frag_t {
/* must remain the first member so the to_*_frag() casts stay valid */
mca_btl_openib_com_frag_t super;
/* send work request; out_constructor sets opcode IBV_WR_SEND and
 * put_constructor changes it to IBV_WR_RDMA_WRITE */
struct ibv_send_wr sr_desc;
} mca_btl_openib_out_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_out_frag_t);
/* unchecked cast of a fragment pointer to the outgoing fragment type */
#define to_out_frag(f) ((mca_btl_openib_out_frag_t*)(f))
/* incoming fragment: same layout as the communication fragment (no extra
 * fields), but declared as its own class */
typedef struct mca_btl_openib_com_frag_t mca_btl_openib_in_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_in_frag_t);
/* unchecked cast of a fragment pointer to the incoming fragment type */
#define to_in_frag(f) ((mca_btl_openib_in_frag_t*)(f))
/* send fragment: an outgoing fragment that carries a BTL header */
typedef struct mca_btl_openib_send_frag_t {
/* must remain the first member so the to_*_frag() casts stay valid */
mca_btl_openib_out_frag_t super;
/* send_constructor points this at base.super.ptr; the payload segment
 * starts sizeof(mca_btl_openib_header_t) bytes after it */
mca_btl_openib_header_t *hdr;
/* footer pointer; not initialised by send_constructor */
mca_btl_openib_footer_t *ftr;
/* set from init_data->order by mca_btl_openib_frag_init for fragments of
 * type MCA_BTL_OPENIB_FRAG_SEND */
uint8_t qp_idx;
} mca_btl_openib_send_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_t); OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_send_user_frag_t; #define to_send_frag(f) ((mca_btl_openib_send_frag_t*)(f))
OBJ_CLASS_DECLARATION(mca_btl_openib_send_user_frag_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_recv_user_frag_t; typedef struct mca_btl_openib_recv_frag_t {
mca_btl_openib_in_frag_t super;
OBJ_CLASS_DECLARATION(mca_btl_openib_recv_user_frag_t); mca_btl_openib_header_t *hdr;
mca_btl_openib_footer_t *ftr;
struct ibv_recv_wr rd_desc;
uint8_t qp_idx;
} mca_btl_openib_recv_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_recv_frag_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_recv_frag_t; #define to_recv_frag(f) ((mca_btl_openib_recv_frag_t*)(f))
OBJ_CLASS_DECLARATION(mca_btl_openib_recv_frag_t);
typedef struct mca_btl_openib_frag_t mca_btl_openib_send_frag_control_t; typedef struct mca_btl_openib_out_frag_t mca_btl_openib_put_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_put_frag_t);
OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_control_t);
#define to_put_frag(f) ((mca_btl_openib_put_frag_t*)(f))
/* get fragment: an incoming fragment with its own send work request,
 * which the in_frag base type does not provide */
typedef struct mca_btl_openib_get_frag_t {
/* must remain the first member so the to_*_frag() casts stay valid */
mca_btl_openib_in_frag_t super;
/* get_constructor fills this in with opcode IBV_WR_RDMA_READ */
struct ibv_send_wr sr_desc;
} mca_btl_openib_get_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_get_frag_t);
/* unchecked cast of a fragment pointer to the get fragment type */
#define to_get_frag(f) ((mca_btl_openib_get_frag_t*)(f))
/* control fragment: same layout as a send fragment, but declared as its own
 * class; send_control_constructor tags it MCA_BTL_OPENIB_FRAG_CONTROL */
typedef struct mca_btl_openib_send_frag_t mca_btl_openib_send_control_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_send_control_frag_t);
/* unchecked cast of a fragment pointer to the control fragment type */
#define to_send_control_frag(f) ((mca_btl_openib_send_control_frag_t*)(f))
/* /*
* Allocate an IB send descriptor * Allocate an IB send descriptor
* *
@ -208,7 +244,7 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_control_t);
do { \ do { \
ompi_free_list_item_t *item; \ ompi_free_list_item_t *item; \
OMPI_FREE_LIST_WAIT(&(btl)->send_free_control, item, rc); \ OMPI_FREE_LIST_WAIT(&(btl)->send_free_control, item, rc); \
frag = (mca_btl_openib_frag_t*)item; \ frag = to_send_control_frag(item); \
} while(0) } while(0)
#define MCA_BTL_IB_FRAG_ALLOC_BY_SIZE(btl, frag, _size, rc) \ #define MCA_BTL_IB_FRAG_ALLOC_BY_SIZE(btl, frag, _size, rc) \
@ -222,42 +258,40 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_control_t);
break; \ break; \
} \ } \
} \ } \
frag = (mca_btl_openib_frag_t*)item; \ frag = to_com_frag(item); \
} while(0); } while(0);
#define MCA_BTL_IB_FRAG_ALLOC_SEND_USER(btl, frag, rc) \ #define MCA_BTL_IB_FRAG_ALLOC_SEND_USER(btl, frag, rc) \
do { \ do { \
ompi_free_list_item_t *item; \ ompi_free_list_item_t *item; \
OMPI_FREE_LIST_GET(&(btl)->send_user_free, item, rc); \ OMPI_FREE_LIST_GET(&(btl)->send_user_free, item, rc); \
frag = (mca_btl_openib_frag_t*)item; \ frag = to_com_frag(item); \
} while(0) } while(0)
#define MCA_BTL_IB_FRAG_ALLOC_RECV_USER(btl, frag, rc) \ #define MCA_BTL_IB_FRAG_ALLOC_RECV_USER(btl, frag, rc) \
do { \ do { \
ompi_free_list_item_t *item; \ ompi_free_list_item_t *item; \
OMPI_FREE_LIST_GET(&(btl)->recv_user_free, item, rc); \ OMPI_FREE_LIST_GET(&(btl)->recv_user_free, item, rc); \
frag = (mca_btl_openib_frag_t*) item; \ frag = to_com_frag(item); \
} while(0) } while(0)
#define MCA_BTL_IB_FRAG_RETURN(btl, frag) \ #define MCA_BTL_IB_FRAG_RETURN(frag) \
do { \ do { \
OMPI_FREE_LIST_RETURN(frag->list, \ OMPI_FREE_LIST_RETURN(to_base_frag(frag)->list, \
(ompi_free_list_item_t*)(frag)); \ (ompi_free_list_item_t*)(frag)); \
} while(0); } while(0);
#define MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(btl,list) \ #define MCA_BTL_OPENIB_CLEAN_PENDING_FRAGS(list) \
while(!opal_list_is_empty(list)){ \ while(!opal_list_is_empty(list)){ \
opal_list_item_t *frag_item; \ opal_list_item_t *frag_item; \
frag_item = opal_list_remove_first(list); \ frag_item = opal_list_remove_first(list); \
MCA_BTL_IB_FRAG_RETURN(btl, ((mca_btl_openib_frag_t*)frag_item)); \ MCA_BTL_IB_FRAG_RETURN(frag_item); \
} \ } \
struct mca_btl_openib_module_t; struct mca_btl_openib_module_t;
struct mca_btl_openib_frag_init_data_t { struct mca_btl_openib_frag_init_data_t {
uint8_t order; uint8_t order;
size_t length;
mca_btl_openib_frag_type_t type;
ompi_free_list_t* list; ompi_free_list_t* list;
}; };
typedef struct mca_btl_openib_frag_init_data_t mca_btl_openib_frag_init_data_t; typedef struct mca_btl_openib_frag_init_data_t mca_btl_openib_frag_init_data_t;