Implementation of short-message RDMA. Each endpoint registers a circular buffer and sends its address and rkey to the peer. The peer uses this buffer to eagerly RDMA small messages into it. The endpoint polls the buffer for message arrival before checking the HP/LP QPs. Set btl_openib_use_eager_rdma to 1 to enable it.
This commit was SVN r9425.
Этот коммит содержится в:
родитель
52e5d90a53
Коммит
a5a78b10cc
@ -31,7 +31,8 @@ sources = \
|
||||
btl_openib_frag.c \
|
||||
btl_openib_frag.h \
|
||||
btl_openib_proc.c \
|
||||
btl_openib_proc.h
|
||||
btl_openib_proc.h \
|
||||
btl_openib_eager_rdma.h
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
|
@ -29,6 +29,7 @@
|
||||
/* Open MPI includes */
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
#include "ompi/class/ompi_bitmap.h"
|
||||
#include "orte/class/orte_pointer_array.h"
|
||||
#include "opal/event/event.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
@ -114,7 +115,10 @@ struct mca_btl_openib_component_t {
|
||||
uint32_t ib_service_level;
|
||||
uint32_t ib_static_rate;
|
||||
uint32_t ib_src_path_bits;
|
||||
|
||||
uint32_t use_eager_rdma;
|
||||
uint32_t eager_rdma_threashold;
|
||||
uint32_t eager_rdma_num;
|
||||
uint32_t max_eager_rdma;
|
||||
|
||||
}; typedef struct mca_btl_openib_component_t mca_btl_openib_component_t;
|
||||
|
||||
@ -179,10 +183,11 @@ struct mca_btl_openib_module_t {
|
||||
opal_list_t pending_frags_lp;
|
||||
/**< list of pending low priority frags */
|
||||
|
||||
|
||||
size_t eager_rdma_frag_size; /**< length of eager frag */
|
||||
orte_pointer_array_t *eager_rdma_buffers; /**< RDMA buffers to poll */
|
||||
uint32_t eager_rdma_buffers_count; /**< number of RDMA buffers */
|
||||
}; typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
|
||||
|
||||
|
||||
struct mca_btl_openib_frag_t;
|
||||
extern mca_btl_openib_module_t mca_btl_openib_module;
|
||||
|
||||
|
@ -33,8 +33,8 @@
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_frag.h"
|
||||
#include "btl_openib_endpoint.h"
|
||||
#include "btl_openib_eager_rdma.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
|
||||
|
||||
#include "ompi/datatype/convertor.h"
|
||||
@ -186,7 +186,17 @@ int mca_btl_openib_component_open(void)
|
||||
16, (int*) &mca_btl_openib_component.srq_rd_per_peer);
|
||||
mca_btl_openib_param_register_int("srq_sd_max", "Maximum number of send descriptors posted. (SRQ)",
|
||||
8, &mca_btl_openib_component.srq_sd_max);
|
||||
|
||||
mca_btl_openib_param_register_int("use_eager_rdma", "user RDMA for eager messages",
|
||||
0, &mca_btl_openib_component.use_eager_rdma);
|
||||
if (mca_btl_openib_component.use_srq)
|
||||
mca_btl_openib_component.use_eager_rdma = 0;
|
||||
mca_btl_openib_param_register_int("eager_rdma_threashold", "Open rdma channel for eager messages after this number of messages received from peer",
|
||||
100, &mca_btl_openib_component.eager_rdma_threashold);
|
||||
mca_btl_openib_param_register_int("max_eager_rdma", "Maximum number of eager RDMA connections",
|
||||
16, (int*)&mca_btl_openib_component.max_eager_rdma);
|
||||
mca_btl_openib_param_register_int("eager_rdma_num", "Number of RDMA buffers for eager messages",
|
||||
16, (int*)&mca_btl_openib_component.eager_rdma_num);
|
||||
mca_btl_openib_component.eager_rdma_num+=1;
|
||||
mca_btl_openib_param_register_int ("eager_limit", "eager send limit",
|
||||
(32*1024),(int*) &mca_btl_openib_module.super.btl_eager_limit);
|
||||
mca_btl_openib_param_register_int ("min_send_size", "minimum send size",
|
||||
@ -264,11 +274,37 @@ static void mca_btl_openib_control(
|
||||
/* dont return credits used for control messages */
|
||||
mca_btl_openib_frag_t* frag = (mca_btl_openib_frag_t*)descriptor;
|
||||
mca_btl_openib_endpoint_t* endpoint = frag->endpoint;
|
||||
mca_btl_openib_control_header_t *ctl_hdr = frag->segment.seg_addr.pval;
|
||||
mca_btl_openib_eager_rdma_header_t *rdma_hdr;
|
||||
|
||||
if(frag->size == mca_btl_openib_component.eager_limit) {
|
||||
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -1);
|
||||
/* if not sent via rdma */
|
||||
if(!MCA_BTL_OPENIB_RDMA_FRAG(frag) &&
|
||||
ctl_hdr->type == MCA_BTL_OPENIB_CONTROL_NOOP) {
|
||||
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -1);
|
||||
}
|
||||
} else {
|
||||
OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, -1);
|
||||
}
|
||||
|
||||
switch (ctl_hdr->type) {
|
||||
case MCA_BTL_OPENIB_CONTROL_NOOP:
|
||||
break;
|
||||
case MCA_BTL_OPENIB_CONTROL_RDMA:
|
||||
rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)ctl_hdr;
|
||||
if (endpoint->eager_rdma_remote.base.pval) {
|
||||
BTL_ERROR(("Got RDMA connect twise!"));
|
||||
return;
|
||||
}
|
||||
endpoint->eager_rdma_remote.rkey = rdma_hdr->rkey;
|
||||
endpoint->eager_rdma_remote.base.pval = rdma_hdr->rdma_start.pval;
|
||||
endpoint->eager_rdma_remote.tokens =
|
||||
mca_btl_openib_component.eager_rdma_num - 1;
|
||||
break;
|
||||
default:
|
||||
BTL_ERROR(("Unknown message type received by BTL"));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -481,9 +517,13 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
|
||||
/* Initialize pool of send fragments */
|
||||
length = sizeof(mca_btl_openib_frag_t) +
|
||||
sizeof(mca_btl_openib_header_t) +
|
||||
sizeof(mca_btl_openib_footer_t) +
|
||||
openib_btl->super.btl_eager_limit+
|
||||
2*MCA_BTL_IB_FRAG_ALIGN;
|
||||
|
||||
|
||||
openib_btl->eager_rdma_frag_size =
|
||||
length & ~(2 * MCA_BTL_IB_FRAG_ALIGN - 1);
|
||||
|
||||
ompi_free_list_init(&openib_btl->send_free_eager,
|
||||
length,
|
||||
OBJ_CLASS(mca_btl_openib_send_frag_eager_t),
|
||||
@ -534,6 +574,12 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
|
||||
mca_btl_openib_component.ib_free_list_inc,
|
||||
openib_btl->super.btl_mpool);
|
||||
|
||||
orte_pointer_array_init(&openib_btl->eager_rdma_buffers,
|
||||
mca_btl_openib_component.max_eager_rdma,
|
||||
mca_btl_openib_component.max_eager_rdma,
|
||||
0);
|
||||
openib_btl->eager_rdma_buffers_count = 0;
|
||||
|
||||
/* Initialize the rd_desc_post array for posting of rr*/
|
||||
openib_btl->rd_desc_post = (struct ibv_recv_wr *)
|
||||
malloc(((mca_btl_openib_component.rd_num + mca_btl_openib_component.rd_rsv) * sizeof(struct ibv_recv_wr)));
|
||||
@ -554,14 +600,114 @@ mca_btl_base_module_t** mca_btl_openib_component_init(int *num_btl_modules,
|
||||
return btls;
|
||||
}
|
||||
|
||||
int mca_btl_openib_handle_incoming_hp(
|
||||
mca_btl_openib_module_t *openib_btl,
|
||||
mca_btl_openib_endpoint_t *endpoint,
|
||||
mca_btl_openib_frag_t *frag,
|
||||
size_t byte_len)
|
||||
{
|
||||
/* advance the segment address past the header and subtract from the length..*/
|
||||
frag->segment.seg_len = byte_len -
|
||||
((unsigned char*)frag->segment.seg_addr.pval -
|
||||
(unsigned char*) frag->hdr);
|
||||
|
||||
/* call registered callback */
|
||||
openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super,
|
||||
frag->hdr->tag, &frag->base,
|
||||
openib_btl->ib_reg[frag->hdr->tag].cbdata);
|
||||
|
||||
if (!MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
|
||||
OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_eager),
|
||||
(opal_list_item_t*) frag);
|
||||
} else {
|
||||
mca_btl_openib_frag_t *tf;
|
||||
OPAL_THREAD_LOCK(&endpoint->eager_rdma_local.lock);
|
||||
MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr);
|
||||
while (endpoint->eager_rdma_local.tail !=
|
||||
endpoint->eager_rdma_local.head) {
|
||||
tf = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(endpoint,
|
||||
endpoint->eager_rdma_local.tail);
|
||||
if (MCA_BTL_OPENIB_RDMA_FRAG_LOCAL (tf))
|
||||
break;
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits, 1);
|
||||
MCA_BTL_OPENIB_RDMA_NEXT_INDEX(endpoint->eager_rdma_local.tail);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock);
|
||||
}
|
||||
|
||||
if (!mca_btl_openib_component.use_srq) {
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp, frag->hdr->credits);
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,
|
||||
frag->hdr->rdma_credits);
|
||||
}
|
||||
|
||||
if (mca_btl_openib_component.use_eager_rdma &&
|
||||
!endpoint->eager_rdma_local.base.pval &&
|
||||
openib_btl->eager_rdma_buffers_count <
|
||||
mca_btl_openib_component.max_eager_rdma &&
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_recv_count, 1) ==
|
||||
mca_btl_openib_component.eager_rdma_threashold)
|
||||
mca_btl_openib_endpoint_connect_eager_rdma(endpoint);
|
||||
|
||||
/* repost receive descriptors */
|
||||
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
|
||||
if(mca_btl_openib_component.use_srq) {
|
||||
if (!MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
|
||||
OPAL_THREAD_ADD32((int32_t*) &openib_btl->srd_posted_hp, -1);
|
||||
MCA_BTL_OPENIB_POST_SRR_HIGH(openib_btl, 0);
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
if (!MCA_BTL_OPENIB_RDMA_FRAG(frag)) {
|
||||
OPAL_THREAD_ADD32((int32_t*) &endpoint->rd_posted_hp, -1);
|
||||
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 0);
|
||||
}
|
||||
|
||||
/* check to see if we need to progress any pending desciptors */
|
||||
if(endpoint->sd_tokens_hp > 0 ||
|
||||
endpoint->eager_rdma_remote.tokens > 0) {
|
||||
|
||||
while(!opal_list_is_empty(&endpoint->pending_frags_hp) &&
|
||||
endpoint->sd_wqe_hp > 0 &&
|
||||
(endpoint->sd_tokens_hp > 0 ||
|
||||
endpoint->eager_rdma_remote.tokens > 0)) {
|
||||
opal_list_item_t *frag_item;
|
||||
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
|
||||
frag_item = opal_list_remove_first(&(endpoint->pending_frags_hp));
|
||||
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
||||
if(NULL == (frag = (mca_btl_openib_frag_t *) frag_item))
|
||||
break;
|
||||
if(OMPI_SUCCESS !=
|
||||
mca_btl_openib_endpoint_send(frag->endpoint, frag)) {
|
||||
BTL_ERROR(("error in posting pending send\n"));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check to see if we need to return credits */
|
||||
if((endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win ||
|
||||
endpoint->eager_rdma_local.credits >=
|
||||
mca_btl_openib_component.rd_win) &&
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp, 1) == 1) {
|
||||
mca_btl_openib_endpoint_send_credits_hp(endpoint);
|
||||
}
|
||||
|
||||
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
|
||||
}
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* IB component progress.
|
||||
*/
|
||||
|
||||
int mca_btl_openib_component_progress()
|
||||
{
|
||||
uint32_t i;
|
||||
int count = 0,ne;
|
||||
uint32_t i, j, c;
|
||||
int count = 0,ne = 0, ret;
|
||||
int32_t credits;
|
||||
mca_btl_openib_frag_t* frag;
|
||||
mca_btl_openib_endpoint_t* endpoint;
|
||||
@ -576,6 +722,52 @@ int mca_btl_openib_component_progress()
|
||||
* we will check the high priority and process them until there are none left.
|
||||
* note that low priority messages are only processed one per progress call.
|
||||
*/
|
||||
|
||||
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
|
||||
c = openib_btl->eager_rdma_buffers_count;
|
||||
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
|
||||
|
||||
for(j = 0; j < c; j++) {
|
||||
endpoint =
|
||||
orte_pointer_array_get_item(openib_btl->eager_rdma_buffers, j);
|
||||
|
||||
if(!endpoint) /* shouldn't happen */
|
||||
continue;
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->eager_rdma_local.lock);
|
||||
frag = MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG (endpoint,
|
||||
endpoint->eager_rdma_local.head);
|
||||
|
||||
if (MCA_BTL_OPENIB_RDMA_FRAG_LOCAL (frag)) {
|
||||
uint32_t size = MCA_BTL_OPENIB_RDMA_FRAG_GET_SIZE(frag->ftr);
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
if (frag->ftr->seq != endpoint->eager_rdma_local.seq)
|
||||
BTL_ERROR(("Eager RDMA wrong SEQ: received %d expected %d",
|
||||
frag->ftr->seq,
|
||||
endpoint->eager_rdma_local.seq));
|
||||
endpoint->eager_rdma_local.seq++;
|
||||
#endif
|
||||
MCA_BTL_OPENIB_RDMA_NEXT_INDEX(endpoint->eager_rdma_local.head);
|
||||
|
||||
OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock);
|
||||
frag->hdr = (mca_btl_openib_header_t*)(((char*)frag->ftr) -
|
||||
size + sizeof(mca_btl_openib_footer_t));
|
||||
frag->segment.seg_addr.pval = ((unsigned char* )frag->hdr) +
|
||||
sizeof(mca_btl_openib_header_t);
|
||||
|
||||
ret = mca_btl_openib_handle_incoming_hp(openib_btl,
|
||||
frag->endpoint, frag,
|
||||
size - sizeof(mca_btl_openib_footer_t));
|
||||
if (ret != MPI_SUCCESS)
|
||||
return ret;
|
||||
count++;
|
||||
} else
|
||||
OPAL_THREAD_UNLOCK(&endpoint->eager_rdma_local.lock);
|
||||
}
|
||||
|
||||
if(count)
|
||||
break;
|
||||
|
||||
ne=ibv_poll_cq(openib_btl->ib_cq_hp, 1, &wc );
|
||||
if(ne < 0 ){
|
||||
BTL_ERROR(("error polling HP CQ with %d errno says %s\n", ne, strerror(errno)));
|
||||
@ -594,6 +786,7 @@ int mca_btl_openib_component_progress()
|
||||
BTL_ERROR(("Got an RDMA with Immediate data Not supported!"));
|
||||
return OMPI_ERROR;
|
||||
|
||||
case IBV_WC_RDMA_WRITE:
|
||||
case IBV_WC_SEND :
|
||||
|
||||
frag = (mca_btl_openib_frag_t*) (unsigned long) wc.wr_id;
|
||||
@ -607,7 +800,7 @@ int mca_btl_openib_component_progress()
|
||||
|
||||
/* check to see if we need to progress any pending desciptors */
|
||||
while (!opal_list_is_empty(&endpoint->pending_frags_hp) &&
|
||||
endpoint->sd_wqe_hp > 0 && endpoint->sd_tokens_hp > 0) {
|
||||
endpoint->sd_wqe_hp > 0 && (endpoint->sd_tokens_hp > 0 | endpoint->eager_rdma_remote.tokens > 0)) {
|
||||
opal_list_item_t *frag_item;
|
||||
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
|
||||
frag_item = opal_list_remove_first(&(endpoint->pending_frags_hp));
|
||||
@ -623,7 +816,7 @@ int mca_btl_openib_component_progress()
|
||||
if(!mca_btl_openib_component.use_srq) {
|
||||
|
||||
/* check to see if we need to return credits */
|
||||
if( endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win &&
|
||||
if((endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win || endpoint->eager_rdma_local.credits >= mca_btl_openib_component.rd_win) &&
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp, 1) == 1) {
|
||||
mca_btl_openib_endpoint_send_credits_hp(endpoint);
|
||||
}
|
||||
@ -646,65 +839,16 @@ int mca_btl_openib_component_progress()
|
||||
break;
|
||||
|
||||
case IBV_WC_RECV:
|
||||
|
||||
/* Process a RECV */
|
||||
frag = (mca_btl_openib_frag_t*) (unsigned long) wc.wr_id;
|
||||
endpoint = (mca_btl_openib_endpoint_t*) frag->endpoint;
|
||||
credits = frag->hdr->credits;
|
||||
|
||||
/* advance the segment address past the header and subtract from the length..*/
|
||||
frag->segment.seg_len = wc.byte_len-
|
||||
((unsigned char*) frag->segment.seg_addr.pval - (unsigned char*) frag->hdr);
|
||||
|
||||
/* call registered callback */
|
||||
openib_btl->ib_reg[frag->hdr->tag].cbfunc(&openib_btl->super,
|
||||
frag->hdr->tag,
|
||||
&frag->base,
|
||||
openib_btl->ib_reg[frag->hdr->tag].cbdata);
|
||||
OMPI_FREE_LIST_RETURN(&(openib_btl->recv_free_eager), (opal_list_item_t*) frag);
|
||||
|
||||
/* repost receive descriptors */
|
||||
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
|
||||
if(mca_btl_openib_component.use_srq) {
|
||||
OPAL_THREAD_ADD32((int32_t*) &openib_btl->srd_posted_hp, -1);
|
||||
MCA_BTL_OPENIB_POST_SRR_HIGH(openib_btl, 0);
|
||||
} else {
|
||||
#endif
|
||||
OPAL_THREAD_ADD32((int32_t*) &endpoint->rd_posted_hp, -1);
|
||||
MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, 0);
|
||||
|
||||
/* check to see if we need to progress any pending desciptors */
|
||||
if( OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp, credits) > 0) {
|
||||
|
||||
while(!opal_list_is_empty(&endpoint->pending_frags_hp) &&
|
||||
endpoint->sd_wqe_hp > 0 && endpoint->sd_tokens_hp > 0) {
|
||||
opal_list_item_t *frag_item;
|
||||
OPAL_THREAD_LOCK(&endpoint->endpoint_lock);
|
||||
frag_item = opal_list_remove_first(&(endpoint->pending_frags_hp));
|
||||
OPAL_THREAD_UNLOCK(&endpoint->endpoint_lock);
|
||||
if(NULL == (frag = (mca_btl_openib_frag_t *) frag_item))
|
||||
break;
|
||||
if(OMPI_SUCCESS != mca_btl_openib_endpoint_send(frag->endpoint, frag)) {
|
||||
BTL_ERROR(("error in posting pending send\n"));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* check to see if we need to return credits */
|
||||
if( endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win &&
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp, 1) == 1) {
|
||||
mca_btl_openib_endpoint_send_credits_hp(endpoint);
|
||||
}
|
||||
|
||||
#ifdef OMPI_MCA_BTL_OPENIB_HAVE_SRQ
|
||||
}
|
||||
#endif
|
||||
ret = mca_btl_openib_handle_incoming_hp(openib_btl,
|
||||
frag->endpoint, frag, wc.byte_len);
|
||||
if (ret != OMPI_SUCCESS)
|
||||
return ret;
|
||||
count++;
|
||||
break;
|
||||
|
||||
case IBV_WC_RDMA_READ:
|
||||
case IBV_WC_RDMA_WRITE:
|
||||
default:
|
||||
BTL_ERROR(("Unhandled work completion opcode is %d", wc.opcode));
|
||||
break;
|
||||
|
90
ompi/mca/btl/openib/btl_openib_eager_rdma.h
Обычный файл
90
ompi/mca/btl/openib/btl_openib_eager_rdma.h
Обычный файл
@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (c) 2006 Voltaire All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_BTL_OPENIB_EAGER_RDMA_BUF_H
|
||||
#define MCA_BTL_OPENIB_EAGER_RDMA_BUF_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_endpoint.h"
|
||||
#include "ompi/mca/mpool/openib/mpool_openib.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct mca_btl_openib_eager_rdma_local_t {
|
||||
ompi_ptr_t base; /**< buffer for RDMAing eager messages */
|
||||
mca_mpool_openib_registration_t *reg;
|
||||
uint16_t head; /**< RDMA buffer to poll */
|
||||
uint16_t tail; /**< Needed for credit managment */
|
||||
int32_t credits; /**< number of RDMA credits */
|
||||
#ifdef OMPI_ENABLE_DEBUG
|
||||
uint32_t seq;
|
||||
#endif
|
||||
opal_mutex_t lock; /**< guard access to RDMA buffer */
|
||||
};
|
||||
typedef struct mca_btl_openib_eager_rdma_local_t mca_btl_openib_eager_rdma_local_t;
|
||||
|
||||
struct mca_btl_openib_eager_rdma_remote_t {
|
||||
ompi_ptr_t base; /**< address of remote buffer */
|
||||
uint64_t rkey; /**< RKey for accessing remote buffer */
|
||||
uint16_t head; /**< RDMA buffer to post to */
|
||||
int32_t tokens; /**< number of rdam tokens */
|
||||
#ifdef OMPI_ENABLE_DEBUG
|
||||
uint32_t seq;
|
||||
#endif
|
||||
};
|
||||
typedef struct mca_btl_openib_eager_rdma_remote_t mca_btl_openib_eager_rdma_remote_t;
|
||||
|
||||
/*
 * True when fragment F lives inside its endpoint's eager RDMA buffer, i.e.
 * the fragment has an endpoint, that endpoint has a local RDMA registration,
 * and the fragment's memory region is the one of that registration.
 */
#define MCA_BTL_OPENIB_RDMA_FRAG(F)                           \
    ((F)->endpoint &&                                         \
     (F)->endpoint->eager_rdma_local.reg &&                   \
     (F)->mr == (F)->endpoint->eager_rdma_local.reg->mr)
|
||||
|
||||
/*
 * Ownership marker kept in byte 3 of the footer word.  REMOTE (0) is what
 * the receiver writes when it has consumed a slot; LOCAL (0xff) is what the
 * sender writes into the footer it RDMAs, so any non-zero value seen while
 * polling means a message is present in the slot.
 */
#define EAGER_RDMA_BUFFER_REMOTE (0)
#define EAGER_RDMA_BUFFER_LOCAL (0xff)

/*
 * The message size shares the footer word with the marker byte, so it is
 * stored in the three bytes that do not overlap byte 3; which bits those are
 * depends on the host byte order.  F is a footer pointer.
 */
#ifdef WORDS_BIGENDIAN
#define MCA_BTL_OPENIB_RDMA_FRAG_GET_SIZE(F) ((F)->u.size >> 8)
#define MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(F, S)               \
    ((F)->u.size = (S) << 8)
#else
#define MCA_BTL_OPENIB_RDMA_FRAG_GET_SIZE(F) ((F)->u.size & 0x00ffffff)
#define MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(F, S)               \
    ((F)->u.size = (S) & 0x00ffffff)
#endif

/* Slot state tests; F is a fragment pointer (the footer is reached via
 * F->ftr).  The volatile access forces a fresh read of the marker byte. */
#define MCA_BTL_OPENIB_RDMA_FRAG_LOCAL(F)                     \
    (((volatile uint8_t*)(F)->ftr->u.buf)[3] != EAGER_RDMA_BUFFER_REMOTE)

#define MCA_BTL_OPENIB_RDMA_FRAG_REMOTE(F)                    \
    (!MCA_BTL_OPENIB_RDMA_FRAG_LOCAL(F))

/* Marker setters; note that here F is a footer pointer, not a fragment. */
#define MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(F) do {                        \
    ((volatile uint8_t*)(F)->u.buf)[3] = EAGER_RDMA_BUFFER_REMOTE;     \
}while (0)

#define MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(F) do {                         \
    ((volatile uint8_t*)(F)->u.buf)[3] = EAGER_RDMA_BUFFER_LOCAL;      \
}while (0)
|
||||
|
||||
/*
 * Address of slot I of endpoint E's local eager RDMA buffer, as a fragment
 * pointer.  Slots are eager_rdma_frag_size bytes apart.  The base pointer is
 * converted to char* before the offset is added (arithmetic on void* is a
 * compiler extension, not standard C) and the whole expansion is
 * parenthesised so it can be used inside larger expressions.
 */
#define MCA_BTL_OPENIB_GET_LOCAL_RDMA_FRAG(E, I)                           \
    ((mca_btl_openib_frag_t*)((char*)(E)->eager_rdma_local.base.pval +     \
        (I) * (E)->endpoint_btl->eager_rdma_frag_size))
|
||||
|
||||
/*
 * Step the slot index I to the next slot of the circular eager RDMA buffer,
 * wrapping to 0 after the last of the eager_rdma_num slots.
 */
#define MCA_BTL_OPENIB_RDMA_NEXT_INDEX(I) do {                              \
    (I) = ((I) + 1) % mca_btl_openib_component.eager_rdma_num;              \
} while (0)
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
@ -70,49 +70,53 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
|
||||
mca_btl_openib_endpoint_t * endpoint,
|
||||
mca_btl_openib_frag_t * frag)
|
||||
{
|
||||
|
||||
int do_rdma = 0;
|
||||
struct ibv_qp* ib_qp;
|
||||
struct ibv_send_wr* bad_wr;
|
||||
frag->sg_entry.addr = (unsigned long) frag->hdr;
|
||||
frag->wr_desc.sr_desc.opcode = IBV_WR_SEND;
|
||||
|
||||
if(frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY && frag->size <= openib_btl->super.btl_eager_limit){
|
||||
|
||||
if((frag->base.des_flags & MCA_BTL_DES_FLAGS_PRIORITY) &&
|
||||
frag->size <= openib_btl->super.btl_eager_limit){
|
||||
/* check for a send wqe */
|
||||
if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,-1) < 0) {
|
||||
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
|
||||
opal_list_append(&endpoint->pending_frags_hp, (opal_list_item_t *)frag);
|
||||
opal_list_append(&endpoint->pending_frags_hp,
|
||||
(opal_list_item_t *)frag);
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
/* check for a token */
|
||||
} else if(!mca_btl_openib_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp,-1) < 0) {
|
||||
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp,1);
|
||||
opal_list_append(&endpoint->pending_frags_hp, (opal_list_item_t *)frag);
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
} else if( mca_btl_openib_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&openib_btl->sd_tokens_hp,-1) < 0) {
|
||||
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
|
||||
OPAL_THREAD_ADD32(&openib_btl->sd_tokens_hp,1);
|
||||
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
|
||||
opal_list_append(&openib_btl->pending_frags_hp, (opal_list_item_t *)frag);
|
||||
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
/* queue the request */
|
||||
}
|
||||
/* check for rdma tocken */
|
||||
if (OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,-1) < 0) {
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_remote.tokens,1);
|
||||
/* check for a token */
|
||||
if(!mca_btl_openib_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp,-1) < 0) {
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_tokens_hp,1);
|
||||
opal_list_append(&endpoint->pending_frags_hp,
|
||||
(opal_list_item_t *)frag);
|
||||
return OMPI_SUCCESS;
|
||||
} else if( mca_btl_openib_component.use_srq &&
|
||||
OPAL_THREAD_ADD32(&openib_btl->sd_tokens_hp,-1) < 0) {
|
||||
/* queue the request */
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_wqe_hp,1);
|
||||
OPAL_THREAD_ADD32(&openib_btl->sd_tokens_hp,1);
|
||||
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
|
||||
opal_list_append(&openib_btl->pending_frags_hp,
|
||||
(opal_list_item_t *)frag);
|
||||
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
} else {
|
||||
frag->hdr->credits = (endpoint->rd_credits_hp > 0) ? endpoint->rd_credits_hp : 0;
|
||||
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -frag->hdr->credits);
|
||||
ib_qp = endpoint->lcl_qp_hp;
|
||||
do_rdma = 1;
|
||||
}
|
||||
|
||||
frag->hdr->credits =
|
||||
(endpoint->rd_credits_hp > 0) ? endpoint->rd_credits_hp : 0;
|
||||
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -frag->hdr->credits);
|
||||
frag->hdr->rdma_credits = endpoint->eager_rdma_local.credits;
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits,
|
||||
-frag->hdr->rdma_credits);
|
||||
ib_qp = endpoint->lcl_qp_hp;
|
||||
} else {
|
||||
|
||||
/* check for a send wqe */
|
||||
if (OPAL_THREAD_ADD32(&endpoint->sd_wqe_lp,-1) < 0) {
|
||||
|
||||
@ -147,18 +151,46 @@ static inline int mca_btl_openib_endpoint_post_send(mca_btl_openib_module_t* ope
|
||||
}
|
||||
}
|
||||
|
||||
frag->sg_entry.length =
|
||||
frag->segment.seg_len + sizeof(mca_btl_openib_header_t);
|
||||
frag->sg_entry.length =
|
||||
frag->segment.seg_len + sizeof(mca_btl_openib_header_t) +
|
||||
(do_rdma ? sizeof(mca_btl_openib_footer_t) : 0);
|
||||
if(frag->sg_entry.length <= openib_btl->ib_inline_max) {
|
||||
frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED|IBV_SEND_INLINE;
|
||||
} else {
|
||||
frag->wr_desc.sr_desc.send_flags = IBV_SEND_SIGNALED;
|
||||
}
|
||||
|
||||
|
||||
if(do_rdma) {
|
||||
mca_btl_openib_footer_t* ftr =
|
||||
(mca_btl_openib_footer_t*)(((char*)frag->segment.seg_addr.pval) +
|
||||
frag->segment.seg_len);
|
||||
frag->wr_desc.sr_desc.opcode = IBV_WR_RDMA_WRITE;
|
||||
MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, frag->sg_entry.length);
|
||||
MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr);
|
||||
#ifdef OMPI_ENABLE_DEBUG
|
||||
((mca_btl_openib_footer_t*)(((char*)frag->segment.seg_addr.pval) +
|
||||
frag->segment.seg_len))->seq =
|
||||
endpoint->eager_rdma_remote.seq++;
|
||||
#endif
|
||||
frag->wr_desc.sr_desc.wr.rdma.rkey = endpoint->eager_rdma_remote.rkey;
|
||||
frag->wr_desc.sr_desc.wr.rdma.remote_addr =
|
||||
endpoint->eager_rdma_remote.base.lval +
|
||||
endpoint->eager_rdma_remote.head *
|
||||
openib_btl->eager_rdma_frag_size +
|
||||
sizeof(mca_btl_openib_frag_t) +
|
||||
sizeof(mca_btl_openib_header_t) +
|
||||
frag->size +
|
||||
sizeof(mca_btl_openib_footer_t);
|
||||
frag->wr_desc.sr_desc.wr.rdma.remote_addr -= frag->sg_entry.length;
|
||||
MCA_BTL_OPENIB_RDMA_NEXT_INDEX (endpoint->eager_rdma_remote.head);
|
||||
} else {
|
||||
frag->wr_desc.sr_desc.opcode = IBV_WR_SEND;
|
||||
}
|
||||
if(ibv_post_send(ib_qp,
|
||||
&frag->wr_desc.sr_desc,
|
||||
&bad_wr)) {
|
||||
BTL_ERROR(("error posting send request errno says %s", strerror(errno)));
|
||||
BTL_ERROR(("error posting send request errno says %s\n",
|
||||
strerror(errno)));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
@ -225,6 +257,13 @@ static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint)
|
||||
endpoint->sd_tokens_lp = mca_btl_openib_component.rd_num;
|
||||
endpoint->get_tokens = mca_btl_openib_component.ib_qp_ous_rd_atom;
|
||||
|
||||
/* initialize RDMA eager related parts */
|
||||
endpoint->eager_recv_count = 0;
|
||||
memset(&endpoint->eager_rdma_remote, 0,
|
||||
sizeof(mca_btl_openib_eager_rdma_remote_t));
|
||||
memset (&endpoint->eager_rdma_local, 0,
|
||||
sizeof(mca_btl_openib_eager_rdma_local_t));
|
||||
|
||||
endpoint->rem_info.rem_qp_num_hp = 0;
|
||||
endpoint->rem_info.rem_qp_num_lp = 0;
|
||||
endpoint->rem_info.rem_lid = 0;
|
||||
@ -1021,9 +1060,11 @@ void mca_btl_openib_endpoint_send_credits_lp(
|
||||
frag->hdr->tag = MCA_BTL_TAG_BTL;
|
||||
frag->hdr->credits = endpoint->rd_credits_lp;
|
||||
OPAL_THREAD_ADD32(&endpoint->rd_credits_lp, -frag->hdr->credits);
|
||||
((mca_btl_openib_control_header_t *)frag->segment.seg_addr.pval)->type = MCA_BTL_OPENIB_CONTROL_NOOP;
|
||||
|
||||
frag->wr_desc.sr_desc.opcode = IBV_WR_SEND;
|
||||
frag->sg_entry.length = sizeof(mca_btl_openib_header_t);
|
||||
frag->sg_entry.length = sizeof(mca_btl_openib_header_t) +
|
||||
sizeof(mca_btl_openib_control_header_t);
|
||||
frag->sg_entry.addr = (unsigned long) frag->hdr;
|
||||
|
||||
if(frag->sg_entry.length <= openib_btl->ib_inline_max) {
|
||||
@ -1062,7 +1103,8 @@ static void mca_btl_openib_endpoint_credits_hp(
|
||||
/* check to see if there are addditional credits to return */
|
||||
if ((credits = OPAL_THREAD_ADD32(&endpoint->sd_credits_hp,-1)) > 0) {
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp,-credits);
|
||||
if (endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win &&
|
||||
if ((endpoint->rd_credits_hp >= mca_btl_openib_component.rd_win ||
|
||||
endpoint->eager_rdma_local.credits >= mca_btl_openib_component.rd_win) &&
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp,1) == 1) {
|
||||
mca_btl_openib_endpoint_send_credits_hp(endpoint);
|
||||
}
|
||||
@ -1093,11 +1135,17 @@ void mca_btl_openib_endpoint_send_credits_hp(
|
||||
frag->endpoint = endpoint;
|
||||
|
||||
frag->hdr->tag = MCA_BTL_TAG_BTL;
|
||||
frag->hdr->credits = endpoint->rd_credits_hp;
|
||||
frag->hdr->credits =
|
||||
(endpoint->rd_credits_hp > 0) ? endpoint->rd_credits_hp : 0;
|
||||
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, -frag->hdr->credits);
|
||||
frag->hdr->rdma_credits = endpoint->eager_rdma_local.credits;
|
||||
OPAL_THREAD_ADD32(&endpoint->eager_rdma_local.credits,
|
||||
-frag->hdr->rdma_credits);
|
||||
((mca_btl_openib_control_header_t *)frag->segment.seg_addr.pval)->type = MCA_BTL_OPENIB_CONTROL_NOOP;
|
||||
|
||||
frag->wr_desc.sr_desc.opcode = IBV_WR_SEND;
|
||||
frag->sg_entry.length = sizeof(mca_btl_openib_header_t);
|
||||
frag->sg_entry.length = sizeof(mca_btl_openib_header_t) +
|
||||
sizeof(mca_btl_openib_control_header_t);
|
||||
frag->sg_entry.addr = (unsigned long) frag->hdr;
|
||||
|
||||
if(frag->sg_entry.length <= openib_btl->ib_inline_max) {
|
||||
@ -1112,8 +1160,104 @@ void mca_btl_openib_endpoint_send_credits_hp(
|
||||
OPAL_THREAD_ADD32(&endpoint->sd_credits_hp, -1);
|
||||
OPAL_THREAD_ADD32(&endpoint->rd_credits_hp, frag->hdr->credits);
|
||||
MCA_BTL_IB_FRAG_RETURN_EAGER(openib_btl, frag);
|
||||
BTL_ERROR(("error posting send request errno %d says %s", strerror(errno)));
|
||||
BTL_ERROR(("error posting send request errno %d says %s", errno,
|
||||
strerror(errno)));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void mca_btl_openib_endpoint_eager_rdma(
|
||||
mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* descriptor,
|
||||
int status)
|
||||
{
|
||||
MCA_BTL_IB_FRAG_RETURN_EAGER((mca_btl_openib_module_t*)btl,
|
||||
(mca_btl_openib_frag_t*)descriptor);
|
||||
}
|
||||
|
||||
static int mca_btl_openib_endpoint_send_eager_rdma(
|
||||
mca_btl_base_endpoint_t* endpoint)
|
||||
{
|
||||
mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
|
||||
mca_btl_openib_eager_rdma_header_t *rdma_hdr;
|
||||
mca_btl_openib_frag_t* frag;
|
||||
struct ibv_send_wr* bad_wr;
|
||||
int rc;
|
||||
|
||||
MCA_BTL_IB_FRAG_ALLOC_EAGER(openib_btl, frag, rc);
|
||||
if(NULL == frag) {
|
||||
BTL_ERROR(("error allocating fragment"));
|
||||
return -1;
|
||||
}
|
||||
|
||||
frag->base.des_cbfunc = mca_btl_openib_endpoint_eager_rdma;
|
||||
frag->base.des_cbdata = NULL;
|
||||
frag->endpoint = endpoint;
|
||||
frag->base.des_flags |= MCA_BTL_DES_FLAGS_PRIORITY;
|
||||
|
||||
frag->hdr->tag = MCA_BTL_TAG_BTL;
|
||||
rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)frag->segment.seg_addr.pval;
|
||||
rdma_hdr->control.type = MCA_BTL_OPENIB_CONTROL_RDMA;
|
||||
rdma_hdr->rkey = endpoint->eager_rdma_local.reg->mr->rkey;
|
||||
rdma_hdr->rdma_start.pval = endpoint->eager_rdma_local.base.pval;
|
||||
frag->segment.seg_len = sizeof(mca_btl_openib_eager_rdma_header_t);
|
||||
if (mca_btl_openib_endpoint_post_send(openib_btl, endpoint, frag) !=
|
||||
OMPI_SUCCESS) {
|
||||
MCA_BTL_IB_FRAG_RETURN_EAGER(openib_btl, frag);
|
||||
BTL_ERROR(("Error sending RDMA buffer", strerror(errno)));
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
/* create RDMA buffer for eager messages */
|
||||
void mca_btl_openib_endpoint_connect_eager_rdma(
|
||||
mca_btl_openib_endpoint_t* endpoint)
|
||||
{
|
||||
mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
|
||||
mca_btl_openib_eager_rdma_local_t *eager_rdma;
|
||||
char *buf;
|
||||
int i;
|
||||
|
||||
if (endpoint->eager_rdma_local.base.pval)
|
||||
return;
|
||||
|
||||
OPAL_THREAD_LOCK(&openib_btl->ib_lock);
|
||||
buf = openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool,
|
||||
openib_btl->eager_rdma_frag_size *
|
||||
mca_btl_openib_component.eager_rdma_num, 0, 0,
|
||||
(mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg);
|
||||
|
||||
if(!buf)
|
||||
goto unlock;
|
||||
|
||||
for(i = 0; i < mca_btl_openib_component.eager_rdma_num; i++) {
|
||||
ompi_free_list_item_t *item = (ompi_free_list_item_t *)(buf +
|
||||
i*openib_btl->eager_rdma_frag_size);
|
||||
item->user_data = endpoint->eager_rdma_local.reg;
|
||||
OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_eager_t);
|
||||
((mca_btl_openib_frag_t*)item)->endpoint = endpoint;
|
||||
}
|
||||
|
||||
if(orte_pointer_array_add (&endpoint->eager_rdma_index,
|
||||
openib_btl->eager_rdma_buffers, endpoint) < 0)
|
||||
goto cleanup;
|
||||
|
||||
endpoint->eager_rdma_local.base.pval = buf;
|
||||
openib_btl->eager_rdma_buffers_count++;
|
||||
if (mca_btl_openib_endpoint_send_eager_rdma(endpoint) == 0) {
|
||||
OBJ_CONSTRUCT(&endpoint->eager_rdma_local.lock, opal_mutex_t);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
openib_btl->eager_rdma_buffers_count--;
|
||||
endpoint->eager_rdma_local.base.pval = NULL;
|
||||
orte_pointer_array_set_item(openib_btl->eager_rdma_buffers,
|
||||
endpoint->eager_rdma_index, NULL);
|
||||
|
||||
cleanup:
|
||||
openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool,
|
||||
buf, (mca_mpool_base_registration_t*)eager_rdma->reg);
|
||||
unlock:
|
||||
OPAL_THREAD_UNLOCK(&openib_btl->ib_lock);
|
||||
}
|
||||
|
@ -25,9 +25,11 @@
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "btl_openib_frag.h"
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_eager_rdma.h"
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "ompi/mca/mpool/openib/mpool_openib.h"
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
@ -161,6 +163,13 @@ struct mca_btl_base_endpoint_t {
|
||||
int32_t sd_wqe_lp; /**< number of available send wqe entries */
|
||||
|
||||
uint16_t subnet; /**< subnet of this endpoint*/
|
||||
|
||||
uint32_t eager_recv_count; /**< number of eager received */
|
||||
mca_btl_openib_eager_rdma_remote_t eager_rdma_remote;
|
||||
/**< info about remote RDMA buffer */
|
||||
mca_btl_openib_eager_rdma_local_t eager_rdma_local;
|
||||
/**< info about local RDMA buffer */
|
||||
size_t eager_rdma_index; /**< index into RDMA buffers pointer array */
|
||||
};
|
||||
|
||||
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
|
||||
@ -171,7 +180,7 @@ int mca_btl_openib_endpoint_connect(mca_btl_base_endpoint_t*);
|
||||
void mca_btl_openib_post_recv(void);
|
||||
void mca_btl_openib_endpoint_send_credits_hp(mca_btl_base_endpoint_t*);
|
||||
void mca_btl_openib_endpoint_send_credits_lp(mca_btl_base_endpoint_t*);
|
||||
|
||||
/**
 * Create the RDMA buffer for eager messages on the given endpoint and
 * send its address and rkey to the peer. Returns nothing; on failure the
 * endpoint's eager_rdma_local.base.pval is left NULL.
 */
void mca_btl_openib_endpoint_connect_eager_rdma(mca_btl_openib_endpoint_t*);
|
||||
|
||||
#define MCA_BTL_OPENIB_ENDPOINT_POST_RR_HIGH(endpoint, \
|
||||
additional) \
|
||||
|
@ -96,6 +96,9 @@ static void mca_btl_openib_recv_frag_eager_constructor(mca_btl_openib_frag_t* fr
|
||||
{
|
||||
frag->size = mca_btl_openib_component.eager_limit;
|
||||
mca_btl_openib_recv_frag_common_constructor(frag);
|
||||
frag->ftr = (mca_btl_openib_footer_t*)((char*)frag->segment.seg_addr.pval
|
||||
+ frag->size);
|
||||
MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr);
|
||||
}
|
||||
|
||||
static void mca_btl_openib_send_frag_frag_constructor(mca_btl_openib_frag_t* frag)
|
||||
|
@ -35,9 +35,37 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_btl_openib_frag_t);
|
||||
/**
 * Per-fragment header, reached through mca_btl_openib_frag_t::hdr.
 */
struct mca_btl_openib_header_t {
|
||||
mca_btl_base_tag_t tag; /* message tag; BTL-internal control messages use MCA_BTL_TAG_BTL */
|
||||
int16_t credits; /* presumably flow-control credits passed to the peer - TODO confirm */
|
||||
int16_t rdma_credits; /* presumably credits for the eager RDMA buffer - TODO confirm */
|
||||
};
|
||||
typedef struct mca_btl_openib_header_t mca_btl_openib_header_t;
|
||||
|
||||
/**
 * Per-fragment footer, reached through mca_btl_openib_frag_t::ftr.
 * The eager receive fragment constructor places it frag->size bytes past
 * the start of the fragment's segment.
 */
struct mca_btl_openib_footer_t {
|
||||
/* NOTE(review): #ifdef only tests whether the macro is defined. If the
 * build system always defines OMPI_ENABLE_DEBUG (as 0 or 1) this field is
 * present in every build and `#if` was probably intended - confirm. */
#ifdef OMPI_ENABLE_DEBUG
|
||||
uint32_t seq; /* debug-only field; presumably a sequence number - TODO confirm */
|
||||
#endif
|
||||
/* the same 32 bits viewed either as one integer or as four raw bytes */
union {
|
||||
uint32_t size;
|
||||
uint8_t buf[4];
|
||||
} u;
|
||||
};
|
||||
typedef struct mca_btl_openib_footer_t mca_btl_openib_footer_t;
|
||||
|
||||
/**
 * Type of a BTL-internal control message, stored in
 * mca_btl_openib_control_header_t::type.
 */
typedef enum {
|
||||
MCA_BTL_OPENIB_CONTROL_NOOP, /* presumably a message with no action - TODO confirm */
|
||||
MCA_BTL_OPENIB_CONTROL_RDMA /* advertises the sender's eager RDMA buffer */
|
||||
} mca_btl_openib_control_t;
|
||||
|
||||
/**
 * Common leading part of a BTL-internal control message; embedded as the
 * first member of mca_btl_openib_eager_rdma_header_t.
 */
struct mca_btl_openib_control_header_t {
|
||||
mca_btl_openib_control_t type; /* which control message this is */
|
||||
};
|
||||
typedef struct mca_btl_openib_control_header_t mca_btl_openib_control_header_t;
|
||||
|
||||
/**
 * Payload of the MCA_BTL_OPENIB_CONTROL_RDMA control message: tells the
 * peer where the sender's eager RDMA buffer is and which rkey to use.
 */
struct mca_btl_openib_eager_rdma_header_t {
|
||||
mca_btl_openib_control_header_t control; /* type is set to MCA_BTL_OPENIB_CONTROL_RDMA */
|
||||
ompi_ptr_t rdma_start; /* base address of the sender's local eager RDMA buffer */
|
||||
uint64_t rkey; /* rkey of the memory registration covering that buffer */
|
||||
};
|
||||
typedef struct mca_btl_openib_eager_rdma_header_t mca_btl_openib_eager_rdma_header_t;
|
||||
|
||||
/**
|
||||
* IB send fragment derived type.
|
||||
@ -55,6 +83,7 @@ struct mca_btl_openib_frag_t {
|
||||
struct ibv_sge sg_entry;
|
||||
struct ibv_mr *mr;
|
||||
mca_btl_openib_header_t *hdr;
|
||||
mca_btl_openib_footer_t *ftr;
|
||||
mca_mpool_openib_registration_t * openib_reg;
|
||||
};
|
||||
typedef struct mca_btl_openib_frag_t mca_btl_openib_frag_t;
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user