Message coalescing for the openib BTL. If a fragment is waiting to be transmitted in
a pending queue, pack another message into it when it has enough free space. This commit was SVN r16900.
Этот коммит содержится в:
родитель
7302cd24eb
Коммит
5313a2baa7
@ -505,23 +505,124 @@ ib_frag_alloc(mca_btl_openib_module_t *btl, size_t size, uint8_t order)
|
||||
return &to_base_frag(item)->base;
|
||||
}
|
||||
|
||||
/* check if pending fragment has enough space for coalescing */
|
||||
static mca_btl_openib_send_frag_t *check_coalescing(opal_list_t *frag_list,
|
||||
opal_mutex_t *lock, mca_btl_base_endpoint_t *ep, size_t size)
|
||||
{
|
||||
mca_btl_openib_send_frag_t *frag = NULL;
|
||||
|
||||
if(opal_list_is_empty(frag_list))
|
||||
return NULL;
|
||||
|
||||
OPAL_THREAD_LOCK(lock);
|
||||
if(!opal_list_is_empty(frag_list)) {
|
||||
int qp;
|
||||
size_t total_length;
|
||||
opal_list_item_t *i = opal_list_get_first(frag_list);
|
||||
frag = to_send_frag(i);
|
||||
if(to_com_frag(frag)->endpoint != ep ||
|
||||
MCA_BTL_OPENIB_FRAG_CONTROL == openib_frag_type(frag)) {
|
||||
OPAL_THREAD_UNLOCK(lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
total_length = size + frag->coalesced_length +
|
||||
to_base_frag(frag)->segment.seg_len +
|
||||
sizeof(mca_btl_openib_header_coalesced_t);
|
||||
|
||||
qp = to_base_frag(frag)->base.order;
|
||||
|
||||
if(total_length <= mca_btl_openib_component.qp_infos[qp].size)
|
||||
opal_list_remove_first(frag_list);
|
||||
else
|
||||
frag = NULL;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(lock);
|
||||
|
||||
return frag;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate a segment.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param size (IN) Request segment size.
|
||||
* @param size (IN) Size of segment to allocate
|
||||
* @param size (IN) Size of segment to allocate
|
||||
*
|
||||
* When allocating a segment we pull a pre-alllocated segment
|
||||
* from one of two free lists, an eager list and a max list
|
||||
*/
|
||||
mca_btl_base_descriptor_t* mca_btl_openib_alloc(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
uint8_t order,
|
||||
size_t size)
|
||||
{
|
||||
return ib_frag_alloc((mca_btl_openib_module_t*)btl, size, order);
|
||||
mca_btl_openib_module_t *obtl = (mca_btl_openib_module_t*)btl;
|
||||
int qp = frag_size_to_order(obtl, size);
|
||||
mca_btl_openib_send_frag_t *sfrag = NULL;
|
||||
mca_btl_openib_coalesced_frag_t *cfrag;
|
||||
|
||||
assert(qp != MCA_BTL_NO_ORDER);
|
||||
|
||||
if(mca_btl_openib_component.use_message_coalescing) {
|
||||
sfrag = check_coalescing(&ep->qps[qp].qp->pending_frags[0],
|
||||
&ep->qps[qp].qp->lock, ep, size);
|
||||
|
||||
if(NULL == sfrag) {
|
||||
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
|
||||
sfrag = check_coalescing(&ep->qps[qp].pending_frags[0],
|
||||
&ep->endpoint_lock, ep, size);
|
||||
} else {
|
||||
sfrag = check_coalescing(
|
||||
&obtl->qps[qp].u.srq_qp.pending_frags[0],
|
||||
&obtl->ib_lock, ep, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(NULL == sfrag)
|
||||
return ib_frag_alloc((mca_btl_openib_module_t*)btl, size, order);
|
||||
|
||||
/* begin coalescing message */
|
||||
MCA_BTL_IB_FRAG_ALLOC_COALESCED(obtl, cfrag);
|
||||
cfrag->send_frag = sfrag;
|
||||
|
||||
/* fix up new coalescing header if this is the first coalesced frag */
|
||||
if(sfrag->hdr != sfrag->chdr) {
|
||||
mca_btl_openib_control_header_t *ctrl_hdr;
|
||||
mca_btl_openib_header_coalesced_t *clsc_hdr;
|
||||
uint8_t org_tag;
|
||||
|
||||
org_tag = sfrag->hdr->tag;
|
||||
sfrag->hdr = sfrag->chdr;
|
||||
ctrl_hdr = (mca_btl_openib_control_header_t*)(sfrag->hdr + 1);
|
||||
clsc_hdr = (mca_btl_openib_header_coalesced_t*)(ctrl_hdr + 1);
|
||||
sfrag->hdr->tag = MCA_BTL_TAG_BTL;
|
||||
ctrl_hdr->type = MCA_BTL_OPENIB_CONTROL_COALESCED;
|
||||
clsc_hdr->tag = org_tag;
|
||||
clsc_hdr->size = to_base_frag(sfrag)->segment.seg_len;
|
||||
clsc_hdr->alloc_size = to_base_frag(sfrag)->segment.seg_len;
|
||||
sfrag->coalesced_length = sizeof(mca_btl_openib_control_header_t) +
|
||||
sizeof(mca_btl_openib_header_coalesced_t);
|
||||
to_com_frag(sfrag)->sg_entry.addr = (uint64_t)sfrag->hdr;
|
||||
}
|
||||
|
||||
cfrag->hdr = (mca_btl_openib_header_coalesced_t*)
|
||||
(((unsigned char*)(sfrag->hdr + 1)) + sfrag->coalesced_length +
|
||||
to_base_frag(sfrag)->segment.seg_len);
|
||||
cfrag->hdr->alloc_size = size;
|
||||
|
||||
/* point coalesced frag pointer into a data buffer */
|
||||
to_base_frag(cfrag)->segment.seg_addr.pval = cfrag->hdr + 1;
|
||||
to_base_frag(cfrag)->segment.seg_len = size;
|
||||
|
||||
/* save coalesced fragment on a main fragment; we will need it after send
|
||||
* completion to free it and to call upper layer callback */
|
||||
opal_list_append(&sfrag->coalesced_frags, (opal_list_item_t*)cfrag);
|
||||
sfrag->coalesced_length += (size+sizeof(mca_btl_openib_header_coalesced_t));
|
||||
|
||||
return &to_base_frag(cfrag)->base;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -548,16 +649,27 @@ int mca_btl_openib_free(
|
||||
|
||||
/* reset those field on free so we will not have to do it on alloc */
|
||||
to_base_frag(des)->base.des_flags = 0;
|
||||
if(MCA_BTL_OPENIB_FRAG_RECV == openib_frag_type(des) ||
|
||||
MCA_BTL_OPENIB_FRAG_RECV_USER == openib_frag_type(des)) {
|
||||
to_base_frag(des)->base.des_src = NULL;
|
||||
to_base_frag(des)->base.des_src_cnt = 0;
|
||||
} else if(MCA_BTL_OPENIB_FRAG_SEND == openib_frag_type(des) ||
|
||||
MCA_BTL_OPENIB_FRAG_SEND_USER == openib_frag_type(des)) {
|
||||
to_base_frag(des)->base.des_dst = NULL;
|
||||
to_base_frag(des)->base.des_dst_cnt = 0;
|
||||
if(MCA_BTL_OPENIB_FRAG_SEND == openib_frag_type(des))
|
||||
switch(openib_frag_type(des)) {
|
||||
case MCA_BTL_OPENIB_FRAG_RECV:
|
||||
case MCA_BTL_OPENIB_FRAG_RECV_USER:
|
||||
to_base_frag(des)->base.des_src = NULL;
|
||||
to_base_frag(des)->base.des_src_cnt = 0;
|
||||
break;
|
||||
case MCA_BTL_OPENIB_FRAG_SEND:
|
||||
to_send_frag(des)->hdr = (mca_btl_openib_header_t*)
|
||||
(((unsigned char*)to_send_frag(des)->chdr) +
|
||||
sizeof(mca_btl_openib_header_coalesced_t) +
|
||||
sizeof(mca_btl_openib_control_header_t));
|
||||
to_com_frag(des)->sg_entry.addr = (uint64_t)to_send_frag(des)->hdr;
|
||||
to_send_frag(des)->coalesced_length = 0;
|
||||
assert(!opal_list_get_size(&to_send_frag(des)->coalesced_frags));
|
||||
/* fall through */
|
||||
case MCA_BTL_OPENIB_FRAG_SEND_USER:
|
||||
to_base_frag(des)->base.des_dst = NULL;
|
||||
to_base_frag(des)->base.des_dst_cnt = 0;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
MCA_BTL_IB_FRAG_RETURN(des);
|
||||
|
||||
@ -664,9 +776,10 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
|
||||
if(max_data + reserve > btl->btl_max_send_size) {
|
||||
max_data = btl->btl_max_send_size - reserve;
|
||||
}
|
||||
|
||||
frag = (mca_btl_openib_com_frag_t*)
|
||||
ib_frag_alloc(openib_btl, max_data + reserve, order);
|
||||
|
||||
frag = (mca_btl_openib_com_frag_t*)(reserve ?
|
||||
ib_frag_alloc(openib_btl, max_data + reserve, order) :
|
||||
mca_btl_openib_alloc(btl, endpoint, order, max_data));
|
||||
|
||||
if(NULL == frag)
|
||||
return NULL;
|
||||
@ -941,19 +1054,27 @@ int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)
|
||||
|
||||
int mca_btl_openib_send(
|
||||
struct mca_btl_base_module_t* btl,
|
||||
struct mca_btl_base_endpoint_t* endpoint,
|
||||
struct mca_btl_base_descriptor_t* descriptor,
|
||||
struct mca_btl_base_endpoint_t* ep,
|
||||
struct mca_btl_base_descriptor_t* des,
|
||||
mca_btl_base_tag_t tag)
|
||||
|
||||
{
|
||||
mca_btl_openib_send_frag_t* frag = to_send_frag(descriptor);
|
||||
mca_btl_openib_send_frag_t *frag;
|
||||
|
||||
assert(openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_SEND);
|
||||
|
||||
to_com_frag(frag)->endpoint = endpoint;
|
||||
frag->hdr->tag = tag;
|
||||
assert(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND ||
|
||||
openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_COALESCED);
|
||||
|
||||
if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_COALESCED) {
|
||||
to_coalesced_frag(des)->hdr->tag = tag;
|
||||
to_coalesced_frag(des)->hdr->size = des->des_src->seg_len;
|
||||
frag = to_coalesced_frag(des)->send_frag;
|
||||
} else {
|
||||
frag = to_send_frag(des);
|
||||
to_com_frag(des)->endpoint = ep;
|
||||
frag->hdr->tag = tag;
|
||||
}
|
||||
|
||||
return mca_btl_openib_endpoint_send(endpoint, frag);
|
||||
return mca_btl_openib_endpoint_send(ep, frag);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -195,6 +195,7 @@ struct mca_btl_openib_component_t {
|
||||
if_[in|ex]clude list that we use for error checking (to ensure
|
||||
that they all exist) */
|
||||
char **if_list;
|
||||
bool use_message_coalescing;
|
||||
#ifdef HAVE_IBV_FORK_INIT
|
||||
/** Whether we want fork support or not */
|
||||
int want_fork_support;
|
||||
@ -316,6 +317,8 @@ struct mca_btl_openib_module_t {
|
||||
|
||||
ompi_free_list_t send_free_control; /**< frags for control massages */
|
||||
|
||||
ompi_free_list_t send_free_coalesced; /**< frags for coalesced massages */
|
||||
|
||||
opal_mutex_t ib_lock; /**< module level lock */
|
||||
|
||||
size_t ib_inline_max; /**< max size of inline send*/
|
||||
|
@ -215,10 +215,14 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
|
||||
void* cbdata)
|
||||
{
|
||||
/* don't return credits used for control messages */
|
||||
mca_btl_openib_module_t *obtl = (mca_btl_openib_module_t*)btl;
|
||||
mca_btl_openib_endpoint_t* ep = to_com_frag(des)->endpoint;
|
||||
mca_btl_openib_control_header_t *ctl_hdr =
|
||||
to_base_frag(des)->segment.seg_addr.pval;
|
||||
mca_btl_openib_eager_rdma_header_t *rdma_hdr;
|
||||
mca_btl_openib_header_coalesced_t *clsc_hdr =
|
||||
(mca_btl_openib_header_coalesced_t*)(ctl_hdr + 1);
|
||||
size_t len = des->des_dst->seg_len - sizeof(*ctl_hdr);
|
||||
|
||||
switch (ctl_hdr->type) {
|
||||
case MCA_BTL_OPENIB_CONTROL_CREDITS:
|
||||
@ -251,6 +255,27 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
|
||||
ep->eager_rdma_remote.base.lval = rdma_hdr->rdma_start.lval;
|
||||
ep->eager_rdma_remote.tokens=mca_btl_openib_component.eager_rdma_num - 1;
|
||||
break;
|
||||
case MCA_BTL_OPENIB_CONTROL_COALESCED:
|
||||
while(len > 0) {
|
||||
size_t skip = (sizeof(*clsc_hdr) + clsc_hdr->alloc_size);
|
||||
mca_btl_base_descriptor_t tmp_des;
|
||||
mca_btl_base_segment_t tmp_seg;
|
||||
|
||||
assert(len >= sizeof(*clsc_hdr));
|
||||
|
||||
tmp_des.des_dst = &tmp_seg;
|
||||
tmp_des.des_dst_cnt = 1;
|
||||
tmp_seg.seg_addr.pval = clsc_hdr + 1;
|
||||
tmp_seg.seg_len = clsc_hdr->size;
|
||||
|
||||
/* call registered callback */
|
||||
obtl->ib_reg[clsc_hdr->tag].cbfunc(&obtl->super, clsc_hdr->tag,
|
||||
&tmp_des, obtl->ib_reg[clsc_hdr->tag].cbdata);
|
||||
len -= skip;
|
||||
clsc_hdr = (mca_btl_openib_header_coalesced_t*)
|
||||
(((unsigned char*)clsc_hdr) + skip);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
BTL_ERROR(("Unknown message type received by BTL"));
|
||||
break;
|
||||
@ -739,6 +764,7 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
|
||||
OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t);
|
||||
|
||||
OBJ_CONSTRUCT(&openib_btl->send_free_control, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&openib_btl->send_free_coalesced, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&openib_btl->send_user_free, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&openib_btl->recv_user_free, ompi_free_list_t);
|
||||
|
||||
@ -771,7 +797,7 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
|
||||
init_data->order = mca_btl_openib_component.rdma_qp;
|
||||
init_data->list = &openib_btl->recv_user_free;
|
||||
|
||||
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(&openib_btl->recv_user_free,
|
||||
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(&openib_btl->recv_user_free,
|
||||
sizeof(mca_btl_openib_get_frag_t), 2,
|
||||
OBJ_CLASS(mca_btl_openib_get_frag_t),
|
||||
0, 0,
|
||||
@ -802,7 +828,21 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
|
||||
init_data)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
|
||||
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
|
||||
length = sizeof(mca_btl_openib_coalesced_frag_t);
|
||||
|
||||
init_data->list = &openib_btl->send_free_coalesced;
|
||||
|
||||
if(OMPI_SUCCESS != ompi_free_list_init_ex(&openib_btl->send_free_coalesced,
|
||||
length, 2, OBJ_CLASS(mca_btl_openib_coalesced_frag_t),
|
||||
mca_btl_openib_component.ib_free_list_num,
|
||||
mca_btl_openib_component.ib_free_list_max,
|
||||
mca_btl_openib_component.ib_free_list_inc,
|
||||
NULL, mca_btl_openib_frag_init, init_data)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* setup all the qps */
|
||||
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
|
||||
OBJ_CONSTRUCT(&openib_btl->qps[qp].send_free, ompi_free_list_t);
|
||||
@ -819,9 +859,11 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
|
||||
|
||||
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
|
||||
/* Initialize pool of send fragments */
|
||||
length = sizeof(mca_btl_openib_header_t) +
|
||||
sizeof(mca_btl_openib_footer_t) +
|
||||
mca_btl_openib_component.qp_infos[qp].size;
|
||||
length = sizeof(mca_btl_openib_header_t) +
|
||||
sizeof(mca_btl_openib_header_coalesced_t) +
|
||||
sizeof(mca_btl_openib_control_header_t) +
|
||||
sizeof(mca_btl_openib_footer_t) +
|
||||
mca_btl_openib_component.qp_infos[qp].size;
|
||||
|
||||
init_data->order = qp;
|
||||
init_data->list = &openib_btl->qps[qp].send_free;
|
||||
@ -840,6 +882,8 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
|
||||
|
||||
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
|
||||
length = sizeof(mca_btl_openib_header_t) +
|
||||
sizeof(mca_btl_openib_header_coalesced_t) +
|
||||
sizeof(mca_btl_openib_control_header_t) +
|
||||
sizeof(mca_btl_openib_footer_t) +
|
||||
mca_btl_openib_component.qp_infos[qp].size;
|
||||
|
||||
@ -867,6 +911,8 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
|
||||
|
||||
openib_btl->eager_rdma_frag_size = OPAL_ALIGN(
|
||||
sizeof(mca_btl_openib_header_t) +
|
||||
sizeof(mca_btl_openib_header_coalesced_t) +
|
||||
sizeof(mca_btl_openib_control_header_t) +
|
||||
sizeof(mca_btl_openib_footer_t) +
|
||||
openib_btl->super.btl_eager_limit,
|
||||
mca_btl_openib_component.buffer_alignment, size_t);
|
||||
@ -1598,8 +1644,16 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
|
||||
|
||||
case IBV_WC_RDMA_WRITE:
|
||||
case IBV_WC_SEND:
|
||||
if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) {
|
||||
opal_list_item_t *i;
|
||||
while((i = opal_list_remove_first(
|
||||
&to_send_frag(des)->coalesced_frags))) {
|
||||
to_base_frag(i)->base.des_cbfunc(&openib_btl->super,
|
||||
endpoint, &to_base_frag(i)->base, OMPI_SUCCESS);
|
||||
}
|
||||
}
|
||||
/* Process a completed send/put/get */
|
||||
des->des_cbfunc(&openib_btl->super, endpoint, des, OMPI_SUCCESS);
|
||||
des->des_cbfunc(&openib_btl->super, endpoint, des,OMPI_SUCCESS);
|
||||
|
||||
/* return send wqe */
|
||||
qp_put_wqe(endpoint, qp);
|
||||
|
@ -39,10 +39,8 @@
|
||||
#include "ompi/mca/pml/base/pml_base_sendreq.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_endpoint.h"
|
||||
#include "btl_openib_proc.h"
|
||||
#include "btl_openib_frag.h"
|
||||
#include "btl_openib_xrc.h"
|
||||
|
||||
static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint);
|
||||
@ -59,7 +57,7 @@ static int post_send(mca_btl_openib_endpoint_t *ep,
|
||||
int qp = to_base_frag(frag)->base.order;
|
||||
|
||||
sg->length = seg->seg_len + sizeof(mca_btl_openib_header_t) +
|
||||
(rdma ? sizeof(mca_btl_openib_footer_t) : 0);
|
||||
(rdma ? sizeof(mca_btl_openib_footer_t) : 0) + frag->coalesced_length;
|
||||
|
||||
if(sg->length <= openib_btl->ib_inline_max) {
|
||||
sr_desc->send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
|
||||
@ -73,8 +71,8 @@ static int post_send(mca_btl_openib_endpoint_t *ep,
|
||||
if(rdma) {
|
||||
int32_t head;
|
||||
mca_btl_openib_footer_t* ftr =
|
||||
(mca_btl_openib_footer_t*)(((char*)seg->seg_addr.pval) +
|
||||
seg->seg_len);
|
||||
(mca_btl_openib_footer_t*)(((char*)frag->hdr) + sg->length -
|
||||
sizeof(mca_btl_openib_footer_t));
|
||||
sr_desc->opcode = IBV_WR_RDMA_WRITE;
|
||||
MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, sg->length);
|
||||
MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr);
|
||||
@ -184,6 +182,7 @@ int mca_btl_openib_endpoint_post_send(mca_btl_openib_endpoint_t *endpoint,
|
||||
int qp, ib_rc;
|
||||
int32_t cm_return;
|
||||
bool do_rdma = false;
|
||||
size_t eager_limit;
|
||||
|
||||
if(OPAL_LIKELY(des->order == MCA_BTL_NO_ORDER))
|
||||
des->order = frag->qp_idx;
|
||||
@ -193,7 +192,10 @@ int mca_btl_openib_endpoint_post_send(mca_btl_openib_endpoint_t *endpoint,
|
||||
if(acruire_wqe(endpoint, frag) != OMPI_SUCCESS)
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
|
||||
if(des->des_src->seg_len <= mca_btl_openib_component.eager_limit &&
|
||||
eager_limit = mca_btl_openib_component.eager_limit +
|
||||
sizeof(mca_btl_openib_header_coalesced_t) +
|
||||
sizeof(mca_btl_openib_control_header_t);
|
||||
if(des->des_src->seg_len + frag->coalesced_length <= eager_limit &&
|
||||
(des->des_flags & MCA_BTL_DES_FLAGS_PRIORITY)) {
|
||||
/* High priority frag. Try to send over eager RDMA */
|
||||
if(acquire_eager_rdma_send_credit(endpoint) == OMPI_SUCCESS)
|
||||
|
@ -28,8 +28,8 @@
|
||||
#include "opal/event/event.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "btl_openib_frag.h"
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_frag.h"
|
||||
#include "btl_openib_eager_rdma.h"
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
@ -19,6 +19,7 @@
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "btl_openib.h"
|
||||
#include "btl_openib_frag.h"
|
||||
#include "btl_openib_eager_rdma.h"
|
||||
|
||||
@ -31,7 +32,9 @@ void mca_btl_openib_frag_init(ompi_free_list_item_t* item, void* ctx)
|
||||
to_recv_frag(frag)->qp_idx = init_data->order;
|
||||
to_com_frag(frag)->sg_entry.length =
|
||||
mca_btl_openib_component.qp_infos[init_data->order].size +
|
||||
sizeof(mca_btl_openib_header_t);
|
||||
sizeof(mca_btl_openib_header_t) +
|
||||
sizeof(mca_btl_openib_header_coalesced_t) +
|
||||
sizeof(mca_btl_openib_control_header_t);
|
||||
}
|
||||
|
||||
if(MCA_BTL_OPENIB_FRAG_SEND == frag->type)
|
||||
@ -92,10 +95,15 @@ static void send_constructor(mca_btl_openib_send_frag_t *frag)
|
||||
|
||||
base_frag->type = MCA_BTL_OPENIB_FRAG_SEND;
|
||||
|
||||
frag->hdr = (mca_btl_openib_header_t*)base_frag->base.super.ptr;
|
||||
base_frag->segment.seg_addr.pval =
|
||||
((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
|
||||
frag->chdr = (mca_btl_openib_header_t*)base_frag->base.super.ptr;
|
||||
frag->hdr = (mca_btl_openib_header_t*)
|
||||
(((unsigned char*)base_frag->base.super.ptr) +
|
||||
sizeof(mca_btl_openib_header_coalesced_t) +
|
||||
sizeof(mca_btl_openib_control_header_t));
|
||||
base_frag->segment.seg_addr.pval = frag->hdr + 1;
|
||||
to_com_frag(frag)->sg_entry.addr = (uint64_t)frag->hdr;
|
||||
frag->coalesced_length = 0;
|
||||
OBJ_CONSTRUCT(&frag->coalesced_frags, opal_list_t);
|
||||
}
|
||||
|
||||
static void recv_constructor(mca_btl_openib_recv_frag_t *frag)
|
||||
@ -138,6 +146,18 @@ static void get_constructor(mca_btl_openib_get_frag_t *frag)
|
||||
frag->sr_desc.next = NULL;
|
||||
}
|
||||
|
||||
/**
 * Constructor for coalesced fragments: marks the fragment type and sets
 * the descriptor up with one source segment (the fragment's own embedded
 * segment) and no destination segments.
 */
static void coalesced_constructor(mca_btl_openib_coalesced_frag_t *frag)
{
    mca_btl_openib_frag_t *self = to_base_frag(frag);

    self->type = MCA_BTL_OPENIB_FRAG_COALESCED;

    /* exactly one source segment: the one embedded in the fragment */
    self->base.des_src_cnt = 1;
    self->base.des_src = &self->segment;

    /* a coalesced fragment never has destination segments */
    self->base.des_dst_cnt = 0;
    self->base.des_dst = NULL;
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_btl_openib_frag_t,
|
||||
mca_btl_base_descriptor_t,
|
||||
@ -191,3 +211,9 @@ OBJ_CLASS_INSTANCE(
|
||||
mca_btl_openib_in_frag_t,
|
||||
get_constructor,
|
||||
NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
mca_btl_openib_coalesced_frag_t,
|
||||
mca_btl_openib_frag_t,
|
||||
coalesced_constructor,
|
||||
NULL);
|
||||
|
@ -53,6 +53,11 @@ do { \
|
||||
(h).credits = ntohs((h).credits); \
|
||||
} while (0)
|
||||
|
||||
typedef struct mca_btl_openib_header_coalesced_t {
|
||||
mca_btl_base_tag_t tag;
|
||||
uint32_t size;
|
||||
uint32_t alloc_size;
|
||||
} mca_btl_openib_header_coalesced_t;
|
||||
|
||||
struct mca_btl_openib_footer_t {
|
||||
#if OMPI_ENABLE_DEBUG
|
||||
@ -101,8 +106,9 @@ typedef struct mca_btl_openib_footer_t mca_btl_openib_footer_t;
|
||||
#endif
|
||||
|
||||
|
||||
#define MCA_BTL_OPENIB_CONTROL_CREDITS 0
|
||||
#define MCA_BTL_OPENIB_CONTROL_RDMA 1
|
||||
#define MCA_BTL_OPENIB_CONTROL_CREDITS 0
|
||||
#define MCA_BTL_OPENIB_CONTROL_RDMA 1
|
||||
#define MCA_BTL_OPENIB_CONTROL_COALESCED 2
|
||||
|
||||
struct mca_btl_openib_control_header_t {
|
||||
uint8_t type;
|
||||
@ -153,7 +159,8 @@ enum mca_btl_openib_frag_type_t {
|
||||
MCA_BTL_OPENIB_FRAG_SEND,
|
||||
MCA_BTL_OPENIB_FRAG_SEND_USER,
|
||||
MCA_BTL_OPENIB_FRAG_EAGER_RDMA,
|
||||
MCA_BTL_OPENIB_FRAG_CONTROL
|
||||
MCA_BTL_OPENIB_FRAG_CONTROL,
|
||||
MCA_BTL_OPENIB_FRAG_COALESCED
|
||||
};
|
||||
typedef enum mca_btl_openib_frag_type_t mca_btl_openib_frag_type_t;
|
||||
|
||||
@ -199,9 +206,11 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_in_frag_t);
|
||||
|
||||
typedef struct mca_btl_openib_send_frag_t {
|
||||
mca_btl_openib_out_frag_t super;
|
||||
mca_btl_openib_header_t *hdr;
|
||||
mca_btl_openib_header_t *hdr, *chdr;
|
||||
mca_btl_openib_footer_t *ftr;
|
||||
uint8_t qp_idx;
|
||||
uint32_t coalesced_length;
|
||||
opal_list_t coalesced_frags;
|
||||
} mca_btl_openib_send_frag_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_t);
|
||||
|
||||
@ -235,6 +244,16 @@ typedef struct mca_btl_openib_send_frag_t mca_btl_openib_send_control_frag_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_openib_send_control_frag_t);
|
||||
|
||||
#define to_send_control_frag(f) ((mca_btl_openib_send_control_frag_t*)(f))
|
||||
|
||||
typedef struct mca_btl_openib_coalesced_frag_t {
|
||||
mca_btl_openib_frag_t super;
|
||||
mca_btl_openib_send_frag_t *send_frag;
|
||||
mca_btl_openib_header_coalesced_t *hdr;
|
||||
} mca_btl_openib_coalesced_frag_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_openib_coalesced_frag_t);
|
||||
|
||||
#define to_coalesced_frag(f) ((mca_btl_openib_coalesced_frag_t*)(f))
|
||||
|
||||
/*
|
||||
* Allocate an IB send descriptor
|
||||
*
|
||||
@ -247,6 +266,17 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_send_control_frag_t);
|
||||
frag = to_send_control_frag(item); \
|
||||
} while(0)
|
||||
|
||||
/**
 * Map a fragment size to a QP index.
 *
 * @param btl  (IN) BTL module (not consulted; sizes come from the component)
 * @param size (IN) Requested fragment size
 *
 * @return Index of the first configured QP whose size is at least size,
 *         or MCA_BTL_NO_ORDER when no QP is large enough.
 */
static inline uint8_t frag_size_to_order(mca_btl_openib_module_t* btl,
        size_t size)
{
    int idx = 0;

    while(idx < mca_btl_openib_component.num_qps) {
        if(mca_btl_openib_component.qp_infos[idx].size >= size) {
            return idx;
        }
        idx++;
    }

    return MCA_BTL_NO_ORDER;
}
|
||||
|
||||
#define MCA_BTL_IB_FRAG_ALLOC_SEND_USER(btl, frag, rc) \
|
||||
do { \
|
||||
ompi_free_list_item_t *item; \
|
||||
@ -261,6 +291,14 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_send_control_frag_t);
|
||||
frag = to_com_frag(item); \
|
||||
} while(0)
|
||||
|
||||
/*
 * Take a coalesced fragment from the module's send_free_coalesced free
 * list and store it in frag. The return code of OMPI_FREE_LIST_GET is
 * deliberately ignored.
 * NOTE(review): frag is presumably NULL when the free list cannot supply
 * an item - callers should check; confirm against OMPI_FREE_LIST_GET.
 */
#define MCA_BTL_IB_FRAG_ALLOC_COALESCED(btl, frag)                         \
    do {                                                                   \
        int ign_rc;                                                        \
        ompi_free_list_item_t *item;                                       \
        OMPI_FREE_LIST_GET(&(btl)->send_free_coalesced, item, ign_rc);     \
        (frag) = to_coalesced_frag(item);                                  \
    } while(0)
|
||||
|
||||
#define MCA_BTL_IB_FRAG_RETURN(frag) \
|
||||
do { \
|
||||
OMPI_FREE_LIST_RETURN(to_base_frag(frag)->list, \
|
||||
|
@ -416,6 +416,10 @@ int btl_openib_register_mca_params(void)
|
||||
mca_btl_openib_component.buffer_alignment = (uint32_t) ival;
|
||||
}
|
||||
|
||||
CHECK(reg_int("use_message_coalescing",
|
||||
"Use message coalescing", 1, &ival, 0));
|
||||
mca_btl_openib_component.use_message_coalescing = (0 != ival);
|
||||
|
||||
/* Info only */
|
||||
|
||||
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user