1
1

Message coalescing for openib BTL. If a fragment is waiting to be transmitted in
a pending queue, pack another message into it if there is enough space there.

This commit was SVN r16900.
Этот коммит содержится в:
Gleb Natapov 2007-12-09 14:05:13 +00:00
родитель 7302cd24eb
Коммит 5313a2baa7
8 изменённых файлов: 292 добавлений и 44 удалений

Просмотреть файл

@ -505,23 +505,124 @@ ib_frag_alloc(mca_btl_openib_module_t *btl, size_t size, uint8_t order)
return &to_base_frag(item)->base;
}
/*
 * Try to claim the head of a pending-fragment list as a coalescing target.
 *
 *   frag_list  list of fragments waiting to be transmitted
 *   lock       mutex guarding frag_list
 *   ep         endpoint the new message is destined for
 *   size       payload size of the message the caller wants to add
 *
 * Only the first fragment of the list is examined.  It is unlinked from the
 * list and returned when it is queued for `ep`, is not a control fragment, and
 * its own segment plus the data already coalesced into it plus one more
 * coalesced header and `size` bytes still fit into the buffer size of the QP
 * named by its descriptor order.  In every other case NULL is returned and
 * the list is left untouched.
 */
static mca_btl_openib_send_frag_t *check_coalescing(opal_list_t *frag_list,
        opal_mutex_t *lock, mca_btl_base_endpoint_t *ep, size_t size)
{
    opal_list_item_t *first;
    mca_btl_openib_send_frag_t *head;
    mca_btl_openib_send_frag_t *target = NULL;
    size_t needed;
    int order;

    /* unlocked peek: do not take the lock at all when nothing is queued */
    if(opal_list_is_empty(frag_list)) {
        return NULL;
    }

    OPAL_THREAD_LOCK(lock);

    /* check again now that the lock is held */
    if(opal_list_is_empty(frag_list)) {
        goto unlock;
    }

    first = opal_list_get_first(frag_list);
    head = to_send_frag(first);

    /* the head must be queued for the same endpoint ... */
    if(to_com_frag(head)->endpoint != ep) {
        goto unlock;
    }
    /* ... and must not be a control fragment */
    if(MCA_BTL_OPENIB_FRAG_CONTROL == openib_frag_type(head)) {
        goto unlock;
    }

    needed = size + head->coalesced_length +
        to_base_frag(head)->segment.seg_len +
        sizeof(mca_btl_openib_header_coalesced_t);
    order = to_base_frag(head)->base.order;

    if(needed <= mca_btl_openib_component.qp_infos[order].size) {
        opal_list_remove_first(frag_list);
        target = head;
    }

unlock:
    OPAL_THREAD_UNLOCK(lock);
    return target;
}
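/**
* Allocate a segment.
*
* @param btl (IN) BTL module
* @param ep (IN) Endpoint the segment is going to be sent to
* @param order (IN) Requested ordering, passed on to the fragment allocator
* @param size (IN) Size of segment to allocate
*
* When allocating a segment we pull a pre-allocated segment
* from one of two free lists, an eager list and a max list.
* If message coalescing is enabled and a fragment already pending for
* the endpoint has enough room left, a coalesced fragment packed into
* that pending fragment is returned instead.
*/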
mca_btl_base_descriptor_t* mca_btl_openib_alloc(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_endpoint_t* ep,
uint8_t order,
size_t size)
{
return ib_frag_alloc((mca_btl_openib_module_t*)btl, size, order);
mca_btl_openib_module_t *obtl = (mca_btl_openib_module_t*)btl;
/* index of the first QP whose buffer size is >= size */
int qp = frag_size_to_order(obtl, size);
mca_btl_openib_send_frag_t *sfrag = NULL;
mca_btl_openib_coalesced_frag_t *cfrag;
/* NOTE(review): qp indexes ep->qps[] and obtl->qps[] below; with asserts
 * compiled out nothing stops MCA_BTL_NO_ORDER from being used as an index -
 * confirm callers never pass a size larger than every QP */
assert(qp != MCA_BTL_NO_ORDER);
/* look for a queued fragment with room for this message: first on the QP's
 * own pending list, then on the endpoint's list (PP-type QP) or on the
 * module's SRQ list */
if(mca_btl_openib_component.use_message_coalescing) {
sfrag = check_coalescing(&ep->qps[qp].qp->pending_frags[0],
&ep->qps[qp].qp->lock, ep, size);
if(NULL == sfrag) {
if(BTL_OPENIB_QP_TYPE_PP(qp)) {
sfrag = check_coalescing(&ep->qps[qp].pending_frags[0],
&ep->endpoint_lock, ep, size);
} else {
sfrag = check_coalescing(
&obtl->qps[qp].u.srq_qp.pending_frags[0],
&obtl->ib_lock, ep, size);
}
}
}
/* nothing suitable is queued (or coalescing is off): hand out a regular
 * fragment */
if(NULL == sfrag)
return ib_frag_alloc((mca_btl_openib_module_t*)btl, size, order);
/* begin coalescing message */
MCA_BTL_IB_FRAG_ALLOC_COALESCED(obtl, cfrag);
/* NOTE(review): cfrag is dereferenced without a NULL check although sfrag was
 * already unlinked from its pending list by check_coalescing() - confirm the
 * free-list get cannot come back empty here */
cfrag->send_frag = sfrag;
/* fix up new coalescing header if this is the first coalesced frag */
if(sfrag->hdr != sfrag->chdr) {
mca_btl_openib_control_header_t *ctrl_hdr;
mca_btl_openib_header_coalesced_t *clsc_hdr;
uint8_t org_tag;
org_tag = sfrag->hdr->tag;
/* switch to the header slot at chdr; it is followed by a control header
 * and a coalesced header that describes the original message */
sfrag->hdr = sfrag->chdr;
ctrl_hdr = (mca_btl_openib_control_header_t*)(sfrag->hdr + 1);
clsc_hdr = (mca_btl_openib_header_coalesced_t*)(ctrl_hdr + 1);
/* the fragment is now sent with the BTL tag as a control message of type
 * COALESCED; the original tag travels in the coalesced header */
sfrag->hdr->tag = MCA_BTL_TAG_BTL;
ctrl_hdr->type = MCA_BTL_OPENIB_CONTROL_COALESCED;
clsc_hdr->tag = org_tag;
clsc_hdr->size = to_base_frag(sfrag)->segment.seg_len;
clsc_hdr->alloc_size = to_base_frag(sfrag)->segment.seg_len;
sfrag->coalesced_length = sizeof(mca_btl_openib_control_header_t) +
sizeof(mca_btl_openib_header_coalesced_t);
to_com_frag(sfrag)->sg_entry.addr = (uint64_t)sfrag->hdr;
}
/* the new message's header goes right behind everything already packed
 * into the fragment */
cfrag->hdr = (mca_btl_openib_header_coalesced_t*)
(((unsigned char*)(sfrag->hdr + 1)) + sfrag->coalesced_length +
to_base_frag(sfrag)->segment.seg_len);
cfrag->hdr->alloc_size = size;
/* point coalesced frag pointer into a data buffer */
to_base_frag(cfrag)->segment.seg_addr.pval = cfrag->hdr + 1;
to_base_frag(cfrag)->segment.seg_len = size;
/* save coalesced fragment on a main fragment; we will need it after send
* completion to free it and to call upper layer callback */
opal_list_append(&sfrag->coalesced_frags, (opal_list_item_t*)cfrag);
/* account for the header and payload just reserved */
sfrag->coalesced_length += (size+sizeof(mca_btl_openib_header_coalesced_t));
return &to_base_frag(cfrag)->base;
}
/**
@ -548,16 +649,27 @@ int mca_btl_openib_free(
/* reset those field on free so we will not have to do it on alloc */
to_base_frag(des)->base.des_flags = 0;
if(MCA_BTL_OPENIB_FRAG_RECV == openib_frag_type(des) ||
MCA_BTL_OPENIB_FRAG_RECV_USER == openib_frag_type(des)) {
to_base_frag(des)->base.des_src = NULL;
to_base_frag(des)->base.des_src_cnt = 0;
} else if(MCA_BTL_OPENIB_FRAG_SEND == openib_frag_type(des) ||
MCA_BTL_OPENIB_FRAG_SEND_USER == openib_frag_type(des)) {
to_base_frag(des)->base.des_dst = NULL;
to_base_frag(des)->base.des_dst_cnt = 0;
if(MCA_BTL_OPENIB_FRAG_SEND == openib_frag_type(des))
switch(openib_frag_type(des)) {
case MCA_BTL_OPENIB_FRAG_RECV:
case MCA_BTL_OPENIB_FRAG_RECV_USER:
to_base_frag(des)->base.des_src = NULL;
to_base_frag(des)->base.des_src_cnt = 0;
break;
case MCA_BTL_OPENIB_FRAG_SEND:
to_send_frag(des)->hdr = (mca_btl_openib_header_t*)
(((unsigned char*)to_send_frag(des)->chdr) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t));
to_com_frag(des)->sg_entry.addr = (uint64_t)to_send_frag(des)->hdr;
to_send_frag(des)->coalesced_length = 0;
assert(!opal_list_get_size(&to_send_frag(des)->coalesced_frags));
/* fall through */
case MCA_BTL_OPENIB_FRAG_SEND_USER:
to_base_frag(des)->base.des_dst = NULL;
to_base_frag(des)->base.des_dst_cnt = 0;
break;
default:
break;
}
MCA_BTL_IB_FRAG_RETURN(des);
@ -664,9 +776,10 @@ mca_btl_base_descriptor_t* mca_btl_openib_prepare_src(
if(max_data + reserve > btl->btl_max_send_size) {
max_data = btl->btl_max_send_size - reserve;
}
frag = (mca_btl_openib_com_frag_t*)
ib_frag_alloc(openib_btl, max_data + reserve, order);
frag = (mca_btl_openib_com_frag_t*)(reserve ?
ib_frag_alloc(openib_btl, max_data + reserve, order) :
mca_btl_openib_alloc(btl, endpoint, order, max_data));
if(NULL == frag)
return NULL;
@ -941,19 +1054,27 @@ int mca_btl_openib_finalize(struct mca_btl_base_module_t* btl)
/*
 * Send a descriptor to a peer, tagged with `tag`.
 *
 * The descriptor is either a regular send fragment or a coalesced fragment
 * that was packed into an already queued send fragment.
 */
int mca_btl_openib_send(
struct mca_btl_base_module_t* btl,
struct mca_btl_base_endpoint_t* endpoint,
struct mca_btl_base_descriptor_t* descriptor,
struct mca_btl_base_endpoint_t* ep,
struct mca_btl_base_descriptor_t* des,
mca_btl_base_tag_t tag)
{
mca_btl_openib_send_frag_t* frag = to_send_frag(descriptor);
mca_btl_openib_send_frag_t *frag;
assert(openib_frag_type(frag) == MCA_BTL_OPENIB_FRAG_SEND);
to_com_frag(frag)->endpoint = endpoint;
frag->hdr->tag = tag;
assert(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND ||
openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_COALESCED);
if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_COALESCED) {
/* a coalesced descriptor only completes its own sub-header (tag and actual
 * payload size); the fragment passed on for sending is the main fragment
 * it was packed into */
to_coalesced_frag(des)->hdr->tag = tag;
to_coalesced_frag(des)->hdr->size = des->des_src->seg_len;
frag = to_coalesced_frag(des)->send_frag;
} else {
/* regular send fragment: record the destination and tag the header */
frag = to_send_frag(des);
to_com_frag(des)->endpoint = ep;
frag->hdr->tag = tag;
}
return mca_btl_openib_endpoint_send(endpoint, frag);
return mca_btl_openib_endpoint_send(ep, frag);
}
/*

Просмотреть файл

@ -195,6 +195,7 @@ struct mca_btl_openib_component_t {
if_[in|ex]clude list that we use for error checking (to ensure
that they all exist) */
char **if_list;
bool use_message_coalescing;
#ifdef HAVE_IBV_FORK_INIT
/** Whether we want fork support or not */
int want_fork_support;
@ -316,6 +317,8 @@ struct mca_btl_openib_module_t {
ompi_free_list_t send_free_control; /**< frags for control messages */
ompi_free_list_t send_free_coalesced; /**< frags for coalesced messages */
opal_mutex_t ib_lock; /**< module level lock */
size_t ib_inline_max; /**< max size of inline send*/

Просмотреть файл

@ -215,10 +215,14 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
void* cbdata)
{
/* don't return credits used for control messages */
mca_btl_openib_module_t *obtl = (mca_btl_openib_module_t*)btl;
mca_btl_openib_endpoint_t* ep = to_com_frag(des)->endpoint;
mca_btl_openib_control_header_t *ctl_hdr =
to_base_frag(des)->segment.seg_addr.pval;
mca_btl_openib_eager_rdma_header_t *rdma_hdr;
mca_btl_openib_header_coalesced_t *clsc_hdr =
(mca_btl_openib_header_coalesced_t*)(ctl_hdr + 1);
size_t len = des->des_dst->seg_len - sizeof(*ctl_hdr);
switch (ctl_hdr->type) {
case MCA_BTL_OPENIB_CONTROL_CREDITS:
@ -251,6 +255,27 @@ static void btl_openib_control(mca_btl_base_module_t* btl,
ep->eager_rdma_remote.base.lval = rdma_hdr->rdma_start.lval;
ep->eager_rdma_remote.tokens=mca_btl_openib_component.eager_rdma_num - 1;
break;
case MCA_BTL_OPENIB_CONTROL_COALESCED:
while(len > 0) {
size_t skip = (sizeof(*clsc_hdr) + clsc_hdr->alloc_size);
mca_btl_base_descriptor_t tmp_des;
mca_btl_base_segment_t tmp_seg;
assert(len >= sizeof(*clsc_hdr));
tmp_des.des_dst = &tmp_seg;
tmp_des.des_dst_cnt = 1;
tmp_seg.seg_addr.pval = clsc_hdr + 1;
tmp_seg.seg_len = clsc_hdr->size;
/* call registered callback */
obtl->ib_reg[clsc_hdr->tag].cbfunc(&obtl->super, clsc_hdr->tag,
&tmp_des, obtl->ib_reg[clsc_hdr->tag].cbdata);
len -= skip;
clsc_hdr = (mca_btl_openib_header_coalesced_t*)
(((unsigned char*)clsc_hdr) + skip);
}
break;
default:
BTL_ERROR(("Unknown message type received by BTL"));
break;
@ -739,6 +764,7 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
OBJ_CONSTRUCT(&openib_btl->ib_lock, opal_mutex_t);
OBJ_CONSTRUCT(&openib_btl->send_free_control, ompi_free_list_t);
OBJ_CONSTRUCT(&openib_btl->send_free_coalesced, ompi_free_list_t);
OBJ_CONSTRUCT(&openib_btl->send_user_free, ompi_free_list_t);
OBJ_CONSTRUCT(&openib_btl->recv_user_free, ompi_free_list_t);
@ -771,7 +797,7 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
init_data->order = mca_btl_openib_component.rdma_qp;
init_data->list = &openib_btl->recv_user_free;
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(&openib_btl->recv_user_free,
if(OMPI_SUCCESS != ompi_free_list_init_ex_new(&openib_btl->recv_user_free,
sizeof(mca_btl_openib_get_frag_t), 2,
OBJ_CLASS(mca_btl_openib_get_frag_t),
0, 0,
@ -802,7 +828,21 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
init_data)) {
return OMPI_ERROR;
}
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
length = sizeof(mca_btl_openib_coalesced_frag_t);
init_data->list = &openib_btl->send_free_coalesced;
if(OMPI_SUCCESS != ompi_free_list_init_ex(&openib_btl->send_free_coalesced,
length, 2, OBJ_CLASS(mca_btl_openib_coalesced_frag_t),
mca_btl_openib_component.ib_free_list_num,
mca_btl_openib_component.ib_free_list_max,
mca_btl_openib_component.ib_free_list_inc,
NULL, mca_btl_openib_frag_init, init_data)) {
return OMPI_ERROR;
}
/* setup all the qps */
for(qp = 0; qp < mca_btl_openib_component.num_qps; qp++) {
OBJ_CONSTRUCT(&openib_btl->qps[qp].send_free, ompi_free_list_t);
@ -819,9 +859,11 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
/* Initialize pool of send fragments */
length = sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_footer_t) +
mca_btl_openib_component.qp_infos[qp].size;
length = sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t) +
sizeof(mca_btl_openib_footer_t) +
mca_btl_openib_component.qp_infos[qp].size;
init_data->order = qp;
init_data->list = &openib_btl->qps[qp].send_free;
@ -840,6 +882,8 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
init_data = malloc(sizeof(mca_btl_openib_frag_init_data_t));
length = sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t) +
sizeof(mca_btl_openib_footer_t) +
mca_btl_openib_component.qp_infos[qp].size;
@ -867,6 +911,8 @@ static int finish_btl_init(mca_btl_openib_module_t *openib_btl)
openib_btl->eager_rdma_frag_size = OPAL_ALIGN(
sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t) +
sizeof(mca_btl_openib_footer_t) +
openib_btl->super.btl_eager_limit,
mca_btl_openib_component.buffer_alignment, size_t);
@ -1598,8 +1644,16 @@ static int btl_openib_module_progress(mca_btl_openib_hca_t* hca)
case IBV_WC_RDMA_WRITE:
case IBV_WC_SEND:
if(openib_frag_type(des) == MCA_BTL_OPENIB_FRAG_SEND) {
opal_list_item_t *i;
while((i = opal_list_remove_first(
&to_send_frag(des)->coalesced_frags))) {
to_base_frag(i)->base.des_cbfunc(&openib_btl->super,
endpoint, &to_base_frag(i)->base, OMPI_SUCCESS);
}
}
/* Process a completed send/put/get */
des->des_cbfunc(&openib_btl->super, endpoint, des, OMPI_SUCCESS);
des->des_cbfunc(&openib_btl->super, endpoint, des,OMPI_SUCCESS);
/* return send wqe */
qp_put_wqe(endpoint, qp);

Просмотреть файл

@ -39,10 +39,8 @@
#include "ompi/mca/pml/base/pml_base_sendreq.h"
#include "ompi/class/ompi_free_list.h"
#include "btl_openib.h"
#include "btl_openib_endpoint.h"
#include "btl_openib_proc.h"
#include "btl_openib_frag.h"
#include "btl_openib_xrc.h"
static void mca_btl_openib_endpoint_construct(mca_btl_base_endpoint_t* endpoint);
@ -59,7 +57,7 @@ static int post_send(mca_btl_openib_endpoint_t *ep,
int qp = to_base_frag(frag)->base.order;
sg->length = seg->seg_len + sizeof(mca_btl_openib_header_t) +
(rdma ? sizeof(mca_btl_openib_footer_t) : 0);
(rdma ? sizeof(mca_btl_openib_footer_t) : 0) + frag->coalesced_length;
if(sg->length <= openib_btl->ib_inline_max) {
sr_desc->send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
@ -73,8 +71,8 @@ static int post_send(mca_btl_openib_endpoint_t *ep,
if(rdma) {
int32_t head;
mca_btl_openib_footer_t* ftr =
(mca_btl_openib_footer_t*)(((char*)seg->seg_addr.pval) +
seg->seg_len);
(mca_btl_openib_footer_t*)(((char*)frag->hdr) + sg->length -
sizeof(mca_btl_openib_footer_t));
sr_desc->opcode = IBV_WR_RDMA_WRITE;
MCA_BTL_OPENIB_RDMA_FRAG_SET_SIZE(ftr, sg->length);
MCA_BTL_OPENIB_RDMA_MAKE_LOCAL(ftr);
@ -184,6 +182,7 @@ int mca_btl_openib_endpoint_post_send(mca_btl_openib_endpoint_t *endpoint,
int qp, ib_rc;
int32_t cm_return;
bool do_rdma = false;
size_t eager_limit;
if(OPAL_LIKELY(des->order == MCA_BTL_NO_ORDER))
des->order = frag->qp_idx;
@ -193,7 +192,10 @@ int mca_btl_openib_endpoint_post_send(mca_btl_openib_endpoint_t *endpoint,
if(acruire_wqe(endpoint, frag) != OMPI_SUCCESS)
return OMPI_ERR_OUT_OF_RESOURCE;
if(des->des_src->seg_len <= mca_btl_openib_component.eager_limit &&
eager_limit = mca_btl_openib_component.eager_limit +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t);
if(des->des_src->seg_len + frag->coalesced_length <= eager_limit &&
(des->des_flags & MCA_BTL_DES_FLAGS_PRIORITY)) {
/* High priority frag. Try to send over eager RDMA */
if(acquire_eager_rdma_send_credit(endpoint) == OMPI_SUCCESS)

Просмотреть файл

@ -28,8 +28,8 @@
#include "opal/event/event.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/btl/btl.h"
#include "btl_openib_frag.h"
#include "btl_openib.h"
#include "btl_openib_frag.h"
#include "btl_openib_eager_rdma.h"
#include <errno.h>
#include <string.h>

Просмотреть файл

@ -19,6 +19,7 @@
* $HEADER$
*/
#include "btl_openib.h"
#include "btl_openib_frag.h"
#include "btl_openib_eager_rdma.h"
@ -31,7 +32,9 @@ void mca_btl_openib_frag_init(ompi_free_list_item_t* item, void* ctx)
to_recv_frag(frag)->qp_idx = init_data->order;
to_com_frag(frag)->sg_entry.length =
mca_btl_openib_component.qp_infos[init_data->order].size +
sizeof(mca_btl_openib_header_t);
sizeof(mca_btl_openib_header_t) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t);
}
if(MCA_BTL_OPENIB_FRAG_SEND == frag->type)
@ -92,10 +95,15 @@ static void send_constructor(mca_btl_openib_send_frag_t *frag)
base_frag->type = MCA_BTL_OPENIB_FRAG_SEND;
frag->hdr = (mca_btl_openib_header_t*)base_frag->base.super.ptr;
base_frag->segment.seg_addr.pval =
((unsigned char* )frag->hdr) + sizeof(mca_btl_openib_header_t);
frag->chdr = (mca_btl_openib_header_t*)base_frag->base.super.ptr;
frag->hdr = (mca_btl_openib_header_t*)
(((unsigned char*)base_frag->base.super.ptr) +
sizeof(mca_btl_openib_header_coalesced_t) +
sizeof(mca_btl_openib_control_header_t));
base_frag->segment.seg_addr.pval = frag->hdr + 1;
to_com_frag(frag)->sg_entry.addr = (uint64_t)frag->hdr;
frag->coalesced_length = 0;
OBJ_CONSTRUCT(&frag->coalesced_frags, opal_list_t);
}
static void recv_constructor(mca_btl_openib_recv_frag_t *frag)
@ -138,6 +146,18 @@ static void get_constructor(mca_btl_openib_get_frag_t *frag)
frag->sr_desc.next = NULL;
}
/*
 * Constructor for coalesced fragments.
 *
 * Tags the fragment as MCA_BTL_OPENIB_FRAG_COALESCED and wires up its
 * descriptor: the single source segment is the fragment's own embedded
 * segment, and there are no destination segments.
 */
static void coalesced_constructor(mca_btl_openib_coalesced_frag_t *frag)
{
    mca_btl_openib_frag_t *self = to_base_frag(frag);

    self->type = MCA_BTL_OPENIB_FRAG_COALESCED;

    /* no destination segments */
    self->base.des_dst_cnt = 0;
    self->base.des_dst = NULL;

    /* exactly one source segment: the one embedded in the fragment */
    self->base.des_src_cnt = 1;
    self->base.des_src = &self->segment;
}
OBJ_CLASS_INSTANCE(
mca_btl_openib_frag_t,
mca_btl_base_descriptor_t,
@ -191,3 +211,9 @@ OBJ_CLASS_INSTANCE(
mca_btl_openib_in_frag_t,
get_constructor,
NULL);
/* Class definition for coalesced fragments: the parent class is
 * mca_btl_openib_frag_t, coalesced_constructor is the constructor and no
 * destructor is given (NULL). */
OBJ_CLASS_INSTANCE(
mca_btl_openib_coalesced_frag_t,
mca_btl_openib_frag_t,
coalesced_constructor,
NULL);

Просмотреть файл

@ -53,6 +53,11 @@ do { \
(h).credits = ntohs((h).credits); \
} while (0)
/* Header placed in front of every message packed into a coalesced
 * fragment. */
typedef struct mca_btl_openib_header_coalesced_t {
/* tag of this message; the receiver uses it to pick the registered callback */
mca_btl_base_tag_t tag;
/* length of the payload that is handed to the receive callback */
uint32_t size;
/* bytes reserved for the payload; the receiver skips
 * sizeof(header) + alloc_size to reach the next packed message */
uint32_t alloc_size;
} mca_btl_openib_header_coalesced_t;
struct mca_btl_openib_footer_t {
#if OMPI_ENABLE_DEBUG
@ -101,8 +106,9 @@ typedef struct mca_btl_openib_footer_t mca_btl_openib_footer_t;
#endif
#define MCA_BTL_OPENIB_CONTROL_CREDITS 0
#define MCA_BTL_OPENIB_CONTROL_RDMA 1
#define MCA_BTL_OPENIB_CONTROL_CREDITS 0
#define MCA_BTL_OPENIB_CONTROL_RDMA 1
#define MCA_BTL_OPENIB_CONTROL_COALESCED 2
struct mca_btl_openib_control_header_t {
uint8_t type;
@ -153,7 +159,8 @@ enum mca_btl_openib_frag_type_t {
MCA_BTL_OPENIB_FRAG_SEND,
MCA_BTL_OPENIB_FRAG_SEND_USER,
MCA_BTL_OPENIB_FRAG_EAGER_RDMA,
MCA_BTL_OPENIB_FRAG_CONTROL
MCA_BTL_OPENIB_FRAG_CONTROL,
MCA_BTL_OPENIB_FRAG_COALESCED
};
typedef enum mca_btl_openib_frag_type_t mca_btl_openib_frag_type_t;
@ -199,9 +206,11 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_in_frag_t);
typedef struct mca_btl_openib_send_frag_t {
mca_btl_openib_out_frag_t super;
mca_btl_openib_header_t *hdr;
/* hdr: header in use when the fragment is posted; chdr: start of the
 * fragment buffer, hdr is switched to it once a message is coalesced into
 * the fragment */
mca_btl_openib_header_t *hdr, *chdr;
mca_btl_openib_footer_t *ftr;
uint8_t qp_idx;
/* bytes added on top of the fragment's own segment by coalescing (extra
 * headers plus packed messages); 0 while nothing is coalesced */
uint32_t coalesced_length;
/* coalesced fragments packed into this fragment; drained on send completion,
 * when each one's callback is invoked */
opal_list_t coalesced_frags;
} mca_btl_openib_send_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_send_frag_t);
@ -235,6 +244,16 @@ typedef struct mca_btl_openib_send_frag_t mca_btl_openib_send_control_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_send_control_frag_t);
#define to_send_control_frag(f) ((mca_btl_openib_send_control_frag_t*)(f))
/* Descriptor for a message that is packed into an already queued send
 * fragment instead of getting a buffer of its own. */
typedef struct mca_btl_openib_coalesced_frag_t {
mca_btl_openib_frag_t super;
/* send fragment this message is packed into */
mca_btl_openib_send_frag_t *send_frag;
/* this message's coalesced header inside send_frag's buffer; the payload
 * starts right after it */
mca_btl_openib_header_coalesced_t *hdr;
} mca_btl_openib_coalesced_frag_t;
OBJ_CLASS_DECLARATION(mca_btl_openib_coalesced_frag_t);
/* cast a fragment/descriptor pointer to a coalesced fragment pointer */
#define to_coalesced_frag(f) ((mca_btl_openib_coalesced_frag_t*)(f))
/*
* Allocate an IB send descriptor
*
@ -247,6 +266,17 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_send_control_frag_t);
frag = to_send_control_frag(item); \
} while(0)
/*
 * Map a requested fragment size to a QP index.
 *
 * Walks the component's QP table from index 0 upwards and returns the index
 * of the first QP whose buffer size is at least `size`.  MCA_BTL_NO_ORDER is
 * returned when no configured QP is large enough.  `btl` is not consulted.
 */
static inline uint8_t frag_size_to_order(mca_btl_openib_module_t* btl,
        size_t size)
{
    int idx = 0;

    while(idx < mca_btl_openib_component.num_qps) {
        if(mca_btl_openib_component.qp_infos[idx].size >= size) {
            return idx;
        }
        idx++;
    }

    return MCA_BTL_NO_ORDER;
}
#define MCA_BTL_IB_FRAG_ALLOC_SEND_USER(btl, frag, rc) \
do { \
ompi_free_list_item_t *item; \
@ -261,6 +291,14 @@ OBJ_CLASS_DECLARATION(mca_btl_openib_send_control_frag_t);
frag = to_com_frag(item); \
} while(0)
/*
 * Take an item from the module's send_free_coalesced free list and store it
 * in `frag` as a coalesced fragment.  The status reported by
 * OMPI_FREE_LIST_GET lands in a local (ign_rc) and is not propagated.
 * NOTE(review): presumably `frag` ends up NULL when the free list cannot
 * supply an item - confirm, and check that callers cope with it.
 */
#define MCA_BTL_IB_FRAG_ALLOC_COALESCED(btl, frag) \
do { \
int ign_rc; \
ompi_free_list_item_t *item; \
OMPI_FREE_LIST_GET(&(btl)->send_free_coalesced, item, ign_rc) \
frag = to_coalesced_frag(item); \
} while(0)
#define MCA_BTL_IB_FRAG_RETURN(frag) \
do { \
OMPI_FREE_LIST_RETURN(to_base_frag(frag)->list, \

Просмотреть файл

@ -416,6 +416,10 @@ int btl_openib_register_mca_params(void)
mca_btl_openib_component.buffer_alignment = (uint32_t) ival;
}
CHECK(reg_int("use_message_coalescing",
"Use message coalescing", 1, &ival, 0));
mca_btl_openib_component.use_message_coalescing = (0 != ival);
/* Info only */
mca_base_param_reg_int(&mca_btl_openib_component.super.btl_version,