1
1

Cisco CSCuj13135: Data corruption in MPI_Bsend_ator_c

Do not assume that the "size" passed to alloc_send() will be the same as
the size of the message the resulting fragment will hold when
usnic_send() is called.  This means usnic_send()/usnic_put() can never
trust any pre-computed size values, and are only allowed to look at the
lengths and pointers of the elements in the desc SG list.

This commit was SVN r29183.
Этот коммит содержится в:
Reese Faucette 2013-09-17 07:25:05 +00:00
родитель b9103c0f66
Коммит 25b5c84d0f
5 изменённых файлов: 97 добавлений и 213 удалений

Просмотреть файл

@ -144,14 +144,14 @@ recv_seg_constructor(
seg->rs_desc.des_src = NULL;
seg->rs_desc.des_src_cnt = 0;
/* PML want to see its header
/*
* This pointer is only correct for incoming segments of type
* OMPI_BTL_USNIC_PAYLOAD_TYPE_FRAG, but that's the only time
* we ever give segment directly to PML, so its OK
* we ever give segment directly to upper layer, so its OK
*/
bseg->us_payload.pml_header = (mca_btl_base_header_t *)
bseg->us_payload.ompi_header = (mca_btl_base_header_t *)
(bseg->us_btl_header+1);
seg->rs_segment.seg_addr.pval = bseg->us_payload.pml_header;
seg->rs_segment.seg_addr.pval = bseg->us_payload.ompi_header;
}
static void
@ -162,6 +162,8 @@ send_frag_constructor(ompi_btl_usnic_send_frag_t *frag)
/* Fill in source descriptor */
desc = &frag->sf_base.uf_base;
desc->des_src = frag->sf_base.uf_src_seg;
frag->sf_base.uf_src_seg[0].seg_len = 0;
frag->sf_base.uf_src_seg[1].seg_len = 0;
desc->des_src_cnt = 2;
desc->des_dst = frag->sf_base.uf_dst_seg;
desc->des_dst_cnt = 0;
@ -210,9 +212,9 @@ large_send_frag_constructor(ompi_btl_usnic_large_send_frag_t *lfrag)
{
lfrag->lsf_base.sf_base.uf_type = OMPI_BTL_USNIC_FRAG_LARGE_SEND;
/* save data pointer for PML */
/* save data pointer for upper layer */
lfrag->lsf_base.sf_base.uf_src_seg[0].seg_addr.pval =
&lfrag->lsf_pml_header;
&lfrag->lsf_ompi_header;
OBJ_CONSTRUCT(&lfrag->lsf_seg_chain, opal_list_t);
}
@ -279,99 +281,3 @@ OBJ_CLASS_INSTANCE(ompi_btl_usnic_put_dest_frag_t,
ompi_btl_usnic_frag_t,
put_dest_frag_constructor,
NULL);
/*******************************************************************************/
#if MSGDEBUG
/*
 * Debug helper: log a one-line summary of an ACK fragment -- its
 * address, the local process vpid, and whether the FRAG_ALLOCED state
 * bit is set.
 *
 * frag: fragment to describe; it is only read, never modified.
 */
static void dump_ack_frag(ompi_btl_usnic_frag_t* frag)
{
    char out[256];

    memset(out, 0, sizeof(out));
    snprintf(out, sizeof(out),
             "=== ACK frag %p (MCW %d): alloced %d",
             (void*) frag,
             ompi_proc_local()->proc_name.vpid,
             FRAG_STATE_ISSET(frag, FRAG_ALLOCED));
    /* Hand the finished text over as an argument; never use a
     * run-time-built buffer as the format string itself. */
    opal_output(0, "%s", out);
}
/*
 * Debug helper: log a one-line summary of a SEND fragment -- its
 * address, the local process vpid, the individual state bits, the
 * send_wr_posted counter and the sequence number.
 *
 * The sequence number is read from the BTL header only when the
 * fragment is marked FRAG_ALLOCED; otherwise an all-ones value is
 * printed in its place.
 *
 * frag: fragment to describe; it is only read, never modified.
 */
static void dump_send_frag(ompi_btl_usnic_frag_t* frag)
{
    char out[256];

    memset(out, 0, sizeof(out));
    /* NOTE(review): "%lu" assumes ompi_btl_usnic_seq_t is unsigned long
     * -- confirm against the typedef. */
    snprintf(out, sizeof(out),
             "=== SEND frag %p (MCW %d): alloced %d send_wr %d acked %d enqueued %d pml_callback %d hotel %d || seq %lu",
             (void*) frag,
             ompi_proc_local()->proc_name.vpid,
             FRAG_STATE_ISSET(frag, FRAG_ALLOCED),
             frag->send_wr_posted,
             FRAG_STATE_ISSET(frag, FRAG_SEND_ACKED),
             FRAG_STATE_ISSET(frag, FRAG_SEND_ENQUEUED),
             FRAG_STATE_ISSET(frag, FRAG_PML_CALLED_BACK),
             FRAG_STATE_ISSET(frag, FRAG_IN_HOTEL),
             FRAG_STATE_ISSET(frag, FRAG_ALLOCED) ?
                 frag->btl_header->seq : (ompi_btl_usnic_seq_t) ~0
             );
    /* Hand the finished text over as an argument; never use a
     * run-time-built buffer as the format string itself. */
    opal_output(0, "%s", out);
}
/*
 * Debug helper: log a one-line summary of a RECV fragment -- its
 * address, the local process vpid, and the FRAG_ALLOCED and
 * FRAG_RECV_WR_POSTED state bits.
 *
 * frag: fragment to describe; it is only read, never modified.
 */
static void dump_recv_frag(ompi_btl_usnic_frag_t* frag)
{
    char out[256];

    memset(out, 0, sizeof(out));
    snprintf(out, sizeof(out),
             "=== RECV frag %p (MCW %d): alloced %d posted %d",
             (void*) frag,
             ompi_proc_local()->proc_name.vpid,
             FRAG_STATE_ISSET(frag, FRAG_ALLOCED),
             FRAG_STATE_ISSET(frag, FRAG_RECV_WR_POSTED));
    /* Hand the finished text over as an argument; never use a
     * run-time-built buffer as the format string itself. */
    opal_output(0, "%s", out);
}
/*
 * Log a description of a fragment by dispatching on its type to the
 * matching per-type dump helper.  A fragment whose type is none of
 * ACK, SEND or RECV is reported as unknown, with only its address.
 *
 * frag: fragment to describe.
 */
void ompi_btl_usnic_frag_dump(ompi_btl_usnic_frag_t *frag)
{
    if (OMPI_BTL_USNIC_FRAG_ACK == frag->type) {
        dump_ack_frag(frag);
    } else if (OMPI_BTL_USNIC_FRAG_SEND == frag->type) {
        dump_send_frag(frag);
    } else if (OMPI_BTL_USNIC_FRAG_RECV == frag->type) {
        dump_recv_frag(frag);
    } else {
        /* Unrecognized type: nothing type-specific can be printed */
        opal_output(0, "=== UNKNOWN type frag %p: (!)", (void*) frag);
    }
}
#endif
/*******************************************************************************/
#if HISTORY
/*
 * Record one event in a fragment's history ring.
 *
 * The entry at index history_next is zeroed and then filled with the
 * given source location and message.  history_next then advances
 * modulo NUM_FRAG_HISTORY; if it catches up with history_start, the
 * start index is advanced by one as well.
 *
 * frag:    fragment whose history is being extended
 * file:    source file name to record (truncated if too long)
 * line:    source line number to record
 * message: free-form text to record (truncated if too long)
 */
void ompi_btl_usnic_frag_history(ompi_btl_usnic_frag_t *frag,
                                 char *file, int line,
                                 const char *message)
{
    int i = frag->history_next;
    ompi_btl_usnic_frag_history_t *h = &(frag->history[i]);

    memset(h, 0, sizeof(*h));

    /* Copy at most size-1 bytes: strncpy() does not terminate the
     * destination when the source fills it, so the final byte is left
     * as the zero written by the memset above.
     * NOTE(review): assumes h->file and h->message are char arrays
     * (as the sizeof usage already implies) -- confirm. */
    strncpy(h->file, file, sizeof(h->file) - 1);
    h->line = line;
    strncpy(h->message, message, sizeof(h->message) - 1);

    frag->history_next = (frag->history_next + 1) % NUM_FRAG_HISTORY;
    if (frag->history_start == frag->history_next) {
        frag->history_start = (frag->history_start + 1) % NUM_FRAG_HISTORY;
    }
}
#endif

Просмотреть файл

@ -39,7 +39,7 @@ struct ompi_btl_usnic_module_t;
/*
* Some definitions:
* frag - what the PML later hands us to send, may be large or small
* frag - what the upper layer hands us to send, may be large or small
* segment - one packet on the wire
* chunk - when a fragment is too big to fit into one segment, it is
* broken into chunks, each chunk fitting in one segment
@ -47,7 +47,7 @@ struct ompi_btl_usnic_module_t;
/**
* Fragment types
* The PML may give us very large "fragements" to send, larger than
* The upper layer may give us very large "fragements" to send, larger than
* an MTU. We break fragments into segments for sending, a segment being
* defined to fit within an MTU.
*/
@ -58,7 +58,7 @@ typedef enum {
} ompi_btl_usnic_frag_type_t;
#if MSGDEBUG2
static inline char *
static inline const char *
usnic_frag_type(ompi_btl_usnic_frag_type_t t)
{
switch (t) {
@ -78,7 +78,7 @@ typedef enum {
} ompi_btl_usnic_seg_type_t;
#if MSGDEBUG2
static inline char *
static inline const char *
usnic_seg_type(ompi_btl_usnic_seg_type_t t)
{
switch (t) {
@ -111,8 +111,8 @@ typedef struct {
*/
typedef enum {
OMPI_BTL_USNIC_PAYLOAD_TYPE_ACK = 1,
OMPI_BTL_USNIC_PAYLOAD_TYPE_FRAG = 2, /* an entire PML fragment */
OMPI_BTL_USNIC_PAYLOAD_TYPE_CHUNK = 3 /* one chunk of PML frag */
OMPI_BTL_USNIC_PAYLOAD_TYPE_FRAG = 2, /* an entire fragment */
OMPI_BTL_USNIC_PAYLOAD_TYPE_CHUNK = 3 /* one chunk of fragment */
} ompi_btl_usnic_payload_type_t;
/**
@ -141,7 +141,7 @@ typedef struct {
/* Type of BTL header (see enum, above) */
uint8_t payload_type;
/* tag for PML, etc */
/* tag for upper layer */
mca_btl_base_tag_t tag;
} ompi_btl_usnic_btl_header_t;
@ -156,25 +156,6 @@ typedef struct {
uint32_t ch_frag_offset; /* where in fragment this goes */
} ompi_btl_usnic_btl_chunk_header_t;
/*
* Enums for the states of frags
*/
typedef enum {
/* Frag states: all frags */
FRAG_ALLOCED = 0x01,
/* Frag states: send frags */
FRAG_SEND_ACKED = 0x02,
FRAG_SEND_ENQUEUED = 0x04,
FRAG_PML_CALLED_BACK = 0x08,
FRAG_IN_HOTEL = 0x10,
/* Frag states: receive frags */
FRAG_RECV_WR_POSTED = 0x40,
FRAG_MAX = 0xff
} ompi_btl_usnic_frag_state_flags_t;
/**
* Descriptor for a common segment. This is exactly one packet and may
* be send or receive
@ -197,7 +178,7 @@ typedef struct ompi_btl_usnic_segment_t {
union {
uint8_t *raw;
void *pml_header;
void *ompi_header;
} us_payload;
} ompi_btl_usnic_segment_t;
@ -270,7 +251,7 @@ typedef struct ompi_btl_usnic_send_frag_t {
struct mca_btl_base_endpoint_t *sf_endpoint;
size_t sf_size; /* total_fragment size (PML + user payload) */
size_t sf_size; /* total_fragment size (upper + user payload) */
/* original message data if convertor required */
struct opal_convertor_t* sf_convertor;
@ -283,13 +264,13 @@ typedef struct ompi_btl_usnic_send_frag_t {
/**
* Descriptor for a large fragment
* Large fragment uses two SG entries - one points to PML header,
* Large fragment uses two SG entries - one points to upper layer header,
* other points to data.
*/
typedef struct ompi_btl_usnic_large_send_frag_t {
ompi_btl_usnic_send_frag_t lsf_base;
char lsf_pml_header[64]; /* space for PML header */
char lsf_ompi_header[64]; /* space for upper layer header */
mca_btl_base_tag_t lsf_tag; /* save tag */
uint32_t lsf_frag_id; /* fragment ID for reassembly */
@ -306,10 +287,10 @@ typedef struct ompi_btl_usnic_large_send_frag_t {
* an inline send, but will convert to a single SG entry if inline cannot
* be done and data must be copied.
* First segment will point to registered memory of associated segment to
* hold BTL and PML headers.
* hold BTL and upper layer headers.
* Second segment will point directly to user data. If inlining fails, we
* will copy user data into the registered memory after the PML header and
* convert to a single segment.
* will copy user data into the registered memory after the upper layer header
* and convert to a single segment.
*/
typedef struct ompi_btl_usnic_small_send_frag_t {
ompi_btl_usnic_send_frag_t ssf_base;
@ -417,7 +398,7 @@ ompi_btl_usnic_put_dest_frag_alloc(
* A send frag can be returned to the freelist when all of the
* following are true:
*
* 1. PML is freeing it (via module.free())
* 1. upper layer is freeing it (via module.free())
* 2. Or all of these:
* a) it finishes sending all its segments
* b) all of its segments have been ACKed
@ -449,6 +430,8 @@ ompi_btl_usnic_frag_return(
opal_output(0, "freeing frag %p, type %s\n", (void *)frag,
usnic_frag_type(frag->uf_type));
#endif
frag->uf_src_seg[0].seg_len = 0;
frag->uf_src_seg[1].seg_len = 0;
OMPI_FREE_LIST_RETURN_MT(frag->uf_freelist, &(frag->uf_base.super));
}

Просмотреть файл

@ -204,7 +204,7 @@ static int usnic_register_pml_err_cb(struct mca_btl_base_module_t* btl,
}
/**
* Allocate PML control messages or eager frags if BTL does not have
* Allocate control messages or eager frags if BTL does not have
* INPLACE flag. To be clear: max it will ever alloc is eager_limit.
* THEREFORE: eager_limit is the max that ALLOC must always be able to
* alloc.
@ -222,9 +222,9 @@ usnic_alloc(struct mca_btl_base_module_t* btl,
ompi_btl_usnic_small_send_frag_t *sfrag;
mca_btl_base_descriptor_t *desc;
if (size > module->max_frag_payload)
if (size > module->max_frag_payload) {
size = module->max_frag_payload;
}
sfrag = ompi_btl_usnic_small_send_frag_alloc(module);
if (NULL == sfrag) {
@ -238,10 +238,6 @@ usnic_alloc(struct mca_btl_base_module_t* btl,
(void *)frag, (int)size, flags);
#endif
/* set # of bytes remaining to be ACKed */
frag->sf_ack_bytes_left = size;
frag->sf_size = size;
/* set endpoint */
frag->sf_endpoint = endpoint;
@ -281,7 +277,7 @@ static int usnic_free(struct mca_btl_base_module_t* btl,
/*
* Notes from george:
*
* - BTL ALLOC: allocating PML control messages or eager frags if BTL
* - BTL ALLOC: allocating control messages or eager frags if BTL
does not have INPLACE flag. To be clear: max it will ever alloc
is eager_limit. THEREFORE: eager_limit is the max that ALLOC
must always be able to alloc.
@ -305,12 +301,12 @@ static int usnic_free(struct mca_btl_base_module_t* btl,
* we will leave data in place
*
* small,convertor: copy the data into the segment associated with small frag,
* PML will put header in this seg, single entry in desc SG
* small,no convertor: PML will put header in attached segment SG[0],
* caller will put header in this seg, single entry in desc SG
* small,no convertor: caller will put header in attached segment SG[0],
* save pointer to user data in SG[1], 2 SG entries
* large,convertor: copy data into chain of segments, leaving room for
* PML header at start of 1st segment, 2 SG entries
* large,not convertor: PML will put header in buffer in the large frag itself,
* caller header at start of 1st segment, 2 SG entries
* large,not convertor: caller will put header in buffer in the large frag itself,
* save pointer to user data in SG[1]. 2 SG entries
*
* NOTE that the *only* reason this routine is allowed to return a size smaller
@ -358,7 +354,7 @@ usnic_prepare_src(
*/
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
/* put user data just after end of 1st seg (PML header) */
/* put user data just after end of 1st seg (upper layer header) */
if (payload_len > module->max_frag_payload) {
payload_len = module->max_frag_payload;
}
@ -400,14 +396,14 @@ usnic_prepare_src(
* If a convertor is required, pack the data into a chain of segments.
* We will later send from the segments one at a time. This allows
* us to absorb a large convertor-based send and still give an accurate
* data count back to the PML
* data count back to the upper layer
*/
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
ompi_btl_usnic_chunk_segment_t *seg;
unsigned pml_hdr_len;
unsigned ompi_hdr_len;
unsigned bytes_to_pack;
pml_hdr_len = reserve;
ompi_hdr_len = reserve;
bytes_to_pack = *size;
while (bytes_to_pack > 0) {
seg = ompi_btl_usnic_chunk_segment_alloc(module);
@ -416,14 +412,14 @@ usnic_prepare_src(
abort(); /* XXX */
}
/* put user data just after end of 1st seg (PML header) */
payload_len = pml_hdr_len + bytes_to_pack;
/* put user data just after end of 1st seg (upper header) */
payload_len = ompi_hdr_len + bytes_to_pack;
if (payload_len > module->max_chunk_payload) {
payload_len = module->max_chunk_payload;
}
iov.iov_len = payload_len - pml_hdr_len;
iov.iov_len = payload_len - ompi_hdr_len;
iov.iov_base = (IOVBASE_TYPE*)
(seg->ss_base.us_payload.raw + pml_hdr_len);
(seg->ss_base.us_payload.raw + ompi_hdr_len);
iov_count = 1;
max_data = iov.iov_len;
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
@ -446,9 +442,9 @@ usnic_prepare_src(
opal_list_append(&lfrag->lsf_seg_chain,
&seg->ss_base.us_list.super);
seg->ss_parent_frag = &lfrag->lsf_base;
seg->ss_base.us_sg_entry[0].length = max_data + pml_hdr_len;
seg->ss_base.us_sg_entry[0].length = max_data + ompi_hdr_len;
pml_hdr_len = 0;
ompi_hdr_len = 0;
bytes_to_pack -= max_data;
}
payload_len = *size + reserve;
@ -464,7 +460,7 @@ usnic_prepare_src(
&lfrag->lsf_base.sf_base.uf_src_seg[1].seg_addr.pval);
lfrag->lsf_base.sf_convertor = NULL;
lfrag->lsf_base.sf_base.uf_src_seg[0].seg_addr.pval =
&lfrag->lsf_pml_header;
&lfrag->lsf_ompi_header;
}
@ -472,23 +468,18 @@ usnic_prepare_src(
lfrag->lsf_cur_offset = 0;
lfrag->lsf_bytes_left = payload_len;
/* make sure PML header small enough */
assert(reserve < sizeof(lfrag->lsf_pml_header));
/* make sure upper header small enough */
assert(reserve < sizeof(lfrag->lsf_ompi_header));
frag->sf_base.uf_base.des_src_cnt = 2;
frag->sf_base.uf_src_seg[0].seg_len = reserve;
frag->sf_base.uf_src_seg[1].seg_len = *size;
}
/* fill in segment sizes */
frag->sf_size = payload_len;
/* set up common parts of frag */
frag->sf_base.uf_base.des_flags = flags;
frag->sf_endpoint = endpoint;
/* fragment accounting */
frag->sf_ack_bytes_left = payload_len;
desc = &frag->sf_base.uf_base;
#if MSGDEBUG2
@ -496,10 +487,10 @@ usnic_prepare_src(
module->device->name,
payload_len <= module->max_frag_payload?"small":"large",
(void *)frag, (int)reserve, (int)*size);
{ int i;
{ unsigned i;
for (i=0; i<desc->des_src_cnt; ++i)
opal_output(0, " %d: ptr:%p len:%d\n", i,
desc->des_src[i].seg_addr.pval,
(void *)desc->des_src[i].seg_addr.pval,
desc->des_src[i].seg_len);
}
#endif
@ -565,16 +556,27 @@ usnic_put(
frag = (ompi_btl_usnic_send_frag_t *)des;
/*
* Our descriptors are always either 1 or 2 segments.
* We always clear these lengths when the fragment is freed
* and only fill in what's needed in either prepare_src or usnic_alloc,
* so the total fragment length is always the sum of the 2 lengths.
*/
frag->sf_size = frag->sf_base.uf_src_seg[0].seg_len +
frag->sf_base.uf_src_seg[1].seg_len;
frag->sf_ack_bytes_left = frag->sf_size;
#if MSGDEBUG2
opal_output(0, "usnic_put, frag=%p, source=\n", frag);
{ int i;
opal_output(0, "usnic_put, frag=%p, size=%d, source=\n", (void *)frag,
(int)frag->sf_size);
{ unsigned i;
for (i=0; i<des->des_src_cnt; ++i)
opal_output(0, " %d: ptr:%p len:%d\n", i,
des->des_src[i].seg_addr.pval,
des->des_src[i].seg_len);
}
opal_output(0, "dest:\n");
{ int i;
{ unsigned i;
for (i=0; i<des->des_dst_cnt; ++i)
opal_output(0, " %d: ptr:%p len:%d\n", i,
des->des_dst[i].seg_addr.pval,
@ -582,7 +584,7 @@ usnic_put(
}
#endif
/* copy out address - why does he not use ours? silly PML */
/* copy out address - why does he not use ours? */
frag->sf_base.uf_dst_seg[0].seg_addr.pval = des->des_dst->seg_addr.pval;
/*
@ -619,7 +621,8 @@ usnic_put(
/* set up VERBS SG list */
sseg->ss_send_desc.num_sge = 1;
sseg->ss_base.us_sg_entry[0].length =
sizeof(ompi_btl_usnic_btl_header_t) + frag->sf_size;
sizeof(ompi_btl_usnic_btl_header_t) +
frag->sf_base.uf_base.des_src[1].seg_len;
/* use standard channel */
sseg->ss_channel = USNIC_DATA_CHANNEL;
@ -961,16 +964,16 @@ usnic_handle_large_send(
sseg->ss_parent_frag = frag;
/* If this is the first chunk of the frag, need to insert
* the PML header at the start. On subsequent chunks,
* skip the PML header
* the upper header at the start. On subsequent chunks,
* skip the upper header
*/
if (lfrag->lsf_cur_offset == 0) {
/* copy in the PML header */
memcpy(sseg->ss_base.us_payload.raw, lfrag->lsf_pml_header,
/* copy in the upper header */
memcpy(sseg->ss_base.us_payload.raw, lfrag->lsf_ompi_header,
lfrag->lsf_base.sf_base.uf_src_seg[0].seg_len);
/* adjust data pointer and len to skip PML */
/* adjust data pointer and len to skip upper header */
iov.iov_base = sseg->ss_base.us_payload.raw +
lfrag->lsf_base.sf_base.uf_src_seg[0].seg_len;
@ -1004,7 +1007,7 @@ usnic_handle_large_send(
payload_len += max_data;
/* We are sending converted data, which means we have a list of segments
* containing the data. PML header is already in first segment
* containing the data. upper layer header is already in first segment
*/
} else {
sseg = (ompi_btl_usnic_send_segment_t *)
@ -1040,7 +1043,7 @@ usnic_handle_large_send(
/* done with fragment? */
if (lfrag->lsf_bytes_left == 0) {
/* remove this frag from sending list now because PML may
/* remove this frag from sending list now because upper layer may
* decide to put it on some other list in the callback
*/
opal_list_remove_item(&endpoint->endpoint_frag_send_queue,
@ -1055,7 +1058,8 @@ usnic_handle_large_send(
#if MSGDEBUG2
opal_output(0, "callback for large frag %p, len=%zd\n",
(void *)frag->sf_base.uf_base.des_cbfunc, frag->sf_size);
(void *)(uintptr_t)frag->sf_base.uf_base.des_cbfunc,
frag->sf_size);
#endif
frag->sf_base.uf_base.des_cbfunc(&module->super,
frag->sf_endpoint, &frag->sf_base.uf_base,
@ -1123,7 +1127,7 @@ ompi_btl_usnic_module_progress_sends(
*/
if (frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_SMALL_SEND) {
/* remove this frag from sending list now because PML may
/* remove this frag from sending list now because upper layer may
* decide to put it on some other list in the callback
*/
opal_list_remove_item(&endpoint->endpoint_frag_send_queue,
@ -1163,7 +1167,8 @@ ompi_btl_usnic_module_progress_sends(
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
#if MSGDEBUG2
opal_output(0, "callback frag small %p, len=%"PRIu64"\n",
(void*)frag, frag->sf_base.uf_src_seg[0].seg_len);
(void*)frag,
(unsigned long)frag->sf_base.uf_src_seg[0].seg_len);
#endif
frag->sf_base.uf_base.des_cbfunc(&module->super,
frag->sf_endpoint, &frag->sf_base.uf_base,
@ -1247,11 +1252,24 @@ static int usnic_send(struct mca_btl_base_module_t* base_module,
module = (ompi_btl_usnic_module_t *)base_module;
frag = (ompi_btl_usnic_send_frag_t*) descriptor;
assert(frag->sf_endpoint == endpoint);
frag->sf_base.uf_dst_seg[0].seg_addr.pval = NULL; /* not a PUT */
/*
* Our descriptors are always either 1 or 2 segments.
* We always clear these lengths when the fragment is freed
* and only fill in what's needed in either prepare_src or usnic_alloc,
* so the total fragment length is always the sum of the 2 lengths.
*/
frag->sf_size = frag->sf_base.uf_src_seg[0].seg_len +
frag->sf_base.uf_src_seg[1].seg_len;
frag->sf_ack_bytes_left = frag->sf_size;
#if MSGDEBUG2
opal_output(0, "usnic_send: frag=%p, endpoint=%p, tag=%d, sendreq=%p\n",
opal_output(0, "usnic_send: frag=%p, endpoint=%p, tag=%d, sf_size=%d\n",
(void *)frag, (void *)endpoint,
tag, (void *)descriptor->des_cbdata);
{ int i;
tag, (int)frag->sf_size);
{ unsigned i;
for (i=0; i<descriptor->des_src_cnt; ++i)
opal_output(0, " %d: ptr:%p len:%d\n", i,
descriptor->des_src[i].seg_addr.pval,
@ -1259,16 +1277,6 @@ static int usnic_send(struct mca_btl_base_module_t* base_module,
}
#endif
assert(frag->sf_endpoint == endpoint);
frag->sf_base.uf_dst_seg[0].seg_addr.pval = NULL; /* not a PUT */
/* JMS From Dec OMPI meeting....
if PML doesn't set SEND_ALWAYS_CALLBACK, then we can return 1
here to say "the data is gone, PML can complete the request".
And then we don't need to do the PML callback (!). WE DON'T
NEED TO SET ALWAYS_CALLBACK! */
/*
* If this fragment is small enough to inline,
* and we have enough send WQEs,
@ -1305,19 +1313,7 @@ static int usnic_send(struct mca_btl_base_module_t* base_module,
sseg->ss_channel = USNIC_PRIORITY_CHANNEL;
sseg->ss_base.us_btl_header->tag = tag;
#if MSGDEBUG2
opal_output(0, "conv = %p\n", frag->sf_convertor);
opal_output(0, " inline frag %d segs %p(%d) + %p(%d)\n",
(int)frag->sf_base.uf_base.des_src_cnt,
frag->sf_base.uf_src_seg[0].seg_addr.pval,
(int)frag->sf_base.uf_src_seg[0].seg_len,
frag->sf_base.uf_src_seg[1].seg_addr.pval,
(int)frag->sf_base.uf_src_seg[1].seg_len);
opal_output(0, " inline seg %d segs %p(%d) + %p(%d)\n",
sseg->ss_send_desc.num_sge,
(void *)sseg->ss_send_desc.sg_list[0].addr,
sseg->ss_send_desc.sg_list[0].length,
(void *)sseg->ss_send_desc.sg_list[1].addr,
sseg->ss_send_desc.sg_list[1].length);
opal_output(0, "INLINE send, conv=%p", (void *)frag->sf_convertor);
#endif
/* post the segment now */

Просмотреть файл

@ -283,7 +283,7 @@ opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
#if MSGDEBUG2
opal_output(0, " large FRAG complete, pass up %p, %"PRIu64" bytes, tag=%d\n",
desc.des_dst->seg_addr.pval, desc.des_dst->seg_len,
chunk_hdr->ch_hdr.tag);
(int)chunk_hdr->ch_hdr.tag);
#endif
reg = mca_btl_base_active_message_trigger +
chunk_hdr->ch_hdr.tag;

Просмотреть файл

@ -200,9 +200,8 @@ ompi_btl_usnic_check_rx_seq(
i = WINDOW_SIZE_MOD(i + endpoint->endpoint_rfstart);
if (endpoint->endpoint_rcvd_segs[i]) {
#if MSGDEBUG
opal_output(0, "<-- Received FRAG/CHUNK ep %p, seq %" UDSEQ " from %s to %s, seg %p: duplicate -- DROPPED\n",
(void*) endpoint, bseg->us_btl_header->seq, src_mac, dest_mac,
(void*) seg);
opal_output(0, "<-- Received FRAG/CHUNK ep %p, seq %" UDSEQ ", seg %p: duplicate -- DROPPED\n",
(void*) endpoint, seg->rs_base.us_btl_header->seq, (void*) seg);
#endif
/* highest_seq_rcvd is for debug stats only; it's not used
in any window calculations */