Cisco CSCuj13135: Data corruption in MPI_Bsend_ator_c
Do not assume that the "size" passed to alloc_send() will be the same as the size of the message the resulting fragment will hold when usnic_send() is called. This means usnic_send()/usnic_put() can never trust any pre-computed size values, and are only allowed to look at the lengths and pointers of the elements in the desc SG list. This commit was SVN r29183.
Этот коммит содержится в:
родитель
b9103c0f66
Коммит
25b5c84d0f
@ -144,14 +144,14 @@ recv_seg_constructor(
|
||||
seg->rs_desc.des_src = NULL;
|
||||
seg->rs_desc.des_src_cnt = 0;
|
||||
|
||||
/* PML want to see its header
|
||||
/*
|
||||
* This pointer is only correct for incoming segments of type
|
||||
* OMPI_BTL_USNIC_PAYLOAD_TYPE_FRAG, but that's the only time
|
||||
* we ever give segment directly to PML, so it's OK
|
||||
* we ever give segment directly to upper layer, so it's OK
|
||||
*/
|
||||
bseg->us_payload.pml_header = (mca_btl_base_header_t *)
|
||||
bseg->us_payload.ompi_header = (mca_btl_base_header_t *)
|
||||
(bseg->us_btl_header+1);
|
||||
seg->rs_segment.seg_addr.pval = bseg->us_payload.pml_header;
|
||||
seg->rs_segment.seg_addr.pval = bseg->us_payload.ompi_header;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -162,6 +162,8 @@ send_frag_constructor(ompi_btl_usnic_send_frag_t *frag)
|
||||
/* Fill in source descriptor */
|
||||
desc = &frag->sf_base.uf_base;
|
||||
desc->des_src = frag->sf_base.uf_src_seg;
|
||||
frag->sf_base.uf_src_seg[0].seg_len = 0;
|
||||
frag->sf_base.uf_src_seg[1].seg_len = 0;
|
||||
desc->des_src_cnt = 2;
|
||||
desc->des_dst = frag->sf_base.uf_dst_seg;
|
||||
desc->des_dst_cnt = 0;
|
||||
@ -210,9 +212,9 @@ large_send_frag_constructor(ompi_btl_usnic_large_send_frag_t *lfrag)
|
||||
{
|
||||
lfrag->lsf_base.sf_base.uf_type = OMPI_BTL_USNIC_FRAG_LARGE_SEND;
|
||||
|
||||
/* save data pointer for PML */
|
||||
/* save data pointer for upper layer */
|
||||
lfrag->lsf_base.sf_base.uf_src_seg[0].seg_addr.pval =
|
||||
&lfrag->lsf_pml_header;
|
||||
&lfrag->lsf_ompi_header;
|
||||
|
||||
OBJ_CONSTRUCT(&lfrag->lsf_seg_chain, opal_list_t);
|
||||
}
|
||||
@ -279,99 +281,3 @@ OBJ_CLASS_INSTANCE(ompi_btl_usnic_put_dest_frag_t,
|
||||
ompi_btl_usnic_frag_t,
|
||||
put_dest_frag_constructor,
|
||||
NULL);
|
||||
|
||||
|
||||
/*******************************************************************************/
|
||||
|
||||
#if MSGDEBUG
|
||||
static void dump_ack_frag(ompi_btl_usnic_frag_t* frag)
|
||||
{
|
||||
char out[256];
|
||||
memset(out, 0, sizeof(out));
|
||||
|
||||
snprintf(out, sizeof(out),
|
||||
"=== ACK frag %p (MCW %d): alloced %d",
|
||||
(void*) frag,
|
||||
ompi_proc_local()->proc_name.vpid,
|
||||
FRAG_STATE_ISSET(frag, FRAG_ALLOCED));
|
||||
opal_output(0, out);
|
||||
}
|
||||
|
||||
static void dump_send_frag(ompi_btl_usnic_frag_t* frag)
|
||||
{
|
||||
char out[256];
|
||||
memset(out, 0, sizeof(out));
|
||||
|
||||
snprintf(out, sizeof(out),
|
||||
"=== SEND frag %p (MCW %d): alloced %d send_wr %d acked %d enqueued %d pml_callback %d hotel %d || seq %lu",
|
||||
(void*) frag,
|
||||
ompi_proc_local()->proc_name.vpid,
|
||||
FRAG_STATE_ISSET(frag, FRAG_ALLOCED),
|
||||
frag->send_wr_posted,
|
||||
FRAG_STATE_ISSET(frag, FRAG_SEND_ACKED),
|
||||
FRAG_STATE_ISSET(frag, FRAG_SEND_ENQUEUED),
|
||||
FRAG_STATE_ISSET(frag, FRAG_PML_CALLED_BACK),
|
||||
FRAG_STATE_ISSET(frag, FRAG_IN_HOTEL),
|
||||
FRAG_STATE_ISSET(frag, FRAG_ALLOCED) ?
|
||||
frag->btl_header->seq : (ompi_btl_usnic_seq_t) ~0
|
||||
);
|
||||
opal_output(0, out);
|
||||
}
|
||||
|
||||
static void dump_recv_frag(ompi_btl_usnic_frag_t* frag)
|
||||
{
|
||||
char out[256];
|
||||
memset(out, 0, sizeof(out));
|
||||
|
||||
snprintf(out, sizeof(out),
|
||||
"=== RECV frag %p (MCW %d): alloced %d posted %d",
|
||||
(void*) frag,
|
||||
ompi_proc_local()->proc_name.vpid,
|
||||
FRAG_STATE_ISSET(frag, FRAG_ALLOCED),
|
||||
FRAG_STATE_ISSET(frag, FRAG_RECV_WR_POSTED));
|
||||
opal_output(0, out);
|
||||
}
|
||||
|
||||
void ompi_btl_usnic_frag_dump(ompi_btl_usnic_frag_t *frag)
|
||||
{
|
||||
switch(frag->type) {
|
||||
case OMPI_BTL_USNIC_FRAG_ACK:
|
||||
dump_ack_frag(frag);
|
||||
break;
|
||||
|
||||
case OMPI_BTL_USNIC_FRAG_SEND:
|
||||
dump_send_frag(frag);
|
||||
break;
|
||||
|
||||
case OMPI_BTL_USNIC_FRAG_RECV:
|
||||
dump_recv_frag(frag);
|
||||
break;
|
||||
|
||||
default:
|
||||
opal_output(0, "=== UNKNOWN type frag %p: (!)", (void*) frag);
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*******************************************************************************/
|
||||
|
||||
#if HISTORY
|
||||
void ompi_btl_usnic_frag_history(ompi_btl_usnic_frag_t *frag,
|
||||
char *file, int line,
|
||||
const char *message)
|
||||
{
|
||||
int i = frag->history_next;
|
||||
ompi_btl_usnic_frag_history_t *h = &(frag->history[i]);
|
||||
|
||||
memset(h, 0, sizeof(*h));
|
||||
strncpy(h->file, file, sizeof(h->file));
|
||||
h->line = line;
|
||||
strncpy(h->message, message, sizeof(h->message));
|
||||
|
||||
frag->history_next = (frag->history_next + 1) % NUM_FRAG_HISTORY;
|
||||
if (frag->history_start == frag->history_next) {
|
||||
frag->history_start = (frag->history_start + 1) % NUM_FRAG_HISTORY;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -39,7 +39,7 @@ struct ompi_btl_usnic_module_t;
|
||||
|
||||
/*
|
||||
* Some definitions:
|
||||
* frag - what the PML later hands us to send, may be large or small
|
||||
* frag - what the upper layer hands us to send, may be large or small
|
||||
* segment - one packet on the wire
|
||||
* chunk - when a fragment is too big to fit into one segment, it is
|
||||
* broken into chunks, each chunk fitting in one segment
|
||||
@ -47,7 +47,7 @@ struct ompi_btl_usnic_module_t;
|
||||
|
||||
/**
|
||||
* Fragment types
|
||||
* The PML may give us very large "fragments" to send, larger than
|
||||
* The upper layer may give us very large "fragments" to send, larger than
|
||||
* an MTU. We break fragments into segments for sending, a segment being
|
||||
* defined to fit within an MTU.
|
||||
*/
|
||||
@ -58,7 +58,7 @@ typedef enum {
|
||||
} ompi_btl_usnic_frag_type_t;
|
||||
|
||||
#if MSGDEBUG2
|
||||
static inline char *
|
||||
static inline const char *
|
||||
usnic_frag_type(ompi_btl_usnic_frag_type_t t)
|
||||
{
|
||||
switch (t) {
|
||||
@ -78,7 +78,7 @@ typedef enum {
|
||||
} ompi_btl_usnic_seg_type_t;
|
||||
|
||||
#if MSGDEBUG2
|
||||
static inline char *
|
||||
static inline const char *
|
||||
usnic_seg_type(ompi_btl_usnic_seg_type_t t)
|
||||
{
|
||||
switch (t) {
|
||||
@ -111,8 +111,8 @@ typedef struct {
|
||||
*/
|
||||
typedef enum {
|
||||
OMPI_BTL_USNIC_PAYLOAD_TYPE_ACK = 1,
|
||||
OMPI_BTL_USNIC_PAYLOAD_TYPE_FRAG = 2, /* an entire PML fragment */
|
||||
OMPI_BTL_USNIC_PAYLOAD_TYPE_CHUNK = 3 /* one chunk of PML frag */
|
||||
OMPI_BTL_USNIC_PAYLOAD_TYPE_FRAG = 2, /* an entire fragment */
|
||||
OMPI_BTL_USNIC_PAYLOAD_TYPE_CHUNK = 3 /* one chunk of fragment */
|
||||
} ompi_btl_usnic_payload_type_t;
|
||||
|
||||
/**
|
||||
@ -141,7 +141,7 @@ typedef struct {
|
||||
/* Type of BTL header (see enum, above) */
|
||||
uint8_t payload_type;
|
||||
|
||||
/* tag for PML, etc */
|
||||
/* tag for upper layer */
|
||||
mca_btl_base_tag_t tag;
|
||||
} ompi_btl_usnic_btl_header_t;
|
||||
|
||||
@ -156,25 +156,6 @@ typedef struct {
|
||||
uint32_t ch_frag_offset; /* where in fragment this goes */
|
||||
} ompi_btl_usnic_btl_chunk_header_t;
|
||||
|
||||
/*
|
||||
* Enums for the states of frags
|
||||
*/
|
||||
typedef enum {
|
||||
/* Frag states: all frags */
|
||||
FRAG_ALLOCED = 0x01,
|
||||
|
||||
/* Frag states: send frags */
|
||||
FRAG_SEND_ACKED = 0x02,
|
||||
FRAG_SEND_ENQUEUED = 0x04,
|
||||
FRAG_PML_CALLED_BACK = 0x08,
|
||||
FRAG_IN_HOTEL = 0x10,
|
||||
|
||||
/* Frag states: receive frags */
|
||||
FRAG_RECV_WR_POSTED = 0x40,
|
||||
|
||||
FRAG_MAX = 0xff
|
||||
} ompi_btl_usnic_frag_state_flags_t;
|
||||
|
||||
/**
|
||||
* Descriptor for a common segment. This is exactly one packet and may
|
||||
* be send or receive
|
||||
@ -197,7 +178,7 @@ typedef struct ompi_btl_usnic_segment_t {
|
||||
|
||||
union {
|
||||
uint8_t *raw;
|
||||
void *pml_header;
|
||||
void *ompi_header;
|
||||
} us_payload;
|
||||
} ompi_btl_usnic_segment_t;
|
||||
|
||||
@ -270,7 +251,7 @@ typedef struct ompi_btl_usnic_send_frag_t {
|
||||
|
||||
struct mca_btl_base_endpoint_t *sf_endpoint;
|
||||
|
||||
size_t sf_size; /* total_fragment size (PML + user payload) */
|
||||
size_t sf_size; /* total_fragment size (upper + user payload) */
|
||||
|
||||
/* original message data if convertor required */
|
||||
struct opal_convertor_t* sf_convertor;
|
||||
@ -283,13 +264,13 @@ typedef struct ompi_btl_usnic_send_frag_t {
|
||||
|
||||
/**
|
||||
* Descriptor for a large fragment
|
||||
* Large fragment uses two SG entries - one points to PML header,
|
||||
* Large fragment uses two SG entries - one points to upper layer header,
|
||||
* other points to data.
|
||||
*/
|
||||
typedef struct ompi_btl_usnic_large_send_frag_t {
|
||||
ompi_btl_usnic_send_frag_t lsf_base;
|
||||
|
||||
char lsf_pml_header[64]; /* space for PML header */
|
||||
char lsf_ompi_header[64]; /* space for upper layer header */
|
||||
mca_btl_base_tag_t lsf_tag; /* save tag */
|
||||
|
||||
uint32_t lsf_frag_id; /* fragment ID for reassembly */
|
||||
@ -306,10 +287,10 @@ typedef struct ompi_btl_usnic_large_send_frag_t {
|
||||
* an inline send, but will convert to a single SG entry if inline cannot
|
||||
* be done and data must be copied.
|
||||
* First segment will point to registered memory of associated segment to
|
||||
* hold BTL and PML headers.
|
||||
* hold BTL and upper layer headers.
|
||||
* Second segment will point directly to user data. If inlining fails, we
|
||||
* will copy user data into the registered memory after the PML header and
|
||||
* convert to a single segment.
|
||||
* will copy user data into the registered memory after the upper layer header
|
||||
* and convert to a single segment.
|
||||
*/
|
||||
typedef struct ompi_btl_usnic_small_send_frag_t {
|
||||
ompi_btl_usnic_send_frag_t ssf_base;
|
||||
@ -417,7 +398,7 @@ ompi_btl_usnic_put_dest_frag_alloc(
|
||||
* A send frag can be returned to the freelist when all of the
|
||||
* following are true:
|
||||
*
|
||||
* 1. PML is freeing it (via module.free())
|
||||
* 1. upper layer is freeing it (via module.free())
|
||||
* 2. Or all of these:
|
||||
* a) it finishes sending all its segments
|
||||
* b) all of its segments have been ACKed
|
||||
@ -449,6 +430,8 @@ ompi_btl_usnic_frag_return(
|
||||
opal_output(0, "freeing frag %p, type %s\n", (void *)frag,
|
||||
usnic_frag_type(frag->uf_type));
|
||||
#endif
|
||||
frag->uf_src_seg[0].seg_len = 0;
|
||||
frag->uf_src_seg[1].seg_len = 0;
|
||||
OMPI_FREE_LIST_RETURN_MT(frag->uf_freelist, &(frag->uf_base.super));
|
||||
}
|
||||
|
||||
|
@ -204,7 +204,7 @@ static int usnic_register_pml_err_cb(struct mca_btl_base_module_t* btl,
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate PML control messages or eager frags if BTL does not have
|
||||
* Allocate control messages or eager frags if BTL does not have
|
||||
* INPLACE flag. To be clear: max it will ever alloc is eager_limit.
|
||||
* THEREFORE: eager_limit is the max that ALLOC must always be able to
|
||||
* alloc.
|
||||
@ -222,9 +222,9 @@ usnic_alloc(struct mca_btl_base_module_t* btl,
|
||||
ompi_btl_usnic_small_send_frag_t *sfrag;
|
||||
mca_btl_base_descriptor_t *desc;
|
||||
|
||||
if (size > module->max_frag_payload)
|
||||
if (size > module->max_frag_payload) {
|
||||
size = module->max_frag_payload;
|
||||
|
||||
}
|
||||
|
||||
sfrag = ompi_btl_usnic_small_send_frag_alloc(module);
|
||||
if (NULL == sfrag) {
|
||||
@ -238,10 +238,6 @@ usnic_alloc(struct mca_btl_base_module_t* btl,
|
||||
(void *)frag, (int)size, flags);
|
||||
#endif
|
||||
|
||||
/* set # of bytes remaining to be ACKed */
|
||||
frag->sf_ack_bytes_left = size;
|
||||
frag->sf_size = size;
|
||||
|
||||
/* set endpoint */
|
||||
frag->sf_endpoint = endpoint;
|
||||
|
||||
@ -281,7 +277,7 @@ static int usnic_free(struct mca_btl_base_module_t* btl,
|
||||
/*
|
||||
* Notes from george:
|
||||
*
|
||||
* - BTL ALLOC: allocating PML control messages or eager frags if BTL
|
||||
* - BTL ALLOC: allocating control messages or eager frags if BTL
|
||||
does not have INPLACE flag. To be clear: max it will ever alloc
|
||||
is eager_limit. THEREFORE: eager_limit is the max that ALLOC
|
||||
must always be able to alloc.
|
||||
@ -305,12 +301,12 @@ static int usnic_free(struct mca_btl_base_module_t* btl,
|
||||
* we will leave data in place
|
||||
*
|
||||
* small,convertor: copy the data into the segment associated with small frag,
|
||||
* PML will put header in this seg, single entry in desc SG
|
||||
* small,no convertor: PML will put header in attached segment SG[0],
|
||||
* caller will put header in this seg, single entry in desc SG
|
||||
* small,no convertor: caller will put header in attached segment SG[0],
|
||||
* save pointer to user data in SG[1], 2 SG entries
|
||||
* large,convertor: copy data into chain of segments, leaving room for
|
||||
* PML header at start of 1st segment, 2 SG entries
|
||||
* large,not convertor: PML will put header in buffer in the large frag itself,
|
||||
* caller header at start of 1st segment, 2 SG entries
|
||||
* large,not convertor: caller will put header in buffer in the large frag itself,
|
||||
* save pointer to user data in SG[1]. 2 SG entries
|
||||
*
|
||||
* NOTE that the *only* reason this routine is allowed to return a size smaller
|
||||
@ -358,7 +354,7 @@ usnic_prepare_src(
|
||||
*/
|
||||
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
|
||||
|
||||
/* put user data just after end of 1st seg (PML header) */
|
||||
/* put user data just after end of 1st seg (upper layer header) */
|
||||
if (payload_len > module->max_frag_payload) {
|
||||
payload_len = module->max_frag_payload;
|
||||
}
|
||||
@ -400,14 +396,14 @@ usnic_prepare_src(
|
||||
* If a convertor is required, pack the data into a chain of segments.
|
||||
* We will later send from the segments one at a time. This allows
|
||||
* us to absorb a large convertor-based send and still give an accurate
|
||||
* data count back to the PML
|
||||
* data count back to the upper layer
|
||||
*/
|
||||
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
|
||||
ompi_btl_usnic_chunk_segment_t *seg;
|
||||
unsigned pml_hdr_len;
|
||||
unsigned ompi_hdr_len;
|
||||
unsigned bytes_to_pack;
|
||||
|
||||
pml_hdr_len = reserve;
|
||||
ompi_hdr_len = reserve;
|
||||
bytes_to_pack = *size;
|
||||
while (bytes_to_pack > 0) {
|
||||
seg = ompi_btl_usnic_chunk_segment_alloc(module);
|
||||
@ -416,14 +412,14 @@ usnic_prepare_src(
|
||||
abort(); /* XXX */
|
||||
}
|
||||
|
||||
/* put user data just after end of 1st seg (PML header) */
|
||||
payload_len = pml_hdr_len + bytes_to_pack;
|
||||
/* put user data just after end of 1st seg (upper header) */
|
||||
payload_len = ompi_hdr_len + bytes_to_pack;
|
||||
if (payload_len > module->max_chunk_payload) {
|
||||
payload_len = module->max_chunk_payload;
|
||||
}
|
||||
iov.iov_len = payload_len - pml_hdr_len;
|
||||
iov.iov_len = payload_len - ompi_hdr_len;
|
||||
iov.iov_base = (IOVBASE_TYPE*)
|
||||
(seg->ss_base.us_payload.raw + pml_hdr_len);
|
||||
(seg->ss_base.us_payload.raw + ompi_hdr_len);
|
||||
iov_count = 1;
|
||||
max_data = iov.iov_len;
|
||||
rc = opal_convertor_pack(convertor, &iov, &iov_count, &max_data);
|
||||
@ -446,9 +442,9 @@ usnic_prepare_src(
|
||||
opal_list_append(&lfrag->lsf_seg_chain,
|
||||
&seg->ss_base.us_list.super);
|
||||
seg->ss_parent_frag = &lfrag->lsf_base;
|
||||
seg->ss_base.us_sg_entry[0].length = max_data + pml_hdr_len;
|
||||
seg->ss_base.us_sg_entry[0].length = max_data + ompi_hdr_len;
|
||||
|
||||
pml_hdr_len = 0;
|
||||
ompi_hdr_len = 0;
|
||||
bytes_to_pack -= max_data;
|
||||
}
|
||||
payload_len = *size + reserve;
|
||||
@ -464,7 +460,7 @@ usnic_prepare_src(
|
||||
&lfrag->lsf_base.sf_base.uf_src_seg[1].seg_addr.pval);
|
||||
lfrag->lsf_base.sf_convertor = NULL;
|
||||
lfrag->lsf_base.sf_base.uf_src_seg[0].seg_addr.pval =
|
||||
&lfrag->lsf_pml_header;
|
||||
&lfrag->lsf_ompi_header;
|
||||
}
|
||||
|
||||
|
||||
@ -472,23 +468,18 @@ usnic_prepare_src(
|
||||
lfrag->lsf_cur_offset = 0;
|
||||
lfrag->lsf_bytes_left = payload_len;
|
||||
|
||||
/* make sure PML header small enough */
|
||||
assert(reserve < sizeof(lfrag->lsf_pml_header));
|
||||
/* make sure upper header small enough */
|
||||
assert(reserve < sizeof(lfrag->lsf_ompi_header));
|
||||
|
||||
frag->sf_base.uf_base.des_src_cnt = 2;
|
||||
frag->sf_base.uf_src_seg[0].seg_len = reserve;
|
||||
frag->sf_base.uf_src_seg[1].seg_len = *size;
|
||||
}
|
||||
|
||||
/* fill in segment sizes */
|
||||
frag->sf_size = payload_len;
|
||||
|
||||
/* set up common parts of frag */
|
||||
frag->sf_base.uf_base.des_flags = flags;
|
||||
frag->sf_endpoint = endpoint;
|
||||
|
||||
/* fragment accounting */
|
||||
frag->sf_ack_bytes_left = payload_len;
|
||||
desc = &frag->sf_base.uf_base;
|
||||
|
||||
#if MSGDEBUG2
|
||||
@ -496,10 +487,10 @@ usnic_prepare_src(
|
||||
module->device->name,
|
||||
payload_len <= module->max_frag_payload?"small":"large",
|
||||
(void *)frag, (int)reserve, (int)*size);
|
||||
{ int i;
|
||||
{ unsigned i;
|
||||
for (i=0; i<desc->des_src_cnt; ++i)
|
||||
opal_output(0, " %d: ptr:%p len:%d\n", i,
|
||||
desc->des_src[i].seg_addr.pval,
|
||||
(void *)desc->des_src[i].seg_addr.pval,
|
||||
desc->des_src[i].seg_len);
|
||||
}
|
||||
#endif
|
||||
@ -565,16 +556,27 @@ usnic_put(
|
||||
|
||||
frag = (ompi_btl_usnic_send_frag_t *)des;
|
||||
|
||||
/*
|
||||
* Our descriptors are always either 1 or 2 segments.
|
||||
* We always clear these lengths when the fragment is freed
|
||||
* and only fill in what's needed in either prepare_src or usnic_alloc,
|
||||
* so the total fragment length is always the sum of the 2 lengths.
|
||||
*/
|
||||
frag->sf_size = frag->sf_base.uf_src_seg[0].seg_len +
|
||||
frag->sf_base.uf_src_seg[1].seg_len;
|
||||
frag->sf_ack_bytes_left = frag->sf_size;
|
||||
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, "usnic_put, frag=%p, source=\n", frag);
|
||||
{ int i;
|
||||
opal_output(0, "usnic_put, frag=%p, size=%d, source=\n", (void *)frag,
|
||||
(int)frag->sf_size);
|
||||
{ unsigned i;
|
||||
for (i=0; i<des->des_src_cnt; ++i)
|
||||
opal_output(0, " %d: ptr:%p len:%d\n", i,
|
||||
des->des_src[i].seg_addr.pval,
|
||||
des->des_src[i].seg_len);
|
||||
}
|
||||
opal_output(0, "dest:\n");
|
||||
{ int i;
|
||||
{ unsigned i;
|
||||
for (i=0; i<des->des_dst_cnt; ++i)
|
||||
opal_output(0, " %d: ptr:%p len:%d\n", i,
|
||||
des->des_dst[i].seg_addr.pval,
|
||||
@ -582,7 +584,7 @@ usnic_put(
|
||||
}
|
||||
#endif
|
||||
|
||||
/* copy out address - why does he not use ours? silly PML */
|
||||
/* copy out address - why does he not use ours? */
|
||||
frag->sf_base.uf_dst_seg[0].seg_addr.pval = des->des_dst->seg_addr.pval;
|
||||
|
||||
/*
|
||||
@ -619,7 +621,8 @@ usnic_put(
|
||||
/* set up VERBS SG list */
|
||||
sseg->ss_send_desc.num_sge = 1;
|
||||
sseg->ss_base.us_sg_entry[0].length =
|
||||
sizeof(ompi_btl_usnic_btl_header_t) + frag->sf_size;
|
||||
sizeof(ompi_btl_usnic_btl_header_t) +
|
||||
frag->sf_base.uf_base.des_src[1].seg_len;
|
||||
|
||||
/* use standard channel */
|
||||
sseg->ss_channel = USNIC_DATA_CHANNEL;
|
||||
@ -961,16 +964,16 @@ usnic_handle_large_send(
|
||||
sseg->ss_parent_frag = frag;
|
||||
|
||||
/* If this is the first chunk of the frag, need to insert
|
||||
* the PML header at the start. On subsequent chunks,
|
||||
* skip the PML header
|
||||
* the upper header at the start. On subsequent chunks,
|
||||
* skip the upper header
|
||||
*/
|
||||
if (lfrag->lsf_cur_offset == 0) {
|
||||
|
||||
/* copy in the PML header */
|
||||
memcpy(sseg->ss_base.us_payload.raw, lfrag->lsf_pml_header,
|
||||
/* copy in the upper header */
|
||||
memcpy(sseg->ss_base.us_payload.raw, lfrag->lsf_ompi_header,
|
||||
lfrag->lsf_base.sf_base.uf_src_seg[0].seg_len);
|
||||
|
||||
/* adjust data pointer and len to skip PML */
|
||||
/* adjust data pointer and len to skip upper header */
|
||||
iov.iov_base = sseg->ss_base.us_payload.raw +
|
||||
lfrag->lsf_base.sf_base.uf_src_seg[0].seg_len;
|
||||
|
||||
@ -1004,7 +1007,7 @@ usnic_handle_large_send(
|
||||
payload_len += max_data;
|
||||
|
||||
/* We are sending converted data, which means we have a list of segments
|
||||
* containing the data. PML header is already in first segment
|
||||
* containing the data. upper layer header is already in first segment
|
||||
*/
|
||||
} else {
|
||||
sseg = (ompi_btl_usnic_send_segment_t *)
|
||||
@ -1040,7 +1043,7 @@ usnic_handle_large_send(
|
||||
/* done with fragment? */
|
||||
if (lfrag->lsf_bytes_left == 0) {
|
||||
|
||||
/* remove this frag from sending list now because PML may
|
||||
/* remove this frag from sending list now because upper layer may
|
||||
* decide to put it on some other list in the callback
|
||||
*/
|
||||
opal_list_remove_item(&endpoint->endpoint_frag_send_queue,
|
||||
@ -1055,7 +1058,8 @@ usnic_handle_large_send(
|
||||
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, "callback for large frag %p, len=%zd\n",
|
||||
(void *)frag->sf_base.uf_base.des_cbfunc, frag->sf_size);
|
||||
(void *)(uintptr_t)frag->sf_base.uf_base.des_cbfunc,
|
||||
frag->sf_size);
|
||||
#endif
|
||||
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
||||
frag->sf_endpoint, &frag->sf_base.uf_base,
|
||||
@ -1123,7 +1127,7 @@ ompi_btl_usnic_module_progress_sends(
|
||||
*/
|
||||
if (frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_SMALL_SEND) {
|
||||
|
||||
/* remove this frag from sending list now because PML may
|
||||
/* remove this frag from sending list now because upper layer may
|
||||
* decide to put it on some other list in the callback
|
||||
*/
|
||||
opal_list_remove_item(&endpoint->endpoint_frag_send_queue,
|
||||
@ -1163,7 +1167,8 @@ ompi_btl_usnic_module_progress_sends(
|
||||
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, "callback frag small %p, len=%"PRIu64"\n",
|
||||
(void*)frag, frag->sf_base.uf_src_seg[0].seg_len);
|
||||
(void*)frag,
|
||||
(unsigned long)frag->sf_base.uf_src_seg[0].seg_len);
|
||||
#endif
|
||||
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
||||
frag->sf_endpoint, &frag->sf_base.uf_base,
|
||||
@ -1247,11 +1252,24 @@ static int usnic_send(struct mca_btl_base_module_t* base_module,
|
||||
module = (ompi_btl_usnic_module_t *)base_module;
|
||||
frag = (ompi_btl_usnic_send_frag_t*) descriptor;
|
||||
|
||||
assert(frag->sf_endpoint == endpoint);
|
||||
frag->sf_base.uf_dst_seg[0].seg_addr.pval = NULL; /* not a PUT */
|
||||
|
||||
/*
|
||||
* Our descriptors are always either 1 or 2 segments.
|
||||
* We always clear these lengths when the fragment is freed
|
||||
* and only fill in what's needed in either prepare_src or usnic_alloc,
|
||||
* so the total fragment length is always the sum of the 2 lengths.
|
||||
*/
|
||||
frag->sf_size = frag->sf_base.uf_src_seg[0].seg_len +
|
||||
frag->sf_base.uf_src_seg[1].seg_len;
|
||||
frag->sf_ack_bytes_left = frag->sf_size;
|
||||
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, "usnic_send: frag=%p, endpoint=%p, tag=%d, sendreq=%p\n",
|
||||
opal_output(0, "usnic_send: frag=%p, endpoint=%p, tag=%d, sf_size=%d\n",
|
||||
(void *)frag, (void *)endpoint,
|
||||
tag, (void *)descriptor->des_cbdata);
|
||||
{ int i;
|
||||
tag, (int)frag->sf_size);
|
||||
{ unsigned i;
|
||||
for (i=0; i<descriptor->des_src_cnt; ++i)
|
||||
opal_output(0, " %d: ptr:%p len:%d\n", i,
|
||||
descriptor->des_src[i].seg_addr.pval,
|
||||
@ -1259,16 +1277,6 @@ static int usnic_send(struct mca_btl_base_module_t* base_module,
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(frag->sf_endpoint == endpoint);
|
||||
frag->sf_base.uf_dst_seg[0].seg_addr.pval = NULL; /* not a PUT */
|
||||
|
||||
/* JMS From Dec OMPI meeting....
|
||||
|
||||
if PML doesn't set SEND_ALWAYS_CALLBACK, then we can return 1
|
||||
here to say "the data is gone, PML can complete the request".
|
||||
And then we don't need to do the PML callback (!). WE DON'T
|
||||
NEED TO SET ALWAYS_CALLBACK! */
|
||||
|
||||
/*
|
||||
* If this fragment is small enough to inline,
|
||||
* and we have enough send WQEs,
|
||||
@ -1305,19 +1313,7 @@ static int usnic_send(struct mca_btl_base_module_t* base_module,
|
||||
sseg->ss_channel = USNIC_PRIORITY_CHANNEL;
|
||||
sseg->ss_base.us_btl_header->tag = tag;
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, "conv = %p\n", frag->sf_convertor);
|
||||
opal_output(0, " inline frag %d segs %p(%d) + %p(%d)\n",
|
||||
(int)frag->sf_base.uf_base.des_src_cnt,
|
||||
frag->sf_base.uf_src_seg[0].seg_addr.pval,
|
||||
(int)frag->sf_base.uf_src_seg[0].seg_len,
|
||||
frag->sf_base.uf_src_seg[1].seg_addr.pval,
|
||||
(int)frag->sf_base.uf_src_seg[1].seg_len);
|
||||
opal_output(0, " inline seg %d segs %p(%d) + %p(%d)\n",
|
||||
sseg->ss_send_desc.num_sge,
|
||||
(void *)sseg->ss_send_desc.sg_list[0].addr,
|
||||
sseg->ss_send_desc.sg_list[0].length,
|
||||
(void *)sseg->ss_send_desc.sg_list[1].addr,
|
||||
sseg->ss_send_desc.sg_list[1].length);
|
||||
opal_output(0, "INLINE send, conv=%p", (void *)frag->sf_convertor);
|
||||
#endif
|
||||
|
||||
/* post the segment now */
|
||||
|
@ -283,7 +283,7 @@ opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
|
||||
#if MSGDEBUG2
|
||||
opal_output(0, " large FRAG complete, pass up %p, %"PRIu64" bytes, tag=%d\n",
|
||||
desc.des_dst->seg_addr.pval, desc.des_dst->seg_len,
|
||||
chunk_hdr->ch_hdr.tag);
|
||||
(int)chunk_hdr->ch_hdr.tag);
|
||||
#endif
|
||||
reg = mca_btl_base_active_message_trigger +
|
||||
chunk_hdr->ch_hdr.tag;
|
||||
|
@ -200,9 +200,8 @@ ompi_btl_usnic_check_rx_seq(
|
||||
i = WINDOW_SIZE_MOD(i + endpoint->endpoint_rfstart);
|
||||
if (endpoint->endpoint_rcvd_segs[i]) {
|
||||
#if MSGDEBUG
|
||||
opal_output(0, "<-- Received FRAG/CHUNK ep %p, seq %" UDSEQ " from %s to %s, seg %p: duplicate -- DROPPED\n",
|
||||
(void*) endpoint, bseg->us_btl_header->seq, src_mac, dest_mac,
|
||||
(void*) seg);
|
||||
opal_output(0, "<-- Received FRAG/CHUNK ep %p, seq %" UDSEQ ", seg %p: duplicate -- DROPPED\n",
|
||||
(void*) endpoint, seg->rs_base.us_btl_header->seq, (void*) seg);
|
||||
#endif
|
||||
/* highest_seq_rcvd is for debug stats only; it's not used
|
||||
in any window calculations */
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user