Cisco CSCuj12520: various problems running c_fence_put_1
- tag needs to be sent in *our* header, not the PML header
- usnic_alloc() should return a smaller value if too much data is requested
- be careful about callbacks vs. removing items from lists (we need to remove from our lists *before* the callback)
- improve send callback handling
- add some more MSGDEBUG2 logging and cleanup

This commit was SVN r29181.
Этот коммит содержится в:
родитель
2245ac0e7e
Коммит
89b5f0899b
@ -134,20 +134,22 @@ ompi_btl_usnic_handle_ack(
|
|||||||
(void*)sseg, (void*)frag, bytes_acked, frag->sf_ack_bytes_left);
|
(void*)sseg, (void*)frag, bytes_acked, frag->sf_ack_bytes_left);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* perform completion callback for PUT here */
|
/* If all ACKs received, and this is a put or a regular send
|
||||||
|
* that needs a callback, perform the callback now
|
||||||
|
*/
|
||||||
if (frag->sf_ack_bytes_left == 0 &&
|
if (frag->sf_ack_bytes_left == 0 &&
|
||||||
frag->sf_base.uf_dst_seg[0].seg_addr.pval != NULL) {
|
((frag->sf_base.uf_dst_seg[0].seg_addr.pval != NULL) ||
|
||||||
#if MSGDEBUG1
|
(frag->sf_base.uf_base.des_flags &
|
||||||
opal_output(0, "Calling back %p for PUT completion, frag=%p\n",
|
MCA_BTL_DES_SEND_ALWAYS_CALLBACK))) {
|
||||||
(void*)(uintptr_t)frag->sf_base.uf_base.des_cbfunc, (void*)frag);
|
#if MSGDEBUG2
|
||||||
|
opal_output(0, "completion callback for frag=%p, dest=%p\n",
|
||||||
|
(void*)frag, frag->sf_base.uf_dst_seg[0].seg_addr.pval);
|
||||||
#endif
|
#endif
|
||||||
frag->sf_base.uf_base.des_cbfunc(&module->super, frag->sf_endpoint,
|
frag->sf_base.uf_base.des_cbfunc(&module->super, frag->sf_endpoint,
|
||||||
&frag->sf_base.uf_base, OMPI_SUCCESS);
|
&frag->sf_base.uf_base, OMPI_SUCCESS);
|
||||||
|
frag->sf_base.uf_base.des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* OK to return this fragment? */
|
|
||||||
ompi_btl_usnic_send_frag_return_cond(module, frag);
|
|
||||||
|
|
||||||
/* free this segment */
|
/* free this segment */
|
||||||
sseg->ss_ack_pending = false;
|
sseg->ss_ack_pending = false;
|
||||||
if (sseg->ss_base.us_type == OMPI_BTL_USNIC_SEG_CHUNK &&
|
if (sseg->ss_base.us_type == OMPI_BTL_USNIC_SEG_CHUNK &&
|
||||||
@ -155,6 +157,9 @@ ompi_btl_usnic_handle_ack(
|
|||||||
ompi_btl_usnic_chunk_segment_return(module, sseg);
|
ompi_btl_usnic_chunk_segment_return(module, sseg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* OK to return this fragment? */
|
||||||
|
ompi_btl_usnic_send_frag_return_cond(module, frag);
|
||||||
|
|
||||||
/* indicate this segment has been ACKed */
|
/* indicate this segment has been ACKed */
|
||||||
endpoint->endpoint_sent_segs[WINDOW_SIZE_MOD(is)] = NULL;
|
endpoint->endpoint_sent_segs[WINDOW_SIZE_MOD(is)] = NULL;
|
||||||
}
|
}
|
||||||
|
@ -57,6 +57,19 @@ typedef enum {
|
|||||||
OMPI_BTL_USNIC_FRAG_PUT_DEST
|
OMPI_BTL_USNIC_FRAG_PUT_DEST
|
||||||
} ompi_btl_usnic_frag_type_t;
|
} ompi_btl_usnic_frag_type_t;
|
||||||
|
|
||||||
|
#if MSGDEBUG2
|
||||||
|
static inline char *
|
||||||
|
usnic_frag_type(ompi_btl_usnic_frag_type_t t)
|
||||||
|
{
|
||||||
|
switch (t) {
|
||||||
|
case OMPI_BTL_USNIC_FRAG_LARGE_SEND: return "large";
|
||||||
|
case OMPI_BTL_USNIC_FRAG_SMALL_SEND: return "small";
|
||||||
|
case OMPI_BTL_USNIC_FRAG_PUT_DEST: return "put dest";
|
||||||
|
default: return "unknown";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
OMPI_BTL_USNIC_SEG_ACK,
|
OMPI_BTL_USNIC_SEG_ACK,
|
||||||
OMPI_BTL_USNIC_SEG_FRAG,
|
OMPI_BTL_USNIC_SEG_FRAG,
|
||||||
@ -64,6 +77,20 @@ typedef enum {
|
|||||||
OMPI_BTL_USNIC_SEG_RECV
|
OMPI_BTL_USNIC_SEG_RECV
|
||||||
} ompi_btl_usnic_seg_type_t;
|
} ompi_btl_usnic_seg_type_t;
|
||||||
|
|
||||||
|
#if MSGDEBUG2
|
||||||
|
static inline char *
|
||||||
|
usnic_seg_type(ompi_btl_usnic_seg_type_t t)
|
||||||
|
{
|
||||||
|
switch (t) {
|
||||||
|
case OMPI_BTL_USNIC_SEG_ACK: return "ACK";
|
||||||
|
case OMPI_BTL_USNIC_SEG_FRAG: return "FRAG";
|
||||||
|
case OMPI_BTL_USNIC_SEG_CHUNK: return "CHUNK";
|
||||||
|
case OMPI_BTL_USNIC_SEG_RECV: return "RECV";
|
||||||
|
default: return "unknown";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
typedef struct ompi_btl_usnic_reg_t {
|
typedef struct ompi_btl_usnic_reg_t {
|
||||||
mca_mpool_base_registration_t base;
|
mca_mpool_base_registration_t base;
|
||||||
@ -94,6 +121,7 @@ typedef enum {
|
|||||||
* holes.
|
* holes.
|
||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
||||||
/* Hashed RTE process name of the sender */
|
/* Hashed RTE process name of the sender */
|
||||||
uint64_t sender;
|
uint64_t sender;
|
||||||
|
|
||||||
@ -112,8 +140,9 @@ typedef struct {
|
|||||||
|
|
||||||
/* Type of BTL header (see enum, above) */
|
/* Type of BTL header (see enum, above) */
|
||||||
uint8_t payload_type;
|
uint8_t payload_type;
|
||||||
/* Yuck */
|
|
||||||
uint8_t padding;
|
/* tag for PML, etc */
|
||||||
|
mca_btl_base_tag_t tag;
|
||||||
} ompi_btl_usnic_btl_header_t;
|
} ompi_btl_usnic_btl_header_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -146,15 +175,6 @@ typedef enum {
|
|||||||
FRAG_MAX = 0xff
|
FRAG_MAX = 0xff
|
||||||
} ompi_btl_usnic_frag_state_flags_t;
|
} ompi_btl_usnic_frag_state_flags_t;
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Convenience macros for states
|
|
||||||
*/
|
|
||||||
#define FRAG_STATE_SET(frag, state) (frag)->state_flags |= (state)
|
|
||||||
#define FRAG_STATE_CLR(frag, state) (frag)->state_flags &= ~(state)
|
|
||||||
#define FRAG_STATE_GET(frag, state) ((frag)->state_flags & (state))
|
|
||||||
#define FRAG_STATE_ISSET(frag, state) (((frag)->state_flags & (state)) != 0)
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Descriptor for a common segment. This is exactly one packet and may
|
* Descriptor for a common segment. This is exactly one packet and may
|
||||||
* be send or receive
|
* be send or receive
|
||||||
@ -177,7 +197,7 @@ typedef struct ompi_btl_usnic_segment_t {
|
|||||||
|
|
||||||
union {
|
union {
|
||||||
uint8_t *raw;
|
uint8_t *raw;
|
||||||
mca_btl_base_header_t *pml_header;
|
void *pml_header;
|
||||||
} us_payload;
|
} us_payload;
|
||||||
} ompi_btl_usnic_segment_t;
|
} ompi_btl_usnic_segment_t;
|
||||||
|
|
||||||
@ -270,6 +290,7 @@ typedef struct ompi_btl_usnic_large_send_frag_t {
|
|||||||
ompi_btl_usnic_send_frag_t lsf_base;
|
ompi_btl_usnic_send_frag_t lsf_base;
|
||||||
|
|
||||||
char lsf_pml_header[64]; /* space for PML header */
|
char lsf_pml_header[64]; /* space for PML header */
|
||||||
|
mca_btl_base_tag_t lsf_tag; /* save tag */
|
||||||
|
|
||||||
uint32_t lsf_frag_id; /* fragment ID for reassembly */
|
uint32_t lsf_frag_id; /* fragment ID for reassembly */
|
||||||
size_t lsf_cur_offset; /* current offset into message */
|
size_t lsf_cur_offset; /* current offset into message */
|
||||||
@ -424,6 +445,10 @@ ompi_btl_usnic_frag_return(
|
|||||||
struct ompi_btl_usnic_module_t *module,
|
struct ompi_btl_usnic_module_t *module,
|
||||||
ompi_btl_usnic_frag_t *frag)
|
ompi_btl_usnic_frag_t *frag)
|
||||||
{
|
{
|
||||||
|
#if MSGDEBUG2
|
||||||
|
opal_output(0, "freeing frag %p, type %s\n", (void *)frag,
|
||||||
|
usnic_frag_type(frag->uf_type));
|
||||||
|
#endif
|
||||||
OMPI_FREE_LIST_RETURN_MT(frag->uf_freelist, &(frag->uf_base.super));
|
OMPI_FREE_LIST_RETURN_MT(frag->uf_freelist, &(frag->uf_base.super));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -219,35 +219,23 @@ usnic_alloc(struct mca_btl_base_module_t* btl,
|
|||||||
{
|
{
|
||||||
ompi_btl_usnic_send_frag_t *frag;
|
ompi_btl_usnic_send_frag_t *frag;
|
||||||
ompi_btl_usnic_module_t *module = (ompi_btl_usnic_module_t*) btl;
|
ompi_btl_usnic_module_t *module = (ompi_btl_usnic_module_t*) btl;
|
||||||
|
ompi_btl_usnic_small_send_frag_t *sfrag;
|
||||||
mca_btl_base_descriptor_t *desc;
|
mca_btl_base_descriptor_t *desc;
|
||||||
|
|
||||||
/* will this fit into a small send? */
|
if (size > module->max_frag_payload)
|
||||||
if (size <= module->max_frag_payload) {
|
size = module->max_frag_payload;
|
||||||
ompi_btl_usnic_small_send_frag_t *sfrag;
|
|
||||||
|
|
||||||
sfrag = ompi_btl_usnic_small_send_frag_alloc(module);
|
|
||||||
if (NULL == sfrag) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
frag = &sfrag->ssf_base;
|
|
||||||
|
|
||||||
} else {
|
sfrag = ompi_btl_usnic_small_send_frag_alloc(module);
|
||||||
ompi_btl_usnic_large_send_frag_t *lfrag;
|
if (NULL == sfrag) {
|
||||||
|
return NULL;
|
||||||
lfrag = ompi_btl_usnic_large_send_frag_alloc(module);
|
|
||||||
if (NULL == lfrag) {
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
frag = &lfrag->lsf_base;
|
|
||||||
|
|
||||||
BTL_ERROR(("large frag in usnic_alloc()\n"));
|
|
||||||
abort(); /* XXX - we don't ever want to see this... */
|
|
||||||
}
|
}
|
||||||
|
frag = &sfrag->ssf_base;
|
||||||
|
|
||||||
#if MSGDEBUG2
|
#if MSGDEBUG2
|
||||||
opal_output(0, "usnic_alloc: %s frag=%p, size=%d\n",
|
opal_output(0, "usnic_alloc: %s frag=%p, size=%d, flags=0x%x\n",
|
||||||
(size <= module->max_frag_payload)?"small":"large",
|
(size <= module->max_frag_payload)?"small":"large",
|
||||||
(void *)frag, (int)size);
|
(void *)frag, (int)size, flags);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* set # of bytes remaining to be ACKed */
|
/* set # of bytes remaining to be ACKed */
|
||||||
@ -262,28 +250,34 @@ usnic_alloc(struct mca_btl_base_module_t* btl,
|
|||||||
|
|
||||||
/* set up descriptor */
|
/* set up descriptor */
|
||||||
desc = &frag->sf_base.uf_base;
|
desc = &frag->sf_base.uf_base;
|
||||||
|
desc->des_flags = flags;
|
||||||
desc->des_src[0].seg_len = size;
|
desc->des_src[0].seg_len = size;
|
||||||
desc->des_src_cnt = 1;
|
desc->des_src_cnt = 1;
|
||||||
desc->des_flags = flags;
|
|
||||||
|
|
||||||
return desc;
|
return desc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a small send fragment
|
* Return an allocated fragment
|
||||||
*
|
*
|
||||||
* Return the send fragment to the appropriate list
|
* Return the send fragment to the appropriate list
|
||||||
*/
|
*/
|
||||||
static int usnic_free(struct mca_btl_base_module_t* btl,
|
static int usnic_free(struct mca_btl_base_module_t* btl,
|
||||||
mca_btl_base_descriptor_t* des)
|
mca_btl_base_descriptor_t* des)
|
||||||
{
|
{
|
||||||
ompi_btl_usnic_frag_t* frag = (ompi_btl_usnic_frag_t*)des;
|
ompi_btl_usnic_send_frag_t* frag = (ompi_btl_usnic_send_frag_t*)des;
|
||||||
|
|
||||||
#if MSGDEBUG1
|
#if MSGDEBUG2
|
||||||
opal_output(0, "usnic_free: %p\n", (void*)frag);
|
opal_output(0, "usnic_free: %p\n", (void*)frag);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if 1 /* seperate commit for seperate bug */
|
||||||
OMPI_FREE_LIST_RETURN_MT(frag->uf_freelist, &(frag->uf_base.super));
|
OMPI_FREE_LIST_RETURN_MT(frag->uf_freelist, &(frag->uf_base.super));
|
||||||
|
#else
|
||||||
|
ompi_btl_usnic_send_frag_return_cond((struct ompi_btl_usnic_module_t *)btl,
|
||||||
|
frag);
|
||||||
|
#endif
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -338,6 +332,7 @@ usnic_prepare_src(
|
|||||||
uint32_t flags)
|
uint32_t flags)
|
||||||
{
|
{
|
||||||
ompi_btl_usnic_module_t *module = (ompi_btl_usnic_module_t*) base_module;
|
ompi_btl_usnic_module_t *module = (ompi_btl_usnic_module_t*) base_module;
|
||||||
|
mca_btl_base_descriptor_t *desc;
|
||||||
ompi_btl_usnic_send_frag_t *frag;
|
ompi_btl_usnic_send_frag_t *frag;
|
||||||
uint32_t payload_len;
|
uint32_t payload_len;
|
||||||
struct iovec iov;
|
struct iovec iov;
|
||||||
@ -498,18 +493,22 @@ usnic_prepare_src(
|
|||||||
|
|
||||||
/* fragment accounting */
|
/* fragment accounting */
|
||||||
frag->sf_ack_bytes_left = payload_len;
|
frag->sf_ack_bytes_left = payload_len;
|
||||||
|
desc = &frag->sf_base.uf_base;
|
||||||
|
|
||||||
#if MSGDEBUG2
|
#if MSGDEBUG2
|
||||||
opal_output(0, "prep_src: %s %s frag %p, size=%d+%d, src=%p\n",
|
opal_output(0, "prep_src: %s %s frag %p, size=%d+%d\n",
|
||||||
module->device->name,
|
module->device->name,
|
||||||
payload_len <= module->max_frag_payload?"small":"large",
|
payload_len <= module->max_frag_payload?"small":"large",
|
||||||
(void *)frag, (int)reserve, (int)*size,
|
(void *)frag, (int)reserve, (int)*size);
|
||||||
frag->sf_base.uf_base.des_src[0].seg_addr.pval);
|
{ int i;
|
||||||
opal_output(0, " data_ptr = %p, conv=%p\n",
|
for (i=0; i<desc->des_src_cnt; ++i)
|
||||||
data_ptr, (void *)frag->sf_convertor);
|
opal_output(0, " %d: ptr:%p len:%d\n", i,
|
||||||
|
desc->des_src[i].seg_addr.pval,
|
||||||
|
desc->des_src[i].seg_len);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return &frag->sf_base.uf_base;
|
return desc;
|
||||||
}
|
}
|
||||||
|
|
||||||
static mca_btl_base_descriptor_t*
|
static mca_btl_base_descriptor_t*
|
||||||
@ -571,12 +570,20 @@ usnic_put(
|
|||||||
frag = (ompi_btl_usnic_send_frag_t *)des;
|
frag = (ompi_btl_usnic_send_frag_t *)des;
|
||||||
|
|
||||||
#if MSGDEBUG2
|
#if MSGDEBUG2
|
||||||
opal_output(0, "usnic_put: %"PRIu64" bytes to %p\n",
|
opal_output(0, "usnic_put, frag=%p, source=\n", frag);
|
||||||
des->des_dst->seg_len,
|
{ int i;
|
||||||
des->des_dst->seg_addr.pval);
|
for (i=0; i<des->des_src_cnt; ++i)
|
||||||
opal_output(0, " des_dst=%p, frag->uf_dst_seg=%p\n",
|
opal_output(0, " %d: ptr:%p len:%d\n", i,
|
||||||
(void *)des->des_dst,
|
des->des_src[i].seg_addr.pval,
|
||||||
(void *)frag->sf_base.uf_dst_seg);
|
des->des_src[i].seg_len);
|
||||||
|
}
|
||||||
|
opal_output(0, "dest:\n");
|
||||||
|
{ int i;
|
||||||
|
for (i=0; i<des->des_dst_cnt; ++i)
|
||||||
|
opal_output(0, " %d: ptr:%p len:%d\n", i,
|
||||||
|
des->des_dst[i].seg_addr.pval,
|
||||||
|
des->des_dst[i].seg_len);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* copy out address - why does he not use ours? silly PML */
|
/* copy out address - why does he not use ours? silly PML */
|
||||||
@ -1014,6 +1021,7 @@ usnic_handle_large_send(
|
|||||||
chp->ch_frag_id = lfrag->lsf_frag_id;
|
chp->ch_frag_id = lfrag->lsf_frag_id;
|
||||||
chp->ch_frag_size = lfrag->lsf_base.sf_size;
|
chp->ch_frag_size = lfrag->lsf_base.sf_size;
|
||||||
chp->ch_frag_offset = lfrag->lsf_cur_offset;
|
chp->ch_frag_offset = lfrag->lsf_cur_offset;
|
||||||
|
chp->ch_hdr.tag = lfrag->lsf_tag;
|
||||||
|
|
||||||
/* set actual packet length for verbs */
|
/* set actual packet length for verbs */
|
||||||
sseg->ss_base.us_sg_entry[0].length =
|
sseg->ss_base.us_sg_entry[0].length =
|
||||||
@ -1036,24 +1044,29 @@ usnic_handle_large_send(
|
|||||||
/* done with fragment? */
|
/* done with fragment? */
|
||||||
if (lfrag->lsf_bytes_left == 0) {
|
if (lfrag->lsf_bytes_left == 0) {
|
||||||
|
|
||||||
/* only callback now if this was not a PUT, otherwise
|
/* remove this frag from sending list now because PML may
|
||||||
* we need to wait until last byte is ACKed
|
* decide to put it on some other list in the callback
|
||||||
*/
|
*/
|
||||||
if (frag->sf_base.uf_dst_seg[0].seg_addr.pval == NULL) {
|
opal_list_remove_item(&endpoint->endpoint_frag_send_queue,
|
||||||
|
&frag->sf_base.uf_base.super.super);
|
||||||
|
|
||||||
#if MSGDEBUG1
|
/* only callback now if this was not a PUT and we own the fragment,
|
||||||
opal_output(0, " calling back %p, len=%zd\n",
|
* otherwise we need to wait until last byte is ACKed
|
||||||
(void*)(uintptr_t)frag->sf_base.uf_base.des_cbfunc,
|
*/
|
||||||
frag->sf_size);
|
if (frag->sf_base.uf_dst_seg[0].seg_addr.pval == NULL &&
|
||||||
|
(frag->sf_base.uf_base.des_flags &
|
||||||
|
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
|
||||||
|
|
||||||
|
#if MSGDEBUG2
|
||||||
|
opal_output(0, "callback for large frag %p, len=%zd\n",
|
||||||
|
(void *)frag->sf_base.uf_base.des_cbfunc, frag->sf_size);
|
||||||
#endif
|
#endif
|
||||||
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
||||||
frag->sf_endpoint, &frag->sf_base.uf_base,
|
frag->sf_endpoint, &frag->sf_base.uf_base,
|
||||||
OMPI_SUCCESS);
|
OMPI_SUCCESS);
|
||||||
++module->pml_send_callbacks;
|
++module->pml_send_callbacks;
|
||||||
|
frag->sf_base.uf_base.des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||||
}
|
}
|
||||||
|
|
||||||
opal_list_remove_item(&endpoint->endpoint_frag_send_queue,
|
|
||||||
&frag->sf_base.uf_base.super.super);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1113,6 +1126,13 @@ ompi_btl_usnic_module_progress_sends(
|
|||||||
* Send ptr and length will be in uf_src_seg[0]
|
* Send ptr and length will be in uf_src_seg[0]
|
||||||
*/
|
*/
|
||||||
if (frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_SMALL_SEND) {
|
if (frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_SMALL_SEND) {
|
||||||
|
|
||||||
|
/* remove this frag from sending list now because PML may
|
||||||
|
* decide to put it on some other list in the callback
|
||||||
|
*/
|
||||||
|
opal_list_remove_item(&endpoint->endpoint_frag_send_queue,
|
||||||
|
&frag->sf_base.uf_base.super.super);
|
||||||
|
|
||||||
sfrag = (ompi_btl_usnic_small_send_frag_t *)frag;
|
sfrag = (ompi_btl_usnic_small_send_frag_t *)frag;
|
||||||
sseg = &sfrag->ssf_segment;
|
sseg = &sfrag->ssf_segment;
|
||||||
|
|
||||||
@ -1121,9 +1141,12 @@ ompi_btl_usnic_module_progress_sends(
|
|||||||
sseg->ss_base.us_btl_header->payload_len = payload_len;
|
sseg->ss_base.us_btl_header->payload_len = payload_len;
|
||||||
|
|
||||||
#if MSGDEBUG1
|
#if MSGDEBUG1
|
||||||
opal_output(0, "send small, ptr=%"PRIu64", payload=%zd, len=%"PRIu32"\n",
|
opal_output(0, "progress send small, frag=%p, ptr=%p, payload=%zd, len=%"PRIu32", ep=%p, tag=%d\n",
|
||||||
sseg->ss_base.us_sg_entry[0].addr, payload_len,
|
(void *)frag,
|
||||||
sseg->ss_base.us_sg_entry[0].length);
|
(void *)sseg->ss_base.us_sg_entry[0].addr, payload_len,
|
||||||
|
sseg->ss_base.us_sg_entry[0].length,
|
||||||
|
(void *)frag->sf_endpoint,
|
||||||
|
sseg->ss_base.us_btl_header->tag);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* post the send */
|
/* post the send */
|
||||||
@ -1131,23 +1154,30 @@ ompi_btl_usnic_module_progress_sends(
|
|||||||
|
|
||||||
/* don't do callback yet if this is a put */
|
/* don't do callback yet if this is a put */
|
||||||
if (frag->sf_base.uf_dst_seg[0].seg_addr.pval == NULL) {
|
if (frag->sf_base.uf_dst_seg[0].seg_addr.pval == NULL) {
|
||||||
#if MSGDEBUG1
|
/* we have copied the data, perform a callback if
|
||||||
opal_output(0, " calling back %p, len=%"PRIu64"\n",
|
* we own the fragment and callback is requested.
|
||||||
(void*)(uintptr_t)frag->sf_base.uf_base.des_cbfunc,
|
* If we don't own the fragment, we cannot callback yet
|
||||||
frag->sf_base.uf_src_seg[0].seg_len);
|
* because we are not done with the segment inside.
|
||||||
|
* (ACK not received yet)
|
||||||
|
*/
|
||||||
|
if ((frag->sf_base.uf_base.des_flags &
|
||||||
|
(MCA_BTL_DES_SEND_ALWAYS_CALLBACK |
|
||||||
|
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) ==
|
||||||
|
(MCA_BTL_DES_SEND_ALWAYS_CALLBACK |
|
||||||
|
MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
|
||||||
|
#if MSGDEBUG2
|
||||||
|
opal_output(0, "callback frag small %p, len=%"PRIu64"\n",
|
||||||
|
(void*)frag, frag->sf_base.uf_src_seg[0].seg_len);
|
||||||
#endif
|
#endif
|
||||||
/* we have copied the data, proceed with callback */
|
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
||||||
/* could be done in usnic_send? XXX */
|
frag->sf_endpoint, &frag->sf_base.uf_base,
|
||||||
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
OMPI_SUCCESS);
|
||||||
frag->sf_endpoint, &frag->sf_base.uf_base,
|
++module->pml_send_callbacks;
|
||||||
OMPI_SUCCESS);
|
frag->sf_base.uf_base.des_flags &=
|
||||||
++module->pml_send_callbacks;
|
~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* remove frag from sending list */
|
|
||||||
opal_list_remove_item(&endpoint->endpoint_frag_send_queue,
|
|
||||||
&sfrag->ssf_base.sf_base.uf_base.super.super);
|
|
||||||
|
|
||||||
/* Large sends... */
|
/* Large sends... */
|
||||||
} else {
|
} else {
|
||||||
usnic_handle_large_send(module, endpoint, frag);
|
usnic_handle_large_send(module, endpoint, frag);
|
||||||
@ -1189,6 +1219,21 @@ ompi_btl_usnic_module_progress_sends(
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Initiate a send.
|
* Initiate a send.
|
||||||
|
*
|
||||||
|
* Send completion callbacks can be done from a few different places.
|
||||||
|
*
|
||||||
|
* If this is a send from a fragment we do not own, we always have
|
||||||
|
* to wait for the last ACK of the fragment, because we cannot allow
|
||||||
|
* the fragment to be re-used until we know we have no more retransmits to do.
|
||||||
|
*
|
||||||
|
* If this is a send from a fragment we own, and we know we have copied the
|
||||||
|
* data from the user's buffer, we can perform the callback immediately
|
||||||
|
* (or possibly not at all, simply returning "1" to indicate completion).
|
||||||
|
*
|
||||||
|
* If this is a send from a fragment we own and we have not yet copied out
|
||||||
|
* all the data (as is the case in a large send) then we defer the callback
|
||||||
|
* until the last of the data has been copied out by routines called
|
||||||
|
* from ompi_btl_usnic_module_progress_sends()
|
||||||
*/
|
*/
|
||||||
static int usnic_send(struct mca_btl_base_module_t* base_module,
|
static int usnic_send(struct mca_btl_base_module_t* base_module,
|
||||||
struct mca_btl_base_endpoint_t* base_endpoint,
|
struct mca_btl_base_endpoint_t* base_endpoint,
|
||||||
@ -1210,7 +1255,12 @@ static int usnic_send(struct mca_btl_base_module_t* base_module,
|
|||||||
opal_output(0, "usnic_send: frag=%p, endpoint=%p, tag=%d, sendreq=%p\n",
|
opal_output(0, "usnic_send: frag=%p, endpoint=%p, tag=%d, sendreq=%p\n",
|
||||||
(void *)frag, (void *)endpoint,
|
(void *)frag, (void *)endpoint,
|
||||||
tag, (void *)descriptor->des_cbdata);
|
tag, (void *)descriptor->des_cbdata);
|
||||||
opal_output(0, " data = %p\n", descriptor->des_src[0].seg_addr.pval);
|
{ int i;
|
||||||
|
for (i=0; i<descriptor->des_src_cnt; ++i)
|
||||||
|
opal_output(0, " %d: ptr:%p len:%d\n", i,
|
||||||
|
descriptor->des_src[i].seg_addr.pval,
|
||||||
|
descriptor->des_src[i].seg_len);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
assert(frag->sf_endpoint == endpoint);
|
assert(frag->sf_endpoint == endpoint);
|
||||||
@ -1257,6 +1307,7 @@ static int usnic_send(struct mca_btl_base_module_t* base_module,
|
|||||||
|
|
||||||
sseg->ss_send_desc.send_flags |= IBV_SEND_INLINE;
|
sseg->ss_send_desc.send_flags |= IBV_SEND_INLINE;
|
||||||
sseg->ss_channel = USNIC_PRIORITY_CHANNEL;
|
sseg->ss_channel = USNIC_PRIORITY_CHANNEL;
|
||||||
|
sseg->ss_base.us_btl_header->tag = tag;
|
||||||
#if MSGDEBUG2
|
#if MSGDEBUG2
|
||||||
opal_output(0, "conv = %p\n", frag->sf_convertor);
|
opal_output(0, "conv = %p\n", frag->sf_convertor);
|
||||||
opal_output(0, " inline frag %d segs %p(%d) + %p(%d)\n",
|
opal_output(0, " inline frag %d segs %p(%d) + %p(%d)\n",
|
||||||
@ -1292,14 +1343,30 @@ static int usnic_send(struct mca_btl_base_module_t* base_module,
|
|||||||
sizeof(ompi_btl_usnic_btl_header_t) + frag->sf_size;
|
sizeof(ompi_btl_usnic_btl_header_t) + frag->sf_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If requested, callback now, else just return 1 to show completion */
|
/* If we own the frag and callback was requested, callback now,
|
||||||
if (descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
* else just return 1 to show completion.
|
||||||
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
* If we don't own the frag, need to wait for ACK before
|
||||||
frag->sf_endpoint, &frag->sf_base.uf_base,
|
* performing callback on the frag
|
||||||
OMPI_SUCCESS);
|
*/
|
||||||
rc = 0;
|
if (descriptor->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
|
||||||
|
if (descriptor->des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||||
|
#if MSGDEBUG2
|
||||||
|
opal_output(0, "immediate callback for frag %p\n", (void *)frag);
|
||||||
|
#endif
|
||||||
|
frag->sf_base.uf_base.des_cbfunc(&module->super,
|
||||||
|
frag->sf_endpoint, &frag->sf_base.uf_base,
|
||||||
|
OMPI_SUCCESS);
|
||||||
|
rc = 0;
|
||||||
|
descriptor->des_flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||||
|
} else {
|
||||||
|
#if MSGDEBUG2
|
||||||
|
opal_output(0, "skipping callback for frag %p\n", (void *)frag);
|
||||||
|
#endif
|
||||||
|
rc = 1;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
rc = 1;
|
descriptor->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||||
|
rc = 0;
|
||||||
}
|
}
|
||||||
++module->pml_module_sends;
|
++module->pml_module_sends;
|
||||||
++module->pml_send_callbacks; /* returning "1" is an implicit CB */
|
++module->pml_send_callbacks; /* returning "1" is an implicit CB */
|
||||||
|
@ -56,6 +56,7 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
|||||||
mca_btl_active_message_callback_t* reg;
|
mca_btl_active_message_callback_t* reg;
|
||||||
ompi_btl_usnic_endpoint_t *endpoint;
|
ompi_btl_usnic_endpoint_t *endpoint;
|
||||||
ompi_btl_usnic_btl_chunk_header_t *chunk_hdr;
|
ompi_btl_usnic_btl_chunk_header_t *chunk_hdr;
|
||||||
|
ompi_btl_usnic_btl_header_t *hdr;
|
||||||
uint32_t window_index;
|
uint32_t window_index;
|
||||||
int rc;
|
int rc;
|
||||||
#if MSGDEBUG1
|
#if MSGDEBUG1
|
||||||
@ -110,11 +111,11 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hdr = seg->rs_base.us_btl_header;
|
||||||
|
|
||||||
#if MSGDEBUG1
|
#if MSGDEBUG1
|
||||||
opal_output(0, "<-- Received FRAG ep %p, seq %" UDSEQ ", len=%d\n",
|
opal_output(0, "<-- Received FRAG ep %p, seq %" UDSEQ ", len=%d\n",
|
||||||
(void*) endpoint,
|
(void*) endpoint, hdr->seq, hdr->payload_len);
|
||||||
seg->rs_base.us_btl_header->seq,
|
|
||||||
seg->rs_base.us_btl_header->payload_len);
|
|
||||||
#if 0
|
#if 0
|
||||||
|
|
||||||
opal_output(0, "<-- Received FRAG ep %p, seq %" UDSEQ " from %s to %s: GOOD! (rel seq %d, lowest seq %" UDSEQ ", highest seq: %" UDSEQ ", rwstart %d) seg %p, module %p\n",
|
opal_output(0, "<-- Received FRAG ep %p, seq %" UDSEQ " from %s to %s: GOOD! (rel seq %d, lowest seq %" UDSEQ ", highest seq: %" UDSEQ ", rwstart %d) seg %p, module %p\n",
|
||||||
@ -126,7 +127,7 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
|||||||
endpoint->endpoint_highest_seq_rcvd,
|
endpoint->endpoint_highest_seq_rcvd,
|
||||||
endpoint->endpoint_rfstart,
|
endpoint->endpoint_rfstart,
|
||||||
(void*) seg, (void*) module);
|
(void*) seg, (void*) module);
|
||||||
if (seg->rs_base.us_btl_header->put_addr != NULL) {
|
if (hdr->put_addr != NULL) {
|
||||||
opal_output(0, " put_addr = %p\n",
|
opal_output(0, " put_addr = %p\n",
|
||||||
seg->rs_base.us_btl_header->put_addr);
|
seg->rs_base.us_btl_header->put_addr);
|
||||||
}
|
}
|
||||||
@ -139,12 +140,10 @@ void ompi_btl_usnic_recv_call(ompi_btl_usnic_module_t *module,
|
|||||||
* the frame length to meet minimum sizes, add protocol information,
|
* the frame length to meet minimum sizes, add protocol information,
|
||||||
* etc.
|
* etc.
|
||||||
*/
|
*/
|
||||||
if (seg->rs_base.us_btl_header->put_addr == NULL) {
|
if (hdr->put_addr == NULL) {
|
||||||
reg = mca_btl_base_active_message_trigger +
|
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
||||||
bseg->us_payload.pml_header->tag;
|
seg->rs_segment.seg_len = hdr->payload_len;
|
||||||
seg->rs_segment.seg_len = bseg->us_btl_header->payload_len;
|
reg->cbfunc(&module->super, hdr->tag, &seg->rs_desc, reg->cbdata);
|
||||||
reg->cbfunc(&module->super, bseg->us_payload.pml_header->tag,
|
|
||||||
&seg->rs_desc, reg->cbdata);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If this is a PUT, need to copy it to user buffer
|
* If this is a PUT, need to copy it to user buffer
|
||||||
@ -269,15 +268,10 @@ opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
|
|||||||
|
|
||||||
fip->rfi_bytes_left -= chunk_hdr->ch_hdr.payload_len;
|
fip->rfi_bytes_left -= chunk_hdr->ch_hdr.payload_len;
|
||||||
if (0 == fip->rfi_bytes_left) {
|
if (0 == fip->rfi_bytes_left) {
|
||||||
mca_btl_base_header_t *pml_header;
|
|
||||||
mca_btl_base_descriptor_t desc;
|
mca_btl_base_descriptor_t desc;
|
||||||
mca_btl_base_segment_t segment;
|
mca_btl_base_segment_t segment;
|
||||||
|
|
||||||
/* Get access to PML header in assembled fragment so we
|
segment.seg_addr.pval = fip->rfi_data;
|
||||||
* can pull out the tag
|
|
||||||
*/
|
|
||||||
pml_header = (mca_btl_base_header_t *)(fip->rfi_data);
|
|
||||||
segment.seg_addr.pval = pml_header;
|
|
||||||
segment.seg_len = fip->rfi_frag_size;
|
segment.seg_len = fip->rfi_frag_size;
|
||||||
desc.des_dst = &segment;
|
desc.des_dst = &segment;
|
||||||
desc.des_dst_cnt = 1;
|
desc.des_dst_cnt = 1;
|
||||||
@ -289,12 +283,13 @@ opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
|
|||||||
#if MSGDEBUG2
|
#if MSGDEBUG2
|
||||||
opal_output(0, " large FRAG complete, pass up %p, %"PRIu64" bytes, tag=%d\n",
|
opal_output(0, " large FRAG complete, pass up %p, %"PRIu64" bytes, tag=%d\n",
|
||||||
desc.des_dst->seg_addr.pval, desc.des_dst->seg_len,
|
desc.des_dst->seg_addr.pval, desc.des_dst->seg_len,
|
||||||
pml_header->tag);
|
chunk_hdr->ch_hdr.tag);
|
||||||
#endif
|
#endif
|
||||||
reg = mca_btl_base_active_message_trigger + pml_header->tag;
|
reg = mca_btl_base_active_message_trigger +
|
||||||
|
chunk_hdr->ch_hdr.tag;
|
||||||
|
|
||||||
/* mca_pml_ob1_recv_frag_callback_frag() */
|
/* mca_pml_ob1_recv_frag_callback_frag() */
|
||||||
reg->cbfunc(&module->super, pml_header->tag,
|
reg->cbfunc(&module->super, chunk_hdr->ch_hdr.tag,
|
||||||
&desc, reg->cbdata);
|
&desc, reg->cbdata);
|
||||||
|
|
||||||
/* free temp buffer for non-put */
|
/* free temp buffer for non-put */
|
||||||
|
@ -283,10 +283,9 @@ ompi_btl_usnic_recv_fast(ompi_btl_usnic_module_t *module,
|
|||||||
* the frame length to meet minimum sizes, add protocol information,
|
* the frame length to meet minimum sizes, add protocol information,
|
||||||
* etc.
|
* etc.
|
||||||
*/
|
*/
|
||||||
reg = mca_btl_base_active_message_trigger +
|
reg = mca_btl_base_active_message_trigger + bseg->us_btl_header->tag;
|
||||||
bseg->us_payload.pml_header->tag;
|
|
||||||
seg->rs_segment.seg_len = bseg->us_btl_header->payload_len;
|
seg->rs_segment.seg_len = bseg->us_btl_header->payload_len;
|
||||||
reg->cbfunc(&module->super, bseg->us_payload.pml_header->tag,
|
reg->cbfunc(&module->super, bseg->us_btl_header->tag,
|
||||||
&seg->rs_desc, reg->cbdata);
|
&seg->rs_desc, reg->cbdata);
|
||||||
|
|
||||||
drop:
|
drop:
|
||||||
@ -384,10 +383,9 @@ ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module,
|
|||||||
* the frame length to meet minimum sizes, add protocol information,
|
* the frame length to meet minimum sizes, add protocol information,
|
||||||
* etc.
|
* etc.
|
||||||
*/
|
*/
|
||||||
reg = mca_btl_base_active_message_trigger +
|
reg = mca_btl_base_active_message_trigger + bseg->us_btl_header->tag;
|
||||||
bseg->us_payload.pml_header->tag;
|
|
||||||
seg->rs_segment.seg_len = bseg->us_btl_header->payload_len;
|
seg->rs_segment.seg_len = bseg->us_btl_header->payload_len;
|
||||||
reg->cbfunc(&module->super, bseg->us_payload.pml_header->tag,
|
reg->cbfunc(&module->super, bseg->us_btl_header->tag,
|
||||||
&seg->rs_desc, reg->cbdata);
|
&seg->rs_desc, reg->cbdata);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -152,6 +152,7 @@ ompi_btl_usnic_send_slower(
|
|||||||
|
|
||||||
/* use standard channel */
|
/* use standard channel */
|
||||||
sseg->ss_channel = USNIC_DATA_CHANNEL;
|
sseg->ss_channel = USNIC_DATA_CHANNEL;
|
||||||
|
sseg->ss_base.us_btl_header->tag = tag;
|
||||||
#if MSGDEBUG2
|
#if MSGDEBUG2
|
||||||
opal_output(0, " small frag %d segs %p(%d) + %p(%d)\n",
|
opal_output(0, " small frag %d segs %p(%d) + %p(%d)\n",
|
||||||
(int)frag->sf_base.uf_base.des_src_cnt,
|
(int)frag->sf_base.uf_base.des_src_cnt,
|
||||||
@ -166,6 +167,12 @@ ompi_btl_usnic_send_slower(
|
|||||||
(void *)sseg->ss_send_desc.sg_list[1].addr,
|
(void *)sseg->ss_send_desc.sg_list[1].addr,
|
||||||
sseg->ss_send_desc.sg_list[1].length);
|
sseg->ss_send_desc.sg_list[1].length);
|
||||||
#endif
|
#endif
|
||||||
|
} else {
|
||||||
|
ompi_btl_usnic_large_send_frag_t *lfrag;
|
||||||
|
|
||||||
|
lfrag = (ompi_btl_usnic_large_send_frag_t *)frag;
|
||||||
|
|
||||||
|
lfrag->lsf_tag = tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* queue this fragment into the send engine */
|
/* queue this fragment into the send engine */
|
||||||
|
@ -53,6 +53,21 @@ ompi_btl_usnic_check_rts(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if MSGDEBUG2
|
||||||
|
static inline
|
||||||
|
int sge_total(struct ibv_send_wr *wr)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int len;
|
||||||
|
len=0;
|
||||||
|
for (i=0; i<wr->num_sge; ++i) {
|
||||||
|
len += wr->sg_list[i].length;
|
||||||
|
}
|
||||||
|
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Common point for posting a segment to VERBS
|
* Common point for posting a segment to VERBS
|
||||||
*/
|
*/
|
||||||
@ -68,10 +83,11 @@ ompi_btl_usnic_post_segment(
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
#if MSGDEBUG1
|
#if MSGDEBUG1
|
||||||
opal_output(0, "post_send: type=%d, addr=%p, len=%d\n",
|
opal_output(0, "post_send: type=%s, addr=%p, len=%d, payload=%d\n",
|
||||||
sseg->ss_base.us_type,
|
usnic_seg_type(sseg->ss_base.us_type),
|
||||||
(void*) sseg->ss_send_desc.sg_list->addr,
|
(void*) sseg->ss_send_desc.sg_list->addr,
|
||||||
sseg->ss_send_desc.sg_list->length);
|
sge_total(&sseg->ss_send_desc),
|
||||||
|
sseg->ss_base.us_btl_header->payload_len);
|
||||||
/*ompi_btl_usnic_dump_hex((void *)(sseg->ss_send_desc.sg_list->addr + sizeof(ompi_btl_usnic_btl_header_t)), 16); */
|
/*ompi_btl_usnic_dump_hex((void *)(sseg->ss_send_desc.sg_list->addr + sizeof(ompi_btl_usnic_btl_header_t)), 16); */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -215,8 +231,9 @@ ompi_btl_usnic_endpoint_enqueue_frag(
|
|||||||
ompi_btl_usnic_send_frag_t *frag)
|
ompi_btl_usnic_send_frag_t *frag)
|
||||||
{
|
{
|
||||||
#if MSGDEBUG1
|
#if MSGDEBUG1
|
||||||
opal_output(0, "enq_frag: frag=%p, endpoint=%p, type=%d, len=%"PRIu64"\n",
|
opal_output(0, "enq_frag: frag=%p, endpoint=%p, %s, len=%"PRIu64"\n",
|
||||||
(void*)frag, (void*)endpoint, frag->sf_base.uf_type,
|
(void*)frag, (void*)endpoint,
|
||||||
|
usnic_frag_type(frag->sf_base.uf_type),
|
||||||
frag->sf_base.uf_base.des_src->seg_len);
|
frag->sf_base.uf_base.des_src->seg_len);
|
||||||
if (frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_LARGE_SEND) {
|
if (frag->sf_base.uf_type == OMPI_BTL_USNIC_FRAG_LARGE_SEND) {
|
||||||
ompi_btl_usnic_large_send_frag_t *lfrag;
|
ompi_btl_usnic_large_send_frag_t *lfrag;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user