From bb9e73232aa7c0d3979e8d36342fc12f5b303ef4 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Wed, 28 Sep 2011 21:18:47 +0000 Subject: [PATCH] * Leverage hdr_data and opcount to improve debugging * Clean up handling of short synchronous messages This commit was SVN r25208. --- ompi/mca/mtl/portals4/mtl_portals4.c | 73 +------------------ ompi/mca/mtl/portals4/mtl_portals4.h | 30 +++++--- .../mca/mtl/portals4/mtl_portals4_component.c | 19 ++--- ompi/mca/mtl/portals4/mtl_portals4_probe.c | 2 +- ompi/mca/mtl/portals4/mtl_portals4_recv.c | 44 +++++++++-- ompi/mca/mtl/portals4/mtl_portals4_request.h | 6 +- ompi/mca/mtl/portals4/mtl_portals4_send.c | 53 +++++++++----- 7 files changed, 105 insertions(+), 122 deletions(-) diff --git a/ompi/mca/mtl/portals4/mtl_portals4.c b/ompi/mca/mtl/portals4/mtl_portals4.c index 5a3c0cd0ff..1cbb243ca4 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.c +++ b/ompi/mca/mtl/portals4/mtl_portals4.c @@ -166,7 +166,7 @@ ompi_mtl_portals4_progress(void) while (true) { ret = PtlEQGet(ompi_mtl_portals4.eq_h, &ev); if (PTL_OK == ret) { - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, + OPAL_OUTPUT_VERBOSE((60, ompi_mtl_base_output, "Found event of type %d\n", ev.type)); switch (ev.type) { case PTL_EVENT_GET: @@ -234,77 +234,6 @@ ompi_mtl_portals4_progress(void) "Error returned from PtlEQGet: %d", ret); abort(); } - - ret = PtlEQGet(ompi_mtl_portals4.tmp_eq_h, &ev); - if (PTL_OK == ret) { - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, - "Found event of type %d\n", ev.type)); - switch (ev.type) { - case PTL_EVENT_GET: - case PTL_EVENT_PUT: - case PTL_EVENT_PUT_OVERFLOW: - case PTL_EVENT_ATOMIC: - case PTL_EVENT_ATOMIC_OVERFLOW: - if (NULL != ev.user_ptr) { - ptl_request = ev.user_ptr; - ret = ptl_request->event_callback(&ev, ptl_request); - if (OMPI_SUCCESS != ret) { - opal_output(ompi_mtl_base_output, - "Error returned from target event callback: %d", ret); - abort(); - } - } - break; - case PTL_EVENT_REPLY: - case PTL_EVENT_SEND: - case PTL_EVENT_ACK: - if (NULL != ev.user_ptr) { - ptl_request = ev.user_ptr; - ret = ptl_request->event_callback(&ev, ptl_request); - if (OMPI_SUCCESS != ret) { - opal_output(ompi_mtl_base_output, - "Error returned from initiator event callback: %d", ret); - abort(); - } - } - break; - case PTL_EVENT_PT_DISABLED: - /* do stuff - flow control */ - opal_output(ompi_mtl_base_output, "Unhandled read flow control event."); - abort(); - break; - case PTL_EVENT_AUTO_UNLINK: - break; - case PTL_EVENT_AUTO_FREE: - if (OMPI_SUCCESS != (ret = ompi_mtl_portals4_recv_short_block_repost(&ev))) { - opal_output(ompi_mtl_base_output, - "Error returned from PTL_EVENT_FREE callback: %d", ret); - abort(); - } - break; - case PTL_EVENT_SEARCH: - if (NULL != ev.user_ptr) { - ptl_request = ev.user_ptr; - ret = ptl_request->event_callback(&ev, ptl_request); - if (OMPI_SUCCESS != ret) { - opal_output(ompi_mtl_base_output, - "Error returned from target event callback: %d", ret); - abort(); - } - } - break; - default: - opal_output(ompi_mtl_base_output, - "Unknown event type %d (error: %d)", (int)ev.type, ret); - abort(); - } - } else if (PTL_EQ_EMPTY == ret) { - break; - } else { - opal_output(ompi_mtl_base_output, - "Error returned from PtlEQGet: %d", ret); - abort(); - } } return count; diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index 0f51dcadb9..2a0b4b9b2f 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -48,7 +48,6 @@ struct mca_mtl_portals4_module_t { /* global handles */ ptl_handle_ni_t ni_h; ptl_handle_eq_t eq_h; - ptl_handle_eq_t tmp_eq_h; /* for zero-length sends and acks */ ptl_handle_md_t zero_md_h; @@ -58,8 +57,11 @@ struct mca_mtl_portals4_module_t { opal_list_t recv_short_blocks; - /* number of send-side operations started */ + /* number of operations started */ uint32_t opcount; +#if OPAL_ENABLE_DEBUG + uint32_t recv_opcount; +#endif enum { eager, rndv } protocol; }; @@ -91,9 +93,8 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4; #define MTL_PORTALS4_TAG_IGNR 0x000000007FFFFFFFULL #define MTL_PORTALS4_SHORT_MSG 0x1000000000000000ULL -#define MTL_PORTALS4_SHORT_SYNC_MSG 0x2000000000000000ULL -#define MTL_PORTALS4_LONG_MSG 0x4000000000000000ULL -#define MTL_PORTALS4_READY_MSG 0x8000000000000000ULL +#define MTL_PORTALS4_LONG_MSG 0x2000000000000000ULL +#define MTL_PORTALS4_READY_MSG 0x4000000000000000ULL /* send posting */ #define MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, source, tag, type) \ @@ -135,22 +136,27 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4; (0 != (MTL_PORTALS4_LONG_MSG & match_bits)) #define MTL_PORTALS4_IS_READY_MSG(match_bits) \ (0 != (MTL_PORTALS4_READY_MSG & match_bits)) -#define MTL_PORTALS4_IS_SYNC_MSG(match_bits) \ - (0 != (MTL_PORTALS4_SHORT_SYNC_MSG & match_bits)) #define MTL_PORTALS4_GET_TAG(match_bits) \ ((int)(match_bits & MTL_PORTALS4_TAG_MASK)) #define MTL_PORTALS4_GET_SOURCE(match_bits) \ ((int)((match_bits & MTL_PORTALS4_SOURCE_MASK) >> 32)) -#define MTL_PORTALS4_SET_HDR_DATA(hdr_data, opcount, length) \ - { \ - hdr_data = opcount & 0xFFFFULL; \ - hdr_data = (hdr_data << 48); \ - hdr_data |= (length & 0xFFFFFFFFFFFFULL); \ + +#define MTL_PORTALS4_SYNC_MSG 0x8000000000000000ULL + +#define MTL_PORTALS4_SET_HDR_DATA(hdr_data, opcount, length, sync) \ + { \ + hdr_data = (sync) ? 1 : 0; \ + hdr_data = (hdr_data << 15); \ + hdr_data |= opcount & 0x7FFFULL; \ + hdr_data = (hdr_data << 48); \ + hdr_data |= (length & 0xFFFFFFFFFFFFULL); \ } #define MTL_PORTALS4_GET_LENGTH(hdr_data) ((size_t)(hdr_data & 0xFFFFFFFFFFFFULL)) +#define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \ + (0 != (MTL_PORTALS4_SYNC_MSG & hdr_data)) /* MTL interface functions */ extern int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index 72b2572b23..4064a13822 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -200,6 +200,10 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, goto error; } + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, + "My nid,pid = %x,%x", + id.phys.nid, id.phys.pid)); + /* create event queue */ ret = PtlEQAlloc(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.queue_size, @@ -211,16 +215,6 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, goto error; } - ret = PtlEQAlloc(ompi_mtl_portals4.ni_h, - ompi_mtl_portals4.queue_size, - &ompi_mtl_portals4.tmp_eq_h); - if (PTL_OK != ret) { - opal_output(ompi_mtl_base_output, - "%s:%d: PtlEQAlloc failed: %d\n", - __FILE__, __LINE__, ret); - goto error; - } - /* Create portal table entries */ ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, PTL_PT_FLOWCTRL, @@ -235,7 +229,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, } ret = PtlPTAlloc(ompi_mtl_portals4.ni_h, PTL_PT_FLOWCTRL, - ompi_mtl_portals4.tmp_eq_h, + ompi_mtl_portals4.eq_h, REQ_READ_TABLE_ID, &ompi_mtl_portals4.read_idx); if (PTL_OK != ret) { @@ -296,6 +290,9 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, } ompi_mtl_portals4.opcount = 0; +#if OPAL_ENABLE_DEBUG + ompi_mtl_portals4.recv_opcount = 0; +#endif /* activate progress callback */ ret = opal_progress_register(ompi_mtl_portals4_progress); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_probe.c b/ompi/mca/mtl/portals4/mtl_portals4_probe.c index a491d2964d..8d3ed0d28b 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_probe.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_probe.c @@ -110,5 +110,5 @@ ompi_mtl_portals4_iprobe(struct mca_mtl_base_module_t* mtl, *status = request.status; } - return OMPI_ERR_NOT_IMPLEMENTED; + return OMPI_SUCCESS; } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index 5521b9cb0e..b6b016afd2 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -44,6 +44,9 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, switch (ev->type) { case PTL_EVENT_PUT: + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put event", + ptl_request->opcount, ev->hdr_data)); + if (ev->ni_fail_type != PTL_NI_OK) { opal_output(ompi_mtl_base_output, "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d", @@ -57,9 +60,15 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, ptl_request->super.super.ompi_req->req_status.MPI_TAG = MTL_PORTALS4_GET_TAG(ev->match_bits); if (msg_length > ptl_request->delivery_len) { + opal_output(ompi_mtl_base_output, "truncate: %d %d", + msg_length, ptl_request->delivery_len); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; } +#if OPAL_ENABLE_DEBUG + ptl_request->hdr_data = ev->hdr_data; +#endif + if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) { ptl_md_t md; @@ -111,12 +120,16 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, } ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed")); + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, expected", + ptl_request->opcount, ptl_request->hdr_data)); ptl_request->super.super.completion_callback(&ptl_request->super.super); } break; case PTL_EVENT_REPLY: + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got reply event", + ptl_request->opcount, ptl_request->hdr_data)); + if (ev->ni_fail_type != PTL_NI_OK) { opal_output(ompi_mtl_base_output, "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d", @@ -144,11 +157,15 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, } PtlMDRelease(ptl_request->md_h); - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed")); + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, reply", + ptl_request->opcount, ptl_request->hdr_data)); ptl_request->super.super.completion_callback(&ptl_request->super.super); break; case PTL_EVENT_PUT_OVERFLOW: + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put_overflow event", + ptl_request->opcount, ev->hdr_data)); + if (ev->ni_fail_type != PTL_NI_OK) { opal_output(ompi_mtl_base_output, "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d", @@ -162,9 +179,15 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, ptl_request->super.super.ompi_req->req_status.MPI_TAG = MTL_PORTALS4_GET_TAG(ev->match_bits); if (msg_length > ptl_request->delivery_len) { + opal_output(ompi_mtl_base_output, "truncate: %d %d", + msg_length, ptl_request->delivery_len); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; } +#if OPAL_ENABLE_DEBUG + ptl_request->hdr_data = ev->hdr_data; +#endif + /* overflow case. Short messages have the buffer stashed somewhere. Long messages left in buffer at the source */ if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) { @@ -188,9 +211,10 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, goto callback_error; } } - /* if it's a sync, send the ack */ - if (MTL_PORTALS4_IS_SYNC_MSG(ev->match_bits)) { + if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) { + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) sending sync ack", + ptl_request->opcount, ptl_request->hdr_data)); ret = PtlPut(ompi_mtl_portals4.zero_md_h, 0, 0, @@ -209,7 +233,8 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, } } - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed")); + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, unexpected short", + ptl_request->opcount, ptl_request->hdr_data)); ptl_request->super.super.completion_callback(&ptl_request->super.super); } else { @@ -233,6 +258,8 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, goto callback_error; } + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) getting long data", + ptl_request->opcount, ptl_request->hdr_data)); ret = PtlGet(ptl_request->md_h, 0, md.length, @@ -309,6 +336,10 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, return ret; } +#if OPAL_ENABLE_DEBUG + ptl_request->opcount = ++ompi_mtl_portals4.recv_opcount; + ptl_request->hdr_data = 0; +#endif ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress; ptl_request->buffer_ptr = (free_after) ? start : NULL; ptl_request->convertor = convertor; @@ -317,7 +348,8 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, - "Recv from %x,%x of length %d\n", + "Recv %d from %x,%x of length %d\n", + ptl_request->opcount, remote_proc.phys.nid, remote_proc.phys.pid, (int)length)); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_request.h b/ompi/mca/mtl/portals4/mtl_portals4_request.h index 5e9702eab8..81dc2f264d 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_request.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_request.h @@ -37,6 +37,7 @@ struct ompi_mtl_portals4_send_request_t { ptl_handle_md_t md_h; ptl_handle_me_t me_h; volatile int event_count; + int opcount; }; typedef struct ompi_mtl_portals4_send_request_t ompi_mtl_portals4_send_request_t; @@ -46,10 +47,13 @@ struct ompi_mtl_portals4_recv_request_t { void *buffer_ptr; ptl_handle_md_t md_h; ptl_handle_me_t me_h; - ptl_handle_ct_t ct_h; struct opal_convertor_t *convertor; void *delivery_ptr; size_t delivery_len; +#if OPAL_ENABLE_DEBUG + int opcount; + ptl_hdr_data_t hdr_data; +#endif }; typedef struct ompi_mtl_portals4_recv_request_t ompi_mtl_portals4_recv_request_t; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_send.c b/ompi/mca/mtl/portals4/mtl_portals4_send.c index ad647af519..9a251b1f2c 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_send.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_send.c @@ -30,24 +30,28 @@ static int -ompi_mtl_portals4_callback(ptl_event_t *ev, struct ompi_mtl_portals4_base_request_t* ptl_base_request) +ompi_mtl_portals4_send_callback(ptl_event_t *ev, struct ompi_mtl_portals4_base_request_t* ptl_base_request) { int ret; ompi_mtl_portals4_send_request_t* ptl_request = (ompi_mtl_portals4_send_request_t*) ptl_base_request; - assert(ev->type == PTL_EVENT_SEND || ev->type == PTL_EVENT_ACK || ev->type == PTL_EVENT_GET); assert(NULL != ptl_request->super.super.ompi_req); if (ev->ni_fail_type != PTL_NI_OK) { - opal_output_verbose(1, ompi_mtl_base_output, - "%s:%d: long send callback ni_fail_type: %d", - __FILE__, __LINE__, ev->ni_fail_type); + opal_output(ompi_mtl_base_output, + "%s:%d: send callback ni_fail_type: %d", + __FILE__, __LINE__, ev->ni_fail_type); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR; ptl_request->super.super.completion_callback(&ptl_request->super.super); + abort(); return OMPI_ERROR; } + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, + "Send %d got event of type %d", + ptl_request->opcount, ev->type)); + /* we only receive an ack if the message was received into an expected message. Otherwise, we don't get an ack, but mark completion when the message was pulled (long message). A short @@ -59,11 +63,12 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, struct ompi_mtl_portals4_base_reques ret = PtlMDRelease(ptl_request->md_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_output, - "%s:%d: long send callback PtlMDRelease returned %d", + "%s:%d: send callback PtlMDRelease returned %d", __FILE__, __LINE__, ret); ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR; } - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send completed")); + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Send %d completed", + ptl_request->opcount)); ptl_request->super.super.completion_callback(&ptl_request->super.super); } @@ -72,7 +77,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, struct ompi_mtl_portals4_base_reques ret = PtlMEUnlink(ptl_request->me_h); if (PTL_OK != ret) { opal_output_verbose(1, ompi_mtl_base_output, - "%s:%d: long send callback PtlMDUnlink returned %d", + "%s:%d: send callback PtlMDUnlink returned %d", __FILE__, __LINE__, ret); } } @@ -93,13 +98,13 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, ptl_hdr_data_t hdr_data; ptl_md_t md; - ptl_request->super.event_callback = ompi_mtl_portals4_callback; + ptl_request->super.event_callback = ompi_mtl_portals4_send_callback; ptl_request->event_count = 1; mode_bits = (MCA_PML_BASE_SEND_READY != mode) ? MTL_PORTALS4_SHORT_MSG : MTL_PORTALS4_READY_MSG; MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, mode_bits); - MTL_PORTALS4_SET_HDR_DATA(hdr_data, ompi_mtl_portals4.opcount, length); + MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, 0); md.start = start; md.length = length; @@ -117,6 +122,10 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, return ompi_mtl_portals4_get_error(ret); } + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, + "Send %d short send with hdr_data 0x%lx", + ptl_request->opcount, hdr_data)); + ret = PtlPut(ptl_request->md_h, 0, length, @@ -151,12 +160,12 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int tag, ptl_me_t me; ptl_hdr_data_t hdr_data; - ptl_request->super.event_callback = ompi_mtl_portals4_callback; + ptl_request->super.event_callback = ompi_mtl_portals4_send_callback; MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, - MTL_PORTALS4_SHORT_SYNC_MSG); + MTL_PORTALS4_SHORT_MSG); - MTL_PORTALS4_SET_HDR_DATA(hdr_data, ompi_mtl_portals4.opcount, length); + MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, 1); md.start = start; md.length = length; @@ -198,7 +207,10 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int tag, return ompi_mtl_portals4_get_error(ret); } - printf("sync send started\n"); fflush(NULL); + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, + "Send %d short sync send with hdr_data 0x%lx", + ptl_request->opcount, hdr_data)); + ret = PtlPut(ptl_request->md_h, 0, length, @@ -235,11 +247,11 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag, ptl_me_t me; ptl_hdr_data_t hdr_data; - ptl_request->super.event_callback = ompi_mtl_portals4_callback; + ptl_request->super.event_callback = ompi_mtl_portals4_send_callback; MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_LONG_MSG); - MTL_PORTALS4_SET_HDR_DATA(hdr_data, ompi_mtl_portals4.opcount, length); + MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, 0); md.start = start; md.length = length; @@ -281,6 +293,10 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int tag, return ompi_mtl_portals4_get_error(ret); } + OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, + "Send %d long send with hdr_data 0x%lx", + ptl_request->opcount, hdr_data)); + if (ompi_mtl_portals4.protocol == rndv) { ret = PtlPut(ptl_request->md_h, 0, @@ -340,15 +356,14 @@ ompi_mtl_portals4_isend(struct mca_mtl_base_module_t* mtl, ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); if (OMPI_SUCCESS != ret) return ret; + ptl_request->opcount = ++ompi_mtl_portals4.opcount; ptl_request->buffer_ptr = (free_after) ? start : NULL; ptl_request->event_count = 0; ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; - ompi_mtl_portals4.opcount++; - OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Send %d to %x,%x of length %d\n", - ompi_mtl_portals4.opcount, + ptl_request->opcount, endpoint->ptl_proc.phys.nid, endpoint->ptl_proc.phys.pid, (int)length));