diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index e79000bc9a..b8f312804c 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -53,6 +53,8 @@ struct mca_mtl_portals4_module_t { /* Use flow control: 1 (true) : 0 (false) */ int32_t use_flowctl; + /** Short limit; Size limit for short messages */ + uint64_t short_limit; /** Eager limit; messages greater than this use a rendezvous protocol */ uint64_t eager_limit; /** Size of short message blocks */ diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index 1c0851c3f6..eef04c55dc 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -100,6 +100,18 @@ ompi_mtl_portals4_component_register(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &param_priority); + ompi_mtl_portals4.short_limit = 2 * 1024; + (void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version, + "short_limit", + "Size limit for short messages", + MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG, + NULL, + 0, + 0, + OPAL_INFO_LVL_5, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_mtl_portals4.short_limit); + ompi_mtl_portals4.eager_limit = 2 * 1024; (void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version, @@ -196,6 +208,9 @@ ompi_mtl_portals4_component_open(void) "no" #endif ); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Short limit: %d", (int) + ompi_mtl_portals4.short_limit); opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Eager limit: %d", (int) ompi_mtl_portals4.eager_limit); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index 92b9b4797b..ca7f506bde 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -114,16 +114,16 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, #endif 
ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; - if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) { - /* If it's not a short message and we're doing rndv, we + if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv && msg_length != ev->mlength) { + /* If it's not a short message and we're doing rndv and the message is not complete, we only have the first part of the message. Issue the get to pull the second part of the message. */ - ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit, + ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength, ((msg_length > ptl_request->delivery_len) ? - ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit, + ptl_request->delivery_len : msg_length) - ev->mlength, ev->initiator, ev->hdr_data, - ompi_mtl_portals4.eager_limit, + ev->mlength, ptl_request); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); @@ -164,7 +164,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, } /* set the received length in the status, now that we know - excatly how much data was sent. */ + exactly how much data was sent. */ ptl_request->super.super.ompi_req->req_status._ucount += ev->mlength; #if OMPI_MTL_PORTALS4_FLOW_CONTROL @@ -280,12 +280,12 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev, /* For long messages in the overflow list, ev->mlength = 0 */ ptl_request->super.super.ompi_req->req_status._ucount = 0; - ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength, - ((msg_length > ptl_request->delivery_len) ? - ptl_request->delivery_len : msg_length) - ev->mlength, + ret = read_msg((char*) ptl_request->delivery_ptr, + (msg_length > ptl_request->delivery_len) ? 
+ ptl_request->delivery_len : msg_length, ev->initiator, ev->hdr_data, - ev->mlength, + 0, ptl_request); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); @@ -383,7 +383,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, PTL_ME_OP_PUT | PTL_ME_USE_ONCE | PTL_ME_EVENT_UNLINK_DISABLE; - if (length <= ompi_mtl_portals4.eager_limit) { + if (length <= ompi_mtl_portals4.short_limit) { me.options |= PTL_ME_EVENT_LINK_DISABLE; } me.match_id = remote_proc; @@ -407,7 +407,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, /* if a long message, spin until we either have a comm event or a link event, guaranteeing progress for long unexpected messages. */ - if (length > ompi_mtl_portals4.eager_limit) { + if (length > ompi_mtl_portals4.short_limit) { while (true != ptl_request->req_started) { ompi_mtl_portals4_progress(); } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c b/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c index 936b92e1ec..23cd049022 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv_short.c @@ -191,7 +191,7 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block) me.start = block->start; me.length = ompi_mtl_portals4.recv_short_size; me.ct_handle = PTL_CT_NONE; - me.min_free = ompi_mtl_portals4.eager_limit; + me.min_free = ompi_mtl_portals4.short_limit; me.uid = ompi_mtl_portals4.uid; me.options = PTL_ME_OP_PUT | diff --git a/ompi/mca/mtl/portals4/mtl_portals4_request.h b/ompi/mca/mtl/portals4/mtl_portals4_request.h index f76846115c..eb814dafa7 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_request.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_request.h @@ -52,6 +52,7 @@ struct ompi_mtl_portals4_isend_request_t { #if OMPI_MTL_PORTALS4_FLOW_CONTROL struct ompi_mtl_portals4_pending_request_t *pending; #endif + ptl_size_t length; uint32_t event_count; }; typedef struct 
ompi_mtl_portals4_isend_request_t ompi_mtl_portals4_isend_request_t; diff --git a/ompi/mca/mtl/portals4/mtl_portals4_send.c b/ompi/mca/mtl/portals4/mtl_portals4_send.c index 4ee2e77532..d6b39a994b 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_send.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_send.c @@ -91,9 +91,11 @@ ompi_mtl_portals4_callback(ptl_event_t *ev, if ((PTL_EVENT_ACK == ev->type) && (PTL_PRIORITY_LIST == ev->ptl_list) && - (eager == ompi_mtl_portals4.protocol) && + (ev->mlength == ptl_request->length) && (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE))) { - /* long expected messages with the eager protocol won't see a + /* long expected messages with the eager protocol + (and also with the rndv protocol if the length + is less than or equal to eager_limit) won't see a get event to complete the message. Give them an extra count to cause the message to complete with just the SEND and ACK events and remove the ME. (we wait for the counter @@ -307,8 +309,10 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag, "Send %lu long send with hdr_data 0x%lx (0x%lx)", ptl_request->opcount, hdr_data, match_bits)); - put_length = (rndv == ompi_mtl_portals4.protocol) ? 
- (ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length; + if ((rndv == ompi_mtl_portals4.protocol) && ((ptl_size_t) length > (ptl_size_t) ompi_mtl_portals4.eager_limit)) + put_length = (ptl_size_t) ompi_mtl_portals4.eager_limit; + else put_length = (ptl_size_t) length; + ret = PtlPut(ompi_mtl_portals4.send_md_h, (ptl_size_t) start, @@ -355,7 +359,7 @@ ompi_mtl_portals4_pending_list_progress() } pending = (ompi_mtl_portals4_pending_request_t*) item; - if (pending->length <= ompi_mtl_portals4.eager_limit) { + if (pending->length <= ompi_mtl_portals4.short_limit) { ret = ompi_mtl_portals4_short_isend(pending->mode, pending->start, pending->length, @@ -414,6 +418,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1); ptl_request->buffer_ptr = (free_after) ? start : NULL; + ptl_request->length = length; ptl_request->event_count = 0; OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, @@ -461,7 +466,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, return OMPI_SUCCESS; } #endif - if (length <= ompi_mtl_portals4.eager_limit) { + if (length <= ompi_mtl_portals4.short_limit) { ret = ompi_mtl_portals4_short_isend(mode, start, length,