Ensure asynchronous progress for long unexpected messages by waiting for an
event on the ME before returning from the receive post. The events we are likely to see are LINK (the ME was added to the match list), PUT (odd to see first, but it means that the ME was linked to the match list and then matched), or PUT_OVERFLOW (the message was unexpected). This commit was SVN r26199.
Этот коммит содержится в:
родитель
2a26d0f9a2
Коммит
451af0e832
@ -471,6 +471,7 @@ ompi_mtl_portals4_progress(void)
|
||||
case PTL_EVENT_AUTO_FREE:
|
||||
case PTL_EVENT_AUTO_UNLINK:
|
||||
case PTL_EVENT_SEARCH:
|
||||
case PTL_EVENT_LINK:
|
||||
if (NULL != ev.user_ptr) {
|
||||
ptl_request = ev.user_ptr;
|
||||
ret = ptl_request->event_callback(&ev, ptl_request);
|
||||
@ -497,14 +498,13 @@ ompi_mtl_portals4_progress(void)
|
||||
#endif
|
||||
break;
|
||||
|
||||
case PTL_EVENT_LINK:
|
||||
case PTL_EVENT_GET_OVERFLOW:
|
||||
case PTL_EVENT_FETCH_ATOMIC:
|
||||
case PTL_EVENT_FETCH_ATOMIC_OVERFLOW:
|
||||
case PTL_EVENT_ATOMIC:
|
||||
case PTL_EVENT_ATOMIC_OVERFLOW:
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"Unexpected event of type %d", ev.type);
|
||||
opal_output(ompi_mtl_base_output,
|
||||
"Unexpected event of type %d", ev.type);
|
||||
}
|
||||
} else if (PTL_EQ_EMPTY == ret) {
|
||||
break;
|
||||
|
@ -44,6 +44,10 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
||||
(ompi_mtl_portals4_recv_request_t*) ptl_base_request;
|
||||
size_t msg_length = 0;
|
||||
|
||||
/* as soon as we've seen any event associated with a request, it's
|
||||
started */
|
||||
ptl_request->req_started = true;
|
||||
|
||||
switch (ev->type) {
|
||||
case PTL_EVENT_PUT:
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put event",
|
||||
@ -295,6 +299,9 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
||||
|
||||
break;
|
||||
|
||||
case PTL_EVENT_LINK:
|
||||
break;
|
||||
|
||||
default:
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"Unhandled receive callback with event type %d",
|
||||
@ -358,6 +365,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
ptl_request->convertor = convertor;
|
||||
ptl_request->delivery_ptr = start;
|
||||
ptl_request->delivery_len = length;
|
||||
ptl_request->req_started = false;
|
||||
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
|
||||
@ -374,8 +382,10 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
me.options =
|
||||
PTL_ME_OP_PUT |
|
||||
PTL_ME_USE_ONCE |
|
||||
PTL_ME_EVENT_LINK_DISABLE | /* BWB: FIX ME */
|
||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||
if (length <= ompi_mtl_portals4.eager_limit) {
|
||||
me.options |= PTL_ME_EVENT_LINK_DISABLE;
|
||||
}
|
||||
me.match_id = remote_proc;
|
||||
me.match_bits = match_bits;
|
||||
me.ignore_bits = ignore_bits;
|
||||
@ -394,6 +404,16 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
return ompi_mtl_portals4_get_error(ret);
|
||||
}
|
||||
|
||||
/* if a long message, spin until we either have a comm event or a
|
||||
link event, guaranteeing progress for long unexpected
|
||||
messages. */
|
||||
if (length > ompi_mtl_portals4.eager_limit) {
|
||||
while (true != ptl_request->req_started) {
|
||||
ompi_mtl_portals4_progress();
|
||||
opal_atomic_rmb();
|
||||
}
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -68,6 +68,7 @@ struct ompi_mtl_portals4_recv_request_t {
|
||||
struct opal_convertor_t *convertor;
|
||||
void *delivery_ptr;
|
||||
size_t delivery_len;
|
||||
volatile bool req_started;
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
int opcount;
|
||||
ptl_hdr_data_t hdr_data;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user