1
1

Ensure async progress for long unexpected messages by waiting for an
event on the ME (match list entry).  The events we're likely to see are
LINK (the ME was added to the match list), PUT (weird to see first, but
it means that the ME was linked to the match list and then matched), or
PUT_OVERFLOW, meaning the message was unexpected.

This commit was SVN r26199.
Этот коммит содержится в:
Brian Barrett 2012-03-26 22:54:35 +00:00
родитель 2a26d0f9a2
Коммит 451af0e832
3 изменённых файла: 25 добавлений и 4 удаления

Просмотреть файл

@ -471,6 +471,7 @@ ompi_mtl_portals4_progress(void)
case PTL_EVENT_AUTO_FREE:
case PTL_EVENT_AUTO_UNLINK:
case PTL_EVENT_SEARCH:
case PTL_EVENT_LINK:
if (NULL != ev.user_ptr) {
ptl_request = ev.user_ptr;
ret = ptl_request->event_callback(&ev, ptl_request);
@ -497,14 +498,13 @@ ompi_mtl_portals4_progress(void)
#endif
break;
case PTL_EVENT_LINK:
case PTL_EVENT_GET_OVERFLOW:
case PTL_EVENT_FETCH_ATOMIC:
case PTL_EVENT_FETCH_ATOMIC_OVERFLOW:
case PTL_EVENT_ATOMIC:
case PTL_EVENT_ATOMIC_OVERFLOW:
opal_output_verbose(1, ompi_mtl_base_output,
"Unexpected event of type %d", ev.type);
opal_output(ompi_mtl_base_output,
"Unexpected event of type %d", ev.type);
}
} else if (PTL_EQ_EMPTY == ret) {
break;

Просмотреть файл

@ -44,6 +44,10 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
(ompi_mtl_portals4_recv_request_t*) ptl_base_request;
size_t msg_length = 0;
/* as soon as we've seen any event associated with a request, it's
started */
ptl_request->req_started = true;
switch (ev->type) {
case PTL_EVENT_PUT:
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put event",
@ -295,6 +299,9 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
break;
case PTL_EVENT_LINK:
break;
default:
opal_output_verbose(1, ompi_mtl_base_output,
"Unhandled receive callback with event type %d",
@ -358,6 +365,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
ptl_request->convertor = convertor;
ptl_request->delivery_ptr = start;
ptl_request->delivery_len = length;
ptl_request->req_started = false;
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
@ -374,8 +382,10 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
me.options =
PTL_ME_OP_PUT |
PTL_ME_USE_ONCE |
PTL_ME_EVENT_LINK_DISABLE | /* BWB: FIX ME */
PTL_ME_EVENT_UNLINK_DISABLE;
if (length <= ompi_mtl_portals4.eager_limit) {
me.options |= PTL_ME_EVENT_LINK_DISABLE;
}
me.match_id = remote_proc;
me.match_bits = match_bits;
me.ignore_bits = ignore_bits;
@ -394,6 +404,16 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
return ompi_mtl_portals4_get_error(ret);
}
/* if a long message, spin until we either have a comm event or a
link event, guaranteeing progress for long unexpected
messages. */
if (length > ompi_mtl_portals4.eager_limit) {
while (true != ptl_request->req_started) {
ompi_mtl_portals4_progress();
opal_atomic_rmb();
}
}
return OMPI_SUCCESS;
}

Просмотреть файл

@ -68,6 +68,7 @@ struct ompi_mtl_portals4_recv_request_t {
struct opal_convertor_t *convertor;
void *delivery_ptr;
size_t delivery_len;
volatile bool req_started;
#if OPAL_ENABLE_DEBUG
int opcount;
ptl_hdr_data_t hdr_data;