added optional rendezvous protocol for long messages
This commit was SVN r17124.
Этот коммит содержится в:
родитель
3fca3973d3
Коммит
b02cad2a0b
@ -89,6 +89,9 @@ struct mca_mtl_portals_module_t {
|
|||||||
/* turn off aggressive polling of the unex msg event queue */
|
/* turn off aggressive polling of the unex msg event queue */
|
||||||
bool ptl_aggressive_polling;
|
bool ptl_aggressive_polling;
|
||||||
|
|
||||||
|
/* use rendezvous for long messages */
|
||||||
|
bool ptl_use_rendezvous;
|
||||||
|
|
||||||
};
|
};
|
||||||
typedef struct mca_mtl_portals_module_t mca_mtl_portals_module_t;
|
typedef struct mca_mtl_portals_module_t mca_mtl_portals_module_t;
|
||||||
|
|
||||||
@ -165,6 +168,8 @@ OBJ_CLASS_DECLARATION(ompi_mtl_portals_event_t);
|
|||||||
|
|
||||||
#define PTL_IS_SHORT_MSG(match_bits) \
|
#define PTL_IS_SHORT_MSG(match_bits) \
|
||||||
(0 != (PTL_SHORT_MSG & match_bits))
|
(0 != (PTL_SHORT_MSG & match_bits))
|
||||||
|
/* Evaluates to true when any PTL_LONG_MSG bit is set in match_bits.
 * The argument is parenthesized so that a compound expression (for
 * example `a | b`) is masked as a whole instead of being split by
 * operator precedence. */
#define PTL_IS_LONG_MSG(match_bits)             \
    (0 != (PTL_LONG_MSG & (match_bits)))
|
||||||
#define PTL_IS_READY_MSG(match_bits) \
|
#define PTL_IS_READY_MSG(match_bits) \
|
||||||
(0 != (PTL_READY_MSG & match_bits))
|
(0 != (PTL_READY_MSG & match_bits))
|
||||||
#define PTL_IS_SYNC_MSG(event) \
|
#define PTL_IS_SYNC_MSG(event) \
|
||||||
|
@ -137,10 +137,19 @@ ompi_mtl_portals_component_open(void)
|
|||||||
"Turn off aggressive polling of unexpected messages",
|
"Turn off aggressive polling of unexpected messages",
|
||||||
false,
|
false,
|
||||||
false,
|
false,
|
||||||
|
1,
|
||||||
|
&tmp);
|
||||||
|
ompi_mtl_portals.ptl_aggressive_polling = (tmp == 0) ? false : true;
|
||||||
|
|
||||||
|
mca_base_param_reg_int(&mca_mtl_portals_component.mtl_version,
|
||||||
|
"use_rendezvous",
|
||||||
|
"Use a rendezvous protocol for long messages",
|
||||||
|
false,
|
||||||
|
false,
|
||||||
0,
|
0,
|
||||||
&tmp);
|
&tmp);
|
||||||
ompi_mtl_portals.ptl_aggressive_polling = (tmp == 0) ? true : false;
|
|
||||||
|
|
||||||
|
ompi_mtl_portals.ptl_use_rendezvous = ((tmp == 0) ? false : true);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -36,6 +36,52 @@
|
|||||||
#define CHECK_MATCH(incoming_bits, match_bits, ignore_bits) \
|
#define CHECK_MATCH(incoming_bits, match_bits, ignore_bits) \
|
||||||
(((incoming_bits ^ match_bits) & ~ignore_bits) == 0)
|
(((incoming_bits ^ match_bits) & ~ignore_bits) == 0)
|
||||||
|
|
||||||
|
static int
|
||||||
|
ompi_mtl_portals_recv_progress(ptl_event_t *, struct ompi_mtl_portals_request_t* );
|
||||||
|
|
||||||
|
static int
|
||||||
|
ompi_mtl_portals_rendezvous_get(ptl_event_t *ev,
|
||||||
|
ompi_mtl_portals_request_t *ptl_request)
|
||||||
|
{
|
||||||
|
ptl_md_t md;
|
||||||
|
ptl_handle_md_t md_h;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
md.start = ev->md.start;
|
||||||
|
md.length = ev->md.length;
|
||||||
|
md.threshold = 2; /* send and reply */
|
||||||
|
md.options = PTL_MD_EVENT_START_DISABLE;
|
||||||
|
md.user_ptr = ptl_request;
|
||||||
|
md.eq_handle = ompi_mtl_portals.ptl_eq_h;
|
||||||
|
|
||||||
|
ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md, PTL_UNLINK, &md_h);
|
||||||
|
if (PTL_OK != ret) {
|
||||||
|
opal_output(fileno(stderr)," Error returned from PtlMDBind(). Error code - %d \n",ret);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
ptl_request->is_complete = false;
|
||||||
|
ptl_request->event_callback = ompi_mtl_portals_recv_progress;
|
||||||
|
|
||||||
|
ret = PtlGet(md_h,
|
||||||
|
ev->initiator,
|
||||||
|
OMPI_MTL_PORTALS_READ_TABLE_ID,
|
||||||
|
0,
|
||||||
|
ev->hdr_data,
|
||||||
|
0);
|
||||||
|
if (PTL_OK != ret) {
|
||||||
|
opal_output(fileno(stderr)," Error returned from PtlGet. Error code - %d \n",ret);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* stay here until the reply comes */
|
||||||
|
while (ptl_request->is_complete == false) {
|
||||||
|
ompi_mtl_portals_progress();
|
||||||
|
}
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
/* called when a receive should be progressed */
|
/* called when a receive should be progressed */
|
||||||
static int
|
static int
|
||||||
ompi_mtl_portals_recv_progress(ptl_event_t *ev,
|
ompi_mtl_portals_recv_progress(ptl_event_t *ev,
|
||||||
@ -45,9 +91,19 @@ ompi_mtl_portals_recv_progress(ptl_event_t *ev,
|
|||||||
|
|
||||||
switch (ev->type) {
|
switch (ev->type) {
|
||||||
case PTL_EVENT_PUT_END:
|
case PTL_EVENT_PUT_END:
|
||||||
|
if (PTL_IS_LONG_MSG(ev->match_bits) && (ompi_mtl_portals.ptl_use_rendezvous == true)) {
|
||||||
|
/* get the data */
|
||||||
|
ret = ompi_mtl_portals_rendezvous_get(ev, ptl_request);
|
||||||
|
if ( OMPI_SUCCESS != ret ) {
|
||||||
|
opal_output(fileno(stderr)," Error returned from ompi_mtl_portals_rendezvous_get(). Error code - %d \n",ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* make sure the data is in the right place */
|
/* make sure the data is in the right place */
|
||||||
ompi_mtl_datatype_unpack(ptl_request->convertor,
|
ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
|
||||||
ev->md.start, ev->mlength);
|
ev->md.start, ev->mlength);
|
||||||
|
if (OMPI_SUCCESS != ret) return ret;
|
||||||
|
|
||||||
/* set the status */
|
/* set the status */
|
||||||
ptl_request->super.ompi_req->req_status.MPI_SOURCE =
|
ptl_request->super.ompi_req->req_status.MPI_SOURCE =
|
||||||
@ -64,17 +120,13 @@ ompi_mtl_portals_recv_progress(ptl_event_t *ev,
|
|||||||
"recv complete: 0x%016llx\n", ev->match_bits));
|
"recv complete: 0x%016llx\n", ev->match_bits));
|
||||||
|
|
||||||
ptl_request->super.completion_callback(&ptl_request->super);
|
ptl_request->super.completion_callback(&ptl_request->super);
|
||||||
|
ptl_request->is_complete = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PTL_EVENT_REPLY_END:
|
case PTL_EVENT_REPLY_END:
|
||||||
/* make sure the data is in the right place */
|
/* make sure the data is in the right place */
|
||||||
ompi_mtl_datatype_unpack(ptl_request->convertor,
|
ret = ompi_mtl_datatype_unpack(ptl_request->convertor, ev->md.start, ev->mlength);
|
||||||
ev->md.start, ev->mlength);
|
if (OMPI_SUCCESS != ret) return ret;
|
||||||
|
|
||||||
ret=PtlMDUnlink(ev->md_handle);
|
|
||||||
if( ret !=PTL_OK) {
|
|
||||||
return ompi_common_portals_error_ptl_to_ompi(ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* set the status - most of this filled in right after issuing
|
/* set the status - most of this filled in right after issuing
|
||||||
the PtlGet*/
|
the PtlGet*/
|
||||||
@ -85,6 +137,7 @@ ompi_mtl_portals_recv_progress(ptl_event_t *ev,
|
|||||||
"recv complete: 0x%016llx\n", ev->match_bits));
|
"recv complete: 0x%016llx\n", ev->match_bits));
|
||||||
|
|
||||||
ptl_request->super.completion_callback(&ptl_request->super);
|
ptl_request->super.completion_callback(&ptl_request->super);
|
||||||
|
ptl_request->is_complete = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
@ -128,8 +181,9 @@ ompi_mtl_portals_get_data(ompi_mtl_portals_event_t *recv_event,
|
|||||||
|
|
||||||
/* pull out the data */
|
/* pull out the data */
|
||||||
if (iov.iov_len > 0) {
|
if (iov.iov_len > 0) {
|
||||||
ompi_convertor_unpack(convertor, &iov, &iov_count,
|
ret = ompi_convertor_unpack(convertor, &iov, &iov_count,
|
||||||
&max_data );
|
&max_data );
|
||||||
|
if (0 > ret) return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if synchronous, return an ack */
|
/* if synchronous, return an ack */
|
||||||
@ -186,6 +240,7 @@ ompi_mtl_portals_get_data(ompi_mtl_portals_event_t *recv_event,
|
|||||||
recv_event->ev.match_bits));
|
recv_event->ev.match_bits));
|
||||||
|
|
||||||
ptl_request->super.completion_callback(&ptl_request->super);
|
ptl_request->super.completion_callback(&ptl_request->super);
|
||||||
|
ptl_request->is_complete = true;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
|
ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
|
||||||
@ -442,7 +497,7 @@ ompi_mtl_portals_irecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
recv_event = ompi_mtl_portals_search_unex_q(match_bits, ignore_bits, false);
|
recv_event = ompi_mtl_portals_search_unex_q(match_bits, ignore_bits, false);
|
||||||
if (NULL != recv_event) {
|
if (NULL != recv_event) {
|
||||||
/* found it */
|
/* found it */
|
||||||
ompi_mtl_portals_get_data(recv_event, convertor, ptl_request);
|
ret = ompi_mtl_portals_get_data(recv_event, convertor, ptl_request);
|
||||||
OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl,
|
OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl,
|
||||||
(ompi_free_list_item_t*)recv_event);
|
(ompi_free_list_item_t*)recv_event);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
@ -452,7 +507,7 @@ restart_search:
|
|||||||
recv_event = ompi_mtl_portals_search_unex_events(match_bits, ignore_bits, false);
|
recv_event = ompi_mtl_portals_search_unex_events(match_bits, ignore_bits, false);
|
||||||
if (NULL != recv_event) {
|
if (NULL != recv_event) {
|
||||||
/* found it */
|
/* found it */
|
||||||
ompi_mtl_portals_get_data(recv_event, convertor, ptl_request);
|
ret = ompi_mtl_portals_get_data(recv_event, convertor, ptl_request);
|
||||||
OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl,
|
OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl,
|
||||||
(ompi_free_list_item_t*)recv_event);
|
(ompi_free_list_item_t*)recv_event);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
@ -463,6 +518,10 @@ restart_search:
|
|||||||
if ( false == did_once ) {
|
if ( false == did_once ) {
|
||||||
ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
|
ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
|
||||||
&ptl_request->free_after);
|
&ptl_request->free_after);
|
||||||
|
if (OMPI_SUCCESS != ret) {
|
||||||
|
opal_output(fileno(stderr)," Error returned from ompi_mtl_datatype_recv_buf(). Error code - %d \n",ret);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
did_once = true;
|
did_once = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -501,6 +560,6 @@ restart_search:
|
|||||||
free(md.start);
|
free(md.start);
|
||||||
}
|
}
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -105,6 +105,39 @@ ompi_mtl_portals_long_callback(ptl_event_t *ev, struct ompi_mtl_portals_request_
|
|||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* called for a rendezvous long send */
|
||||||
|
static int
|
||||||
|
ompi_mtl_portals_long_rendezvous_callback(ptl_event_t *ev, struct ompi_mtl_portals_request_t* ptl_request)
|
||||||
|
{
|
||||||
|
|
||||||
|
switch (ev->type) {
|
||||||
|
|
||||||
|
case PTL_EVENT_GET_END:
|
||||||
|
|
||||||
|
if (ptl_request->free_after) {
|
||||||
|
free(ev->md.start);
|
||||||
|
}
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
|
||||||
|
"send complete: 0x%016llx\n",
|
||||||
|
ev->match_bits));
|
||||||
|
|
||||||
|
ptl_request->is_complete = true;
|
||||||
|
if ( NULL != ptl_request->super.ompi_req ) {
|
||||||
|
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
|
||||||
|
ptl_request->super.completion_callback(&ptl_request->super);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
opal_output(fileno(stderr)," Unexpected event type %d in ompi_mtl_portals_long_callback()\n",ev->type);
|
||||||
|
abort();
|
||||||
|
}
|
||||||
|
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
/* called when sync send should wait for an ack or put */
|
/* called when sync send should wait for an ack or put */
|
||||||
static int
|
static int
|
||||||
ompi_mtl_portals_sync_callback(ptl_event_t *ev, struct ompi_mtl_portals_request_t* ptl_request)
|
ompi_mtl_portals_sync_callback(ptl_event_t *ev, struct ompi_mtl_portals_request_t* ptl_request)
|
||||||
@ -280,7 +313,13 @@ ompi_mtl_portals_long_isend( void *start, int length, int contextid, int localra
|
|||||||
|
|
||||||
md.start = start;
|
md.start = start;
|
||||||
md.length = length;
|
md.length = length;
|
||||||
md.threshold = 2; /* send, {ack, get} */
|
|
||||||
|
if (ompi_mtl_portals.ptl_use_rendezvous == true) {
|
||||||
|
md.threshold = 1; /* get event */
|
||||||
|
} else {
|
||||||
|
md.threshold = 2; /* sent event, ack or get event */
|
||||||
|
}
|
||||||
|
|
||||||
md.options = PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
|
md.options = PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
|
||||||
md.user_ptr = ptl_request;
|
md.user_ptr = ptl_request;
|
||||||
md.eq_handle = ompi_mtl_portals.ptl_eq_h;
|
md.eq_handle = ompi_mtl_portals.ptl_eq_h;
|
||||||
@ -302,6 +341,8 @@ ompi_mtl_portals_long_isend( void *start, int length, int contextid, int localra
|
|||||||
return ompi_common_portals_error_ptl_to_ompi(ret);
|
return ompi_common_portals_error_ptl_to_ompi(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ompi_mtl_portals.ptl_use_rendezvous == false) {
|
||||||
|
|
||||||
ret = PtlPut(md_h,
|
ret = PtlPut(md_h,
|
||||||
PTL_ACK_REQ,
|
PTL_ACK_REQ,
|
||||||
dest,
|
dest,
|
||||||
@ -310,6 +351,25 @@ ompi_mtl_portals_long_isend( void *start, int length, int contextid, int localra
|
|||||||
match_bits,
|
match_bits,
|
||||||
0,
|
0,
|
||||||
(ptl_hdr_data_t)(uintptr_t)ptl_request);
|
(ptl_hdr_data_t)(uintptr_t)ptl_request);
|
||||||
|
|
||||||
|
ptl_request->event_callback = ompi_mtl_portals_long_callback;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
/* just send a zero-length message */
|
||||||
|
ret = PtlPut(ompi_mtl_portals.ptl_zero_md_h,
|
||||||
|
PTL_NO_ACK_REQ,
|
||||||
|
dest,
|
||||||
|
OMPI_MTL_PORTALS_SEND_TABLE_ID,
|
||||||
|
0,
|
||||||
|
match_bits,
|
||||||
|
0,
|
||||||
|
(ptl_hdr_data_t)(uintptr_t)ptl_request);
|
||||||
|
|
||||||
|
ptl_request->event_callback = ompi_mtl_portals_long_rendezvous_callback;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
PtlMEUnlink(me_h);
|
PtlMEUnlink(me_h);
|
||||||
if (ptl_request->free_after) free(start);
|
if (ptl_request->free_after) free(start);
|
||||||
@ -317,7 +377,6 @@ ompi_mtl_portals_long_isend( void *start, int length, int contextid, int localra
|
|||||||
}
|
}
|
||||||
|
|
||||||
ptl_request->is_complete = false;
|
ptl_request->is_complete = false;
|
||||||
ptl_request->event_callback = ompi_mtl_portals_long_callback;
|
|
||||||
ptl_request->event_count = 0;
|
ptl_request->event_count = 0;
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user