1
1

Merge pull request #1443 from francois-wellenreiter/fix_trig_rndv

MTL portals4 : fix around triggered rndv operations
Этот коммит содержится в:
Todd Kordenbrock 2016-03-21 08:16:33 -05:00
родитель 4315435963 2bc432d95f
Коммит 2122a15217
4 изменённых файлов: 181 добавлений и 158 удалений

Просмотреть файл

@ -158,7 +158,8 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4;
/* send posting */ /* send posting */
#define MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, source, tag, type) \ #define MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, source, tag, type) \
{ \ { \
match_bits = contextid; \ match_bits = 0; \
match_bits |= contextid; \
match_bits = (match_bits << 24); \ match_bits = (match_bits << 24); \
match_bits |= source; \ match_bits |= source; \
match_bits = (match_bits << 24); \ match_bits = (match_bits << 24); \
@ -171,7 +172,7 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4;
match_bits = 0; \ match_bits = 0; \
ignore_bits = MTL_PORTALS4_PROTOCOL_IGNR; \ ignore_bits = MTL_PORTALS4_PROTOCOL_IGNR; \
\ \
match_bits = contextid; \ match_bits |= contextid; \
match_bits = (match_bits << 24); \ match_bits = (match_bits << 24); \
\ \
if (MPI_ANY_SOURCE == source) { \ if (MPI_ANY_SOURCE == source) { \
@ -193,38 +194,38 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4;
(0 != (MTL_PORTALS4_SHORT_MSG & match_bits)) (0 != (MTL_PORTALS4_SHORT_MSG & match_bits))
#define MTL_PORTALS4_IS_LONG_MSG(match_bits) \ #define MTL_PORTALS4_IS_LONG_MSG(match_bits) \
(0 != (MTL_PORTALS4_LONG_MSG & match_bits)) (0 != (MTL_PORTALS4_LONG_MSG & match_bits))
/*
 * Evaluates to non-zero when any bit of MTL_PORTALS4_READY_MSG is set in
 * match_bits, and to zero otherwise.
 *
 * The argument is parenthesized so that an expression argument (for
 * example `a | b`) is tested as a whole instead of being split by the
 * higher precedence of `&`.
 */
#define MTL_PORTALS4_IS_READY_MSG(match_bits) \
(0 != (MTL_PORTALS4_READY_MSG & (match_bits)))
#define MTL_PORTALS4_GET_TAG(match_bits) \ #define MTL_PORTALS4_GET_TAG(match_bits) \
((int)(match_bits & MTL_PORTALS4_TAG_MASK)) ((int)(match_bits & MTL_PORTALS4_TAG_MASK))
#define MTL_PORTALS4_GET_SOURCE(match_bits) \ #define MTL_PORTALS4_GET_SOURCE(match_bits) \
((int)((match_bits & MTL_PORTALS4_SOURCE_MASK) >> 24)) ((int)((match_bits & MTL_PORTALS4_SOURCE_MASK) >> 24))
/*
 * Extracts the context id from a set of match bits: the value is masked
 * with MTL_PORTALS4_CONTEXT_MASK, shifted right by 48 bits and returned
 * as an int.
 *
 * The argument is parenthesized so that an expression argument is masked
 * as a whole; otherwise `&` would bind to only part of it and unmasked
 * high bits would leak into the result.
 */
#define MTL_PORTALS4_GET_CONTEXT(match_bits) \
((int)(((match_bits) & MTL_PORTALS4_CONTEXT_MASK) >> 48))
/* hdr_data bit manipulation
*
* 0 1234567 01234567 01234567 0123 4567 01234567 01234567 01234567 01234567
* | | |
* ^| | context id | message tag
* || | |
* +---- is_sync
*/
#define MTL_PORTALS4_SYNC_MSG 0x8000000000000000ULL #define MTL_PORTALS4_SYNC_MSG 0x8000000000000000ULL
#define MTL_PORTALS4_SET_HDR_DATA(hdr_data, tag, contextid, sync) \ #define MTL_PORTALS4_SET_HDR_DATA(hdr_data, opcount, length, sync) \
{ \ { \
hdr_data = (sync) ? 1 : 0; \ hdr_data = 0; \
hdr_data = (hdr_data << 39); \ hdr_data |= opcount & 0x7FFFULL; \
hdr_data |= contextid; \ hdr_data = (hdr_data << 48); \
hdr_data = (hdr_data << 24); \ hdr_data |= (length & 0xFFFFFFFFFFFFULL); \
hdr_data |= (MTL_PORTALS4_TAG_MASK & tag); \ hdr_data |= (sync ? MTL_PORTALS4_SYNC_MSG : 0); \
} }
/*
 * Returns the low 48 bits of hdr_data as a size_t (the mask is
 * 0xFFFFFFFFFFFF); any higher bits are discarded.
 *
 * The argument is parenthesized so that an expression argument such as
 * `flag | length` is masked as a whole rather than only its last operand.
 */
#define MTL_PORTALS4_GET_LENGTH(hdr_data) ((size_t)((hdr_data) & 0xFFFFFFFFFFFFULL))
#define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \ #define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \
(0 != (MTL_PORTALS4_SYNC_MSG & hdr_data)) (0 != (MTL_PORTALS4_SYNC_MSG & hdr_data))
/*
 * Builds the match bits used for the read (get) side of a transfer.
 *
 * On completion match_bits holds:
 *   bits 24..47 : the low 24 bits of contextid
 *   bits  0..23 : the low 24 bits of tag
 * and all other bits are zero.
 *
 * match_bits must be a modifiable lvalue; it is overwritten, not merged.
 * Every parameter is parenthesized so that expression arguments are
 * masked as a whole. The plain-brace form is kept (rather than
 * do { } while (0)) to match how the macro is already invoked.
 */
#define MTL_PORTALS4_SET_READ_BITS(match_bits, contextid, tag) \
{ \
    (match_bits) = 0; \
    (match_bits) |= ((contextid) & 0x0000000000FFFFFFULL); \
    (match_bits) = ((match_bits) << 24); \
    (match_bits) |= ((tag) & 0x0000000000FFFFFFULL); \
}
/* mtl-portals4 helpers */ /* mtl-portals4 helpers */
OMPI_DECLSPEC ompi_proc_t * OMPI_DECLSPEC ompi_proc_t *
ompi_mtl_portals4_get_proc_group(struct ompi_group_t *group, int rank); ompi_mtl_portals4_get_proc_group(struct ompi_group_t *group, int rank);

Просмотреть файл

@ -41,7 +41,7 @@ completion_fn(ptl_event_t *ev, ompi_mtl_portals4_base_request_t *ptl_base_reques
ptl_request->status.MPI_SOURCE = MTL_PORTALS4_GET_SOURCE(ev->match_bits); ptl_request->status.MPI_SOURCE = MTL_PORTALS4_GET_SOURCE(ev->match_bits);
ptl_request->status.MPI_TAG = MTL_PORTALS4_GET_TAG(ev->match_bits); ptl_request->status.MPI_TAG = MTL_PORTALS4_GET_TAG(ev->match_bits);
ptl_request->status.MPI_ERROR = MPI_SUCCESS; ptl_request->status.MPI_ERROR = MPI_SUCCESS;
ptl_request->status._ucount += ev->mlength; ptl_request->status._ucount =MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
if (ev->type != PTL_EVENT_SEARCH) { if (ev->type != PTL_EVENT_SEARCH) {
ptl_request->message = ompi_mtl_portals4_message_alloc(ev); ptl_request->message = ompi_mtl_portals4_message_alloc(ev);
} }

Просмотреть файл

@ -44,7 +44,7 @@ triggered_read_msg(void *start, ptl_size_t length, ptl_process_t target,
ret = PtlCTAlloc(ompi_mtl_portals4.ni_h, &request->ct_h); ret = PtlCTAlloc(ompi_mtl_portals4.ni_h, &request->ct_h);
if (OPAL_UNLIKELY(PTL_OK != ret)) { if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlGet failed: %d", "%s:%d: PtlCTAlloc failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
return OMPI_ERR_OUT_OF_RESOURCE; return OMPI_ERR_OUT_OF_RESOURCE;
} }
@ -60,6 +60,7 @@ triggered_read_msg(void *start, ptl_size_t length, ptl_process_t target,
request->ct_h, 1); request->ct_h, 1);
if (OPAL_UNLIKELY(PTL_OK != ret)) { if (OPAL_UNLIKELY(PTL_OK != ret)) {
PtlCTFree(request->ct_h); PtlCTFree(request->ct_h);
request->ct_h = PTL_INVALID_HANDLE;
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlTriggeredGet failed: %d", "%s:%d: PtlTriggeredGet failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
@ -110,6 +111,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
ompi_mtl_portals4_recv_request_t* ptl_request = ompi_mtl_portals4_recv_request_t* ptl_request =
(ompi_mtl_portals4_recv_request_t*) ptl_base_request; (ompi_mtl_portals4_recv_request_t*) ptl_base_request;
size_t msg_length = 0; size_t msg_length = 0;
ptl_match_bits_t read_match_bits;
/* as soon as we've seen any event associated with a request, it's /* as soon as we've seen any event associated with a request, it's
started */ started */
@ -128,61 +130,70 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
goto callback_error; goto callback_error;
} }
if (!ptl_request->is_triggered) { ptl_request->me_h = PTL_INVALID_HANDLE;
ptl_request->me_h = PTL_INVALID_HANDLE;
msg_length = ev->mlength; msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
ptl_request->super.super.ompi_req->req_status.MPI_SOURCE = ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
MTL_PORTALS4_GET_SOURCE(ev->match_bits); MTL_PORTALS4_GET_SOURCE(ev->match_bits);
ptl_request->super.super.ompi_req->req_status.MPI_TAG = ptl_request->super.super.ompi_req->req_status.MPI_TAG =
MTL_PORTALS4_GET_TAG(ev->match_bits); MTL_PORTALS4_GET_TAG(ev->match_bits);
if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) { if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"truncate expected: %ld %ld", "truncate expected: %ld %ld",
msg_length, ptl_request->delivery_len); msg_length, ptl_request->delivery_len);
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
} }
#if OPAL_ENABLE_DEBUG #if OPAL_ENABLE_DEBUG
ptl_request->hdr_data = ev->hdr_data; ptl_request->hdr_data = ev->hdr_data;
#endif #endif
if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) { if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
/* If it's not a short message and we're doing rndv, we /* If it's not a short message and we're doing rndv, we
only have the first part of the message. Issue the get only have the first part of the message. Issue the get
to pull the second part of the message. */ to pull the second part of the message. */
if (ptl_request->is_triggered) {
ptl_request->super.super.ompi_req->req_status._ucount = 0;
}
else {
ptl_request->super.super.ompi_req->req_status._ucount = ompi_mtl_portals4.eager_limit;
MTL_PORTALS4_SET_READ_BITS(read_match_bits,
MTL_PORTALS4_GET_CONTEXT(ev->match_bits),
MTL_PORTALS4_GET_TAG(ev->match_bits));
ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit, ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit,
((msg_length > ptl_request->delivery_len) ? ((msg_length > ptl_request->delivery_len) ?
ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit, ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit,
ev->initiator, ev->initiator,
ev->hdr_data, read_match_bits,
ompi_mtl_portals4.eager_limit, ompi_mtl_portals4.eager_limit,
ptl_request); ptl_request);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
goto callback_error; goto callback_error;
} }
}
} else { } else {
/* If we're either using the eager protocol or were a /* If we're either using the eager protocol or were a
short message, all data has been received, so complete short message, all data has been received, so complete
the message. */ the message. */
ret = ompi_mtl_datatype_unpack(ptl_request->convertor, ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
ev->start, ev->start,
ev->mlength); ev->mlength);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: ompi_mtl_datatype_unpack failed: %d", "%s:%d: ompi_mtl_datatype_unpack failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret; ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
}
ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Recv %lu (0x%lx) completed, expected",
ptl_request->opcount, ptl_request->hdr_data));
ptl_request->super.super.completion_callback(&ptl_request->super.super);
} }
ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Recv %lu (0x%lx) completed, expected",
ptl_request->opcount, ptl_request->hdr_data));
ptl_request->super.super.completion_callback(&ptl_request->super.super);
} }
break; break;
@ -198,16 +209,14 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
goto callback_error; goto callback_error;
} }
if (ptl_request->is_triggered) if (ptl_request->is_triggered) {
PtlCTFree(ptl_request->ct_h); PtlCTFree(ptl_request->ct_h);
ptl_request->ct_h = PTL_INVALID_HANDLE;
}
/* set the received length in the status, now that we know /* set the received length in the status, now that we know
exactly how much data was sent. */ exactly how much data was sent. */
ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; ptl_request->super.super.ompi_req->req_status._ucount += ev->mlength;
if (ompi_mtl_portals4.protocol == rndv) {
ptl_request->super.super.ompi_req->req_status._ucount +=
ompi_mtl_portals4.eager_limit;
}
#if OMPI_MTL_PORTALS4_FLOW_CONTROL #if OMPI_MTL_PORTALS4_FLOW_CONTROL
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1); OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
@ -236,98 +245,104 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
case PTL_EVENT_PUT_OVERFLOW: case PTL_EVENT_PUT_OVERFLOW:
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Recv %lu (0x%lx) got put_overflow event", "Recv %lu (0x%lx) got put_overflow event",
ptl_request->opcount, ev->hdr_data)); ptl_request->opcount, ev->hdr_data));
if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) { if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d", "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
__FILE__, __LINE__, ev->ni_fail_type); __FILE__, __LINE__, ev->ni_fail_type);
goto callback_error; goto callback_error;
} }
if (!ptl_request->is_triggered) { ptl_request->me_h = PTL_INVALID_HANDLE;
ptl_request->me_h = PTL_INVALID_HANDLE;
msg_length = ev->mlength; msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
ptl_request->super.super.ompi_req->req_status.MPI_SOURCE = ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
MTL_PORTALS4_GET_SOURCE(ev->match_bits); MTL_PORTALS4_GET_SOURCE(ev->match_bits);
ptl_request->super.super.ompi_req->req_status.MPI_TAG = ptl_request->super.super.ompi_req->req_status.MPI_TAG =
MTL_PORTALS4_GET_TAG(ev->match_bits); MTL_PORTALS4_GET_TAG(ev->match_bits);
if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) { if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"truncate unexpected: %ld %ld %d", "truncate unexpected: %ld %ld %d",
msg_length, ptl_request->delivery_len, msg_length, ptl_request->delivery_len,
MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)); MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE; ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
} }
#if OPAL_ENABLE_DEBUG #if OPAL_ENABLE_DEBUG
ptl_request->hdr_data = ev->hdr_data; ptl_request->hdr_data = ev->hdr_data;
#endif #endif
ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
/* overflow case. Short messages have the buffer stashed /* overflow case. Short messages have the buffer stashed
somewhere. Long messages left in buffer at the source */ somewhere. Long messages left in buffer at the source */
if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) { if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) {
ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength; if (ev->mlength > 0) {
if (ev->mlength > 0) { struct iovec iov;
struct iovec iov; uint32_t iov_count = 1;
uint32_t iov_count = 1; size_t max_data;
size_t max_data; iov.iov_base = (char*) ev->start;
iov.iov_base = (char*) ev->start; iov.iov_len = ev->mlength;
iov.iov_len = ev->mlength; max_data = iov.iov_len;
max_data = iov.iov_len;
ret = opal_convertor_unpack(ptl_request->convertor, ret = opal_convertor_unpack(ptl_request->convertor,
&iov, &iov_count, &iov, &iov_count,
&max_data ); &max_data );
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr); if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
if (OPAL_UNLIKELY(ret < 0)) { if (OPAL_UNLIKELY(ret < 0)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output, opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: opal_convertor_unpack failed: %d", "%s:%d: opal_convertor_unpack failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
goto callback_error; goto callback_error;
}
} }
/* if it's a sync, send the ack */ }
if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) { /* if it's a sync, send the ack */
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) {
"Recv %lu (0x%lx) sending sync ack",
ptl_request->opcount, ptl_request->hdr_data));
ret = PtlPut(ompi_mtl_portals4.zero_md_h,
0,
0,
PTL_NO_ACK_REQ,
ev->initiator,
ompi_mtl_portals4.read_idx,
ev->hdr_data,
0,
NULL,
0);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlPut failed: %d",
__FILE__, __LINE__, ret);
goto callback_error;
}
}
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Recv %lu (0x%lx) completed, unexpected short (0x%lx)", "Recv %lu (0x%lx) sending sync ack",
ptl_request->opcount, ptl_request->hdr_data, (long) ev->start)); ptl_request->opcount, ptl_request->hdr_data));
ptl_request->super.super.completion_callback(&ptl_request->super.super); ret = PtlPut(ompi_mtl_portals4.zero_md_h,
0,
0,
PTL_NO_ACK_REQ,
ev->initiator,
ompi_mtl_portals4.read_idx,
ev->hdr_data,
0,
NULL,
0);
if (OPAL_UNLIKELY(PTL_OK != ret)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PtlPut failed: %d",
__FILE__, __LINE__, ret);
goto callback_error;
}
}
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Recv %lu (0x%lx) completed, unexpected short (0x%lx)",
ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
ptl_request->super.super.completion_callback(&ptl_request->super.super);
} else {
if (!ptl_request->is_triggered) {
} else {
if (ev->mlength > 0) { if (ev->mlength > 0) {
/* if rndv or triggered, copy the eager part to the right place */ /* if rndv or triggered, copy the eager part to the right place */
memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength); memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength);
} }
MTL_PORTALS4_SET_READ_BITS(read_match_bits,
MTL_PORTALS4_GET_CONTEXT(ev->match_bits),
MTL_PORTALS4_GET_TAG(ev->match_bits));
ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength, ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength,
((msg_length > ptl_request->delivery_len) ? ((msg_length > ptl_request->delivery_len) ?
ptl_request->delivery_len : msg_length) - ev->mlength, ptl_request->delivery_len : msg_length) - ev->mlength,
ev->initiator, ev->initiator,
ev->hdr_data, read_match_bits,
ev->mlength, ev->mlength,
ptl_request); ptl_request);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
@ -366,8 +381,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
struct opal_convertor_t *convertor, struct opal_convertor_t *convertor,
mca_mtl_request_t *mtl_request) mca_mtl_request_t *mtl_request)
{ {
ptl_match_bits_t match_bits, ignore_bits; ptl_match_bits_t read_match_bits, recv_match_bits, recv_ignore_bits;
ptl_hdr_data_t hdr_data;
int ret = OMPI_SUCCESS; int ret = OMPI_SUCCESS;
ptl_process_t remote_proc; ptl_process_t remote_proc;
ompi_mtl_portals4_recv_request_t *ptl_request = ompi_mtl_portals4_recv_request_t *ptl_request =
@ -391,31 +405,16 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc)); remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc));
} }
MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, MTL_PORTALS4_SET_RECV_BITS(recv_match_bits, recv_ignore_bits, comm->c_contextid,
src, tag); src, tag);
MTL_PORTALS4_SET_HDR_DATA(hdr_data, tag, comm->c_contextid, 0); MTL_PORTALS4_SET_READ_BITS(read_match_bits, comm->c_contextid, tag);
ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after); ret = ompi_mtl_datatype_recv_buf(convertor, &start, &length, &free_after);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret; return ret;
} }
ptl_request->is_triggered =
((ompi_mtl_portals4.protocol == eager) ||
(ompi_mtl_portals4.eager_limit >= length) ||
(MPI_ANY_SOURCE == src) ||
(MPI_ANY_TAG == tag)) ? false : true;
if (ptl_request->is_triggered) {
ret = triggered_read_msg(ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit,
ptl_request->delivery_len - ompi_mtl_portals4.eager_limit,
remote_proc,
hdr_data,
ompi_mtl_portals4.eager_limit,
ptl_request);
}
ptl_request->super.type = portals4_req_recv; ptl_request->super.type = portals4_req_recv;
ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress; ptl_request->super.event_callback = ompi_mtl_portals4_recv_progress;
#if OPAL_ENABLE_DEBUG #if OPAL_ENABLE_DEBUG
@ -428,12 +427,28 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
ptl_request->delivery_len = length; ptl_request->delivery_len = length;
ptl_request->req_started = false; ptl_request->req_started = false;
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
ptl_request->super.super.ompi_req->req_status._ucount = 0;
ptl_request->is_triggered =
((ompi_mtl_portals4.protocol == eager) ||
(ompi_mtl_portals4.eager_limit >= length) ||
(MPI_ANY_SOURCE == src) ||
(MPI_ANY_TAG == tag)) ? false : true;
if (ptl_request->is_triggered) {
ret = triggered_read_msg((char*) ptl_request->delivery_ptr,
ptl_request->delivery_len,
remote_proc,
read_match_bits,
0,
ptl_request);
}
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx, 0x%lx)\n", "Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n",
ptl_request->opcount, ptl_request->opcount,
remote_proc.phys.nid, remote_proc.phys.pid, remote_proc.phys.nid, remote_proc.phys.pid,
(int64_t)length, match_bits, ignore_bits, hdr_data, (unsigned long) ptl_request)); (int64_t)length, recv_match_bits, recv_ignore_bits, (unsigned long) ptl_request));
me.start = start; me.start = start;
me.length = length; me.length = length;
@ -454,8 +469,8 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
me.options |= PTL_ME_EVENT_LINK_DISABLE; me.options |= PTL_ME_EVENT_LINK_DISABLE;
} }
me.match_id = remote_proc; me.match_id = remote_proc;
me.match_bits = match_bits; me.match_bits = recv_match_bits;
me.ignore_bits = ignore_bits; me.ignore_bits = recv_ignore_bits;
ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
ompi_mtl_portals4.recv_idx, ompi_mtl_portals4.recv_idx,
@ -515,6 +530,7 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl,
ptl_request->delivery_ptr = start; ptl_request->delivery_ptr = start;
ptl_request->delivery_len = length; ptl_request->delivery_len = length;
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
ptl_request->super.super.ompi_req->req_status._ucount = 0;
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Mrecv %lu of length %ld (0x%lx)\n", "Mrecv %lu of length %ld (0x%lx)\n",

Просмотреть файл

@ -182,16 +182,20 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
ompi_mtl_portals4_isend_request_t *ptl_request) ompi_mtl_portals4_isend_request_t *ptl_request)
{ {
int ret; int ret;
ptl_match_bits_t match_bits; ptl_match_bits_t read_match_bits, match_bits;
ptl_me_t me; ptl_me_t me;
ptl_hdr_data_t hdr_data; ptl_hdr_data_t hdr_data;
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
MTL_PORTALS4_SHORT_MSG); MTL_PORTALS4_SHORT_MSG);
MTL_PORTALS4_SET_HDR_DATA(hdr_data, tag, contextid, (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) ? 1 : 0); MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length,
MCA_PML_BASE_SEND_SYNCHRONOUS == mode);
if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) { if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
MTL_PORTALS4_SET_READ_BITS(read_match_bits, contextid, tag);
me.start = NULL; me.start = NULL;
me.length = 0; me.length = 0;
me.ct_handle = PTL_CT_NONE; me.ct_handle = PTL_CT_NONE;
@ -203,7 +207,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_LINK_DISABLE |
PTL_ME_EVENT_UNLINK_DISABLE; PTL_ME_EVENT_UNLINK_DISABLE;
me.match_id = ptl_proc; me.match_id = ptl_proc;
me.match_bits = hdr_data; me.match_bits = read_match_bits;
me.ignore_bits = 0; me.ignore_bits = 0;
ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ret = PtlMEAppend(ompi_mtl_portals4.ni_h,
@ -265,7 +269,7 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
ompi_mtl_portals4_isend_request_t *ptl_request) ompi_mtl_portals4_isend_request_t *ptl_request)
{ {
int ret; int ret;
ptl_match_bits_t match_bits; ptl_match_bits_t read_match_bits, match_bits;
ptl_me_t me; ptl_me_t me;
ptl_hdr_data_t hdr_data; ptl_hdr_data_t hdr_data;
ptl_size_t put_length; ptl_size_t put_length;
@ -273,7 +277,9 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag, MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
MTL_PORTALS4_LONG_MSG); MTL_PORTALS4_LONG_MSG);
MTL_PORTALS4_SET_HDR_DATA(hdr_data, tag, contextid, 0); MTL_PORTALS4_SET_READ_BITS(read_match_bits, contextid, tag);
MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, 0);
me.start = start; me.start = start;
me.length = length; me.length = length;
@ -286,7 +292,7 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_LINK_DISABLE |
PTL_ME_EVENT_UNLINK_DISABLE; PTL_ME_EVENT_UNLINK_DISABLE;
me.match_id = ptl_proc; me.match_id = ptl_proc;
me.match_bits = hdr_data; me.match_bits = read_match_bits;
me.ignore_bits = 0; me.ignore_bits = 0;
ret = PtlMEAppend(ompi_mtl_portals4.ni_h, ret = PtlMEAppend(ompi_mtl_portals4.ni_h,