Merge pull request #1900 from PDeveze/mtl-portals4-short_msg-split_msg
Mtl portals4 short msg split msg
Этот коммит содержится в:
Коммит
a17dff281d
@ -46,9 +46,15 @@ struct mca_mtl_portals4_module_t {
|
|||||||
|
|
||||||
/* Use the logical to physical table to accelerate portals4 addressing: 1 (true) : 0 (false) */
|
/* Use the logical to physical table to accelerate portals4 addressing: 1 (true) : 0 (false) */
|
||||||
int32_t use_logical;
|
int32_t use_logical;
|
||||||
|
|
||||||
|
/* Process_id */
|
||||||
|
ptl_process_t ptl_process_id;
|
||||||
|
|
||||||
/* Use flow control: 1 (true) : 0 (false) */
|
/* Use flow control: 1 (true) : 0 (false) */
|
||||||
int32_t use_flowctl;
|
int32_t use_flowctl;
|
||||||
|
|
||||||
|
/** Short limit; Size limit for short messages */
|
||||||
|
uint64_t short_limit;
|
||||||
/** Eager limit; messages greater than this use a rendezvous protocol */
|
/** Eager limit; messages greater than this use a rendezvous protocol */
|
||||||
uint64_t eager_limit;
|
uint64_t eager_limit;
|
||||||
/** Size of short message blocks */
|
/** Size of short message blocks */
|
||||||
@ -67,6 +73,8 @@ struct mca_mtl_portals4_module_t {
|
|||||||
|
|
||||||
/** Network interface handle for matched interface */
|
/** Network interface handle for matched interface */
|
||||||
ptl_handle_ni_t ni_h;
|
ptl_handle_ni_t ni_h;
|
||||||
|
/** Limit given by portals after NIInit */
|
||||||
|
uint64_t max_msg_size_mtl;
|
||||||
/** Uid for current user */
|
/** Uid for current user */
|
||||||
ptl_uid_t uid;
|
ptl_uid_t uid;
|
||||||
|
|
||||||
|
@ -100,6 +100,18 @@ ompi_mtl_portals4_component_register(void)
|
|||||||
OPAL_INFO_LVL_9,
|
OPAL_INFO_LVL_9,
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
&param_priority);
|
&param_priority);
|
||||||
|
ompi_mtl_portals4.short_limit = 2 * 1024;
|
||||||
|
(void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
|
||||||
|
"short_limit",
|
||||||
|
"Size limit for short messages",
|
||||||
|
MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG,
|
||||||
|
NULL,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
OPAL_INFO_LVL_5,
|
||||||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
|
&ompi_mtl_portals4.short_limit);
|
||||||
|
|
||||||
|
|
||||||
ompi_mtl_portals4.eager_limit = 2 * 1024;
|
ompi_mtl_portals4.eager_limit = 2 * 1024;
|
||||||
(void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
|
(void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
|
||||||
@ -173,6 +185,19 @@ ompi_mtl_portals4_component_register(void)
|
|||||||
OPAL_INFO_LVL_5,
|
OPAL_INFO_LVL_5,
|
||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
&ompi_mtl_portals4.protocol);
|
&ompi_mtl_portals4.protocol);
|
||||||
|
|
||||||
|
ompi_mtl_portals4.max_msg_size_mtl = PTL_SIZE_MAX;
|
||||||
|
(void) mca_base_component_var_register(&mca_mtl_portals4_component.mtl_version,
|
||||||
|
"max_msg_size",
|
||||||
|
"Max size supported by portals4 (above that, a message is cut into messages less than that size)",
|
||||||
|
MCA_BASE_VAR_TYPE_UNSIGNED_LONG,
|
||||||
|
NULL,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
OPAL_INFO_LVL_5,
|
||||||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
|
&ompi_mtl_portals4.max_msg_size_mtl);
|
||||||
|
|
||||||
OBJ_RELEASE(new_enum);
|
OBJ_RELEASE(new_enum);
|
||||||
if (0 > ret) {
|
if (0 > ret) {
|
||||||
return OMPI_ERR_NOT_SUPPORTED;
|
return OMPI_ERR_NOT_SUPPORTED;
|
||||||
@ -196,6 +221,12 @@ ompi_mtl_portals4_component_open(void)
|
|||||||
"no"
|
"no"
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||||
|
"Max message size: %lu", (unsigned long)
|
||||||
|
ompi_mtl_portals4.max_msg_size_mtl);
|
||||||
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||||
|
"Short limit: %d", (int)
|
||||||
|
ompi_mtl_portals4.short_limit);
|
||||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||||
"Eager limit: %d", (int)
|
"Eager limit: %d", (int)
|
||||||
ompi_mtl_portals4.eager_limit);
|
ompi_mtl_portals4.eager_limit);
|
||||||
@ -314,6 +345,11 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (actual_limits.max_msg_size < ompi_mtl_portals4.max_msg_size_mtl)
|
||||||
|
ompi_mtl_portals4.max_msg_size_mtl = actual_limits.max_msg_size;
|
||||||
|
OPAL_OUTPUT_VERBOSE((10, ompi_mtl_base_framework.framework_output,
|
||||||
|
"Due to portals4 and user configuration messages will not go over the size of %lu", ompi_mtl_portals4.max_msg_size_mtl));
|
||||||
|
|
||||||
if (ompi_comm_rank(MPI_COMM_WORLD) == 0) {
|
if (ompi_comm_rank(MPI_COMM_WORLD) == 0) {
|
||||||
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_entries=%d", actual_limits.max_entries);
|
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_entries=%d", actual_limits.max_entries);
|
||||||
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_unexpected_headers=%d", actual_limits.max_unexpected_headers);
|
opal_output_verbose(10, ompi_mtl_base_framework.framework_output, "max_unexpected_headers=%d", actual_limits.max_unexpected_headers);
|
||||||
@ -350,6 +386,10 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ompi_mtl_portals4.ptl_process_id = id;
|
||||||
|
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output,
|
||||||
|
"PtlGetPhysId rank=%x nid=%x pid=%x\n", id.rank, id.phys.nid, id.phys.pid));
|
||||||
|
|
||||||
OPAL_MODEX_SEND(ret, OPAL_PMIX_GLOBAL,
|
OPAL_MODEX_SEND(ret, OPAL_PMIX_GLOBAL,
|
||||||
&mca_mtl_portals4_component.mtl_version,
|
&mca_mtl_portals4_component.mtl_version,
|
||||||
&id, sizeof(id));
|
&id, sizeof(id));
|
||||||
|
@ -70,6 +70,13 @@ ompi_mtl_portals4_flowctl_init(void)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ompi_mtl_portals4.flowctl_idx != REQ_FLOWCTL_TABLE_ID) {
|
||||||
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||||
|
"%s:%d: PtlPTAlloc did not allocate the requested PT: %d\n",
|
||||||
|
__FILE__, __LINE__, ompi_mtl_portals4.flowctl_idx);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
ret = PtlCTAlloc(ompi_mtl_portals4.ni_h,
|
ret = PtlCTAlloc(ompi_mtl_portals4.ni_h,
|
||||||
&ompi_mtl_portals4.flowctl.trigger_ct_h);
|
&ompi_mtl_portals4.flowctl.trigger_ct_h);
|
||||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||||
@ -291,9 +298,7 @@ ompi_mtl_portals4_flowctl_trigger(void)
|
|||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (false == ompi_mtl_portals4.flowctl.flowctl_active) {
|
if (true == OPAL_ATOMIC_CMPSET_32(&ompi_mtl_portals4.flowctl.flowctl_active, false, true)) {
|
||||||
ompi_mtl_portals4.flowctl.flowctl_active = true;
|
|
||||||
|
|
||||||
/* send trigger to root */
|
/* send trigger to root */
|
||||||
ret = PtlPut(ompi_mtl_portals4.zero_md_h,
|
ret = PtlPut(ompi_mtl_portals4.zero_md_h,
|
||||||
0,
|
0,
|
||||||
|
@ -34,7 +34,7 @@ OBJ_CLASS_DECLARATION(ompi_mtl_portals4_pending_request_t);
|
|||||||
|
|
||||||
|
|
||||||
struct ompi_mtl_portals4_flowctl_t {
|
struct ompi_mtl_portals4_flowctl_t {
|
||||||
bool flowctl_active;
|
int32_t flowctl_active;
|
||||||
|
|
||||||
int32_t send_slots;
|
int32_t send_slots;
|
||||||
int32_t max_send_slots;
|
int32_t max_send_slots;
|
||||||
|
@ -32,7 +32,7 @@ completion_fn(ptl_event_t *ev, ompi_mtl_portals4_base_request_t *ptl_base_reques
|
|||||||
ompi_mtl_portals4_probe_request_t *ptl_request =
|
ompi_mtl_portals4_probe_request_t *ptl_request =
|
||||||
(ompi_mtl_portals4_probe_request_t*) ptl_base_request;
|
(ompi_mtl_portals4_probe_request_t*) ptl_base_request;
|
||||||
|
|
||||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
opal_output_verbose(10, ompi_mtl_base_framework.framework_output,
|
||||||
"%s:%d: completion_fn: %d %d",
|
"%s:%d: completion_fn: %d %d",
|
||||||
__FILE__, __LINE__, ev->type, ev->ni_fail_type);
|
__FILE__, __LINE__, ev->type, ev->ni_fail_type);
|
||||||
|
|
||||||
|
@ -39,7 +39,9 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target,
|
|||||||
ptl_match_bits_t match_bits, ptl_size_t remote_offset,
|
ptl_match_bits_t match_bits, ptl_size_t remote_offset,
|
||||||
ompi_mtl_portals4_recv_request_t *request)
|
ompi_mtl_portals4_recv_request_t *request)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret, i;
|
||||||
|
ptl_size_t rest = length, asked = 0, frag_size;
|
||||||
|
int32_t pending_reply;
|
||||||
|
|
||||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||||
while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
|
while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
|
||||||
@ -48,19 +50,29 @@ read_msg(void *start, ptl_size_t length, ptl_process_t target,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ret = PtlGet(ompi_mtl_portals4.send_md_h,
|
request->pending_reply = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl;
|
||||||
(ptl_size_t) start,
|
pending_reply = request->pending_reply;
|
||||||
length,
|
|
||||||
target,
|
for (i = 0 ; i < pending_reply ; i++) {
|
||||||
ompi_mtl_portals4.read_idx,
|
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d) send",
|
||||||
match_bits,
|
i + 1, pending_reply));
|
||||||
remote_offset,
|
frag_size = (OPAL_UNLIKELY(rest > ompi_mtl_portals4.max_msg_size_mtl)) ? ompi_mtl_portals4.max_msg_size_mtl : rest;
|
||||||
request);
|
ret = PtlGet(ompi_mtl_portals4.send_md_h,
|
||||||
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
(ptl_size_t) start + i * ompi_mtl_portals4.max_msg_size_mtl,
|
||||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
frag_size,
|
||||||
"%s:%d: PtlGet failed: %d",
|
target,
|
||||||
__FILE__, __LINE__, ret);
|
ompi_mtl_portals4.read_idx,
|
||||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
match_bits,
|
||||||
|
remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl,
|
||||||
|
request);
|
||||||
|
if (OPAL_UNLIKELY(PTL_OK != ret)) {
|
||||||
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||||
|
"%s:%d: PtlGet failed: %d",
|
||||||
|
__FILE__, __LINE__, ret);
|
||||||
|
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||||
|
}
|
||||||
|
rest -= frag_size;
|
||||||
|
asked += frag_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -109,26 +121,30 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
|||||||
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
|
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ev->mlength < msg_length)
|
||||||
|
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "Truncated message, some PtlGet are required (protocol = %d)",
|
||||||
|
ompi_mtl_portals4.protocol));
|
||||||
|
|
||||||
#if OPAL_ENABLE_DEBUG
|
#if OPAL_ENABLE_DEBUG
|
||||||
ptl_request->hdr_data = ev->hdr_data;
|
ptl_request->hdr_data = ev->hdr_data;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
|
ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
|
||||||
/* If it's not a short message and we're doing rndv, we
|
if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && msg_length > ev->mlength) {
|
||||||
|
/* If it's not a short message and we're doing rndv and the message is not complete, we
|
||||||
only have the first part of the message. Issue the get
|
only have the first part of the message. Issue the get
|
||||||
to pull the second part of the message. */
|
to pull the second part of the message. */
|
||||||
ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit,
|
ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength,
|
||||||
((msg_length > ptl_request->delivery_len) ?
|
((msg_length > ptl_request->delivery_len) ?
|
||||||
ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit,
|
ptl_request->delivery_len : msg_length) - ev->mlength,
|
||||||
ev->initiator,
|
ev->initiator,
|
||||||
ev->hdr_data,
|
ev->hdr_data,
|
||||||
ompi_mtl_portals4.eager_limit,
|
ev->mlength,
|
||||||
ptl_request);
|
ptl_request);
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||||
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
|
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
|
||||||
goto callback_error;
|
goto callback_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
/* If we're either using the eager protocol or were a
|
/* If we're either using the eager protocol or were a
|
||||||
short message, all data has been received, so complete
|
short message, all data has been received, so complete
|
||||||
@ -142,8 +158,6 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
|||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
|
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
|
||||||
}
|
}
|
||||||
ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
|
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
||||||
"Recv %lu (0x%lx) completed, expected",
|
"Recv %lu (0x%lx) completed, expected",
|
||||||
ptl_request->opcount, ptl_request->hdr_data));
|
ptl_request->opcount, ptl_request->hdr_data));
|
||||||
@ -165,12 +179,14 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* set the received length in the status, now that we know
|
/* set the received length in the status, now that we know
|
||||||
excatly how much data was sent. */
|
exactly how much data was sent. */
|
||||||
ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
|
ptl_request->super.super.ompi_req->req_status._ucount += ev->mlength;
|
||||||
if (ompi_mtl_portals4.protocol == rndv) {
|
|
||||||
ptl_request->super.super.ompi_req->req_status._ucount +=
|
ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1);
|
||||||
ompi_mtl_portals4.eager_limit;
|
if (ret > 0) {
|
||||||
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
assert(ptl_request->pending_reply == 0);
|
||||||
|
|
||||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||||
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
|
||||||
@ -192,8 +208,8 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
|||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
||||||
"Recv %lu (0x%lx) completed, reply",
|
"Recv %lu (0x%lx) completed , reply (pending_reply: %d)",
|
||||||
ptl_request->opcount, ptl_request->hdr_data));
|
ptl_request->opcount, ptl_request->hdr_data, ptl_request->pending_reply));
|
||||||
ptl_request->super.super.completion_callback(&ptl_request->super.super);
|
ptl_request->super.super.completion_callback(&ptl_request->super.super);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -281,17 +297,16 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
|||||||
ptl_request->super.super.completion_callback(&ptl_request->super.super);
|
ptl_request->super.super.completion_callback(&ptl_request->super.super);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
if (ev->mlength > 0) {
|
|
||||||
/* if rndv or triggered, copy the eager part to the right place */
|
|
||||||
memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength);
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength,
|
/* For long messages in the overflow list, ev->mlength = 0 */
|
||||||
((msg_length > ptl_request->delivery_len) ?
|
ptl_request->super.super.ompi_req->req_status._ucount = 0;
|
||||||
ptl_request->delivery_len : msg_length) - ev->mlength,
|
|
||||||
|
ret = read_msg((char*) ptl_request->delivery_ptr,
|
||||||
|
(msg_length > ptl_request->delivery_len) ?
|
||||||
|
ptl_request->delivery_len : msg_length,
|
||||||
ev->initiator,
|
ev->initiator,
|
||||||
ev->hdr_data,
|
ev->hdr_data,
|
||||||
ev->mlength,
|
0,
|
||||||
ptl_request);
|
ptl_request);
|
||||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||||
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
|
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
|
||||||
@ -373,6 +388,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
ptl_request->delivery_len = length;
|
ptl_request->delivery_len = length;
|
||||||
ptl_request->req_started = false;
|
ptl_request->req_started = false;
|
||||||
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
|
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
|
||||||
|
ptl_request->pending_reply = 0;
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
||||||
"Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n",
|
"Recv %lu from %x,%x of length %ld (0x%lx, 0x%lx, 0x%lx)\n",
|
||||||
@ -389,7 +405,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
PTL_ME_OP_PUT |
|
PTL_ME_OP_PUT |
|
||||||
PTL_ME_USE_ONCE |
|
PTL_ME_USE_ONCE |
|
||||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||||
if (length <= ompi_mtl_portals4.eager_limit) {
|
if (length <= ompi_mtl_portals4.short_limit) {
|
||||||
me.options |= PTL_ME_EVENT_LINK_DISABLE;
|
me.options |= PTL_ME_EVENT_LINK_DISABLE;
|
||||||
}
|
}
|
||||||
me.match_id = remote_proc;
|
me.match_id = remote_proc;
|
||||||
@ -413,7 +429,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
/* if a long message, spin until we either have a comm event or a
|
/* if a long message, spin until we either have a comm event or a
|
||||||
link event, guaranteeing progress for long unexpected
|
link event, guaranteeing progress for long unexpected
|
||||||
messages. */
|
messages. */
|
||||||
if (length > ompi_mtl_portals4.eager_limit) {
|
if (length > ompi_mtl_portals4.short_limit) {
|
||||||
while (true != ptl_request->req_started) {
|
while (true != ptl_request->req_started) {
|
||||||
ompi_mtl_portals4_progress();
|
ompi_mtl_portals4_progress();
|
||||||
}
|
}
|
||||||
@ -454,6 +470,7 @@ ompi_mtl_portals4_imrecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
ptl_request->delivery_ptr = start;
|
ptl_request->delivery_ptr = start;
|
||||||
ptl_request->delivery_len = length;
|
ptl_request->delivery_len = length;
|
||||||
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
|
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
|
||||||
|
ptl_request->pending_reply = 0;
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
||||||
"Mrecv %lu of length %ld (0x%lx)\n",
|
"Mrecv %lu of length %ld (0x%lx)\n",
|
||||||
|
@ -191,7 +191,7 @@ ompi_mtl_portals4_activate_block(ompi_mtl_portals4_recv_short_block_t *block)
|
|||||||
me.start = block->start;
|
me.start = block->start;
|
||||||
me.length = ompi_mtl_portals4.recv_short_size;
|
me.length = ompi_mtl_portals4.recv_short_size;
|
||||||
me.ct_handle = PTL_CT_NONE;
|
me.ct_handle = PTL_CT_NONE;
|
||||||
me.min_free = ompi_mtl_portals4.eager_limit;
|
me.min_free = ompi_mtl_portals4.short_limit;
|
||||||
me.uid = ompi_mtl_portals4.uid;
|
me.uid = ompi_mtl_portals4.uid;
|
||||||
me.options =
|
me.options =
|
||||||
PTL_ME_OP_PUT |
|
PTL_ME_OP_PUT |
|
||||||
|
@ -52,6 +52,8 @@ struct ompi_mtl_portals4_isend_request_t {
|
|||||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||||
struct ompi_mtl_portals4_pending_request_t *pending;
|
struct ompi_mtl_portals4_pending_request_t *pending;
|
||||||
#endif
|
#endif
|
||||||
|
ptl_size_t length;
|
||||||
|
int32_t pending_get;
|
||||||
uint32_t event_count;
|
uint32_t event_count;
|
||||||
};
|
};
|
||||||
typedef struct ompi_mtl_portals4_isend_request_t ompi_mtl_portals4_isend_request_t;
|
typedef struct ompi_mtl_portals4_isend_request_t ompi_mtl_portals4_isend_request_t;
|
||||||
@ -73,6 +75,7 @@ struct ompi_mtl_portals4_recv_request_t {
|
|||||||
void *delivery_ptr;
|
void *delivery_ptr;
|
||||||
size_t delivery_len;
|
size_t delivery_len;
|
||||||
volatile bool req_started;
|
volatile bool req_started;
|
||||||
|
int32_t pending_reply;
|
||||||
#if OPAL_ENABLE_DEBUG
|
#if OPAL_ENABLE_DEBUG
|
||||||
uint64_t opcount;
|
uint64_t opcount;
|
||||||
ptl_hdr_data_t hdr_data;
|
ptl_hdr_data_t hdr_data;
|
||||||
|
@ -44,6 +44,29 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
|||||||
ompi_mtl_portals4_isend_request_t* ptl_request =
|
ompi_mtl_portals4_isend_request_t* ptl_request =
|
||||||
(ompi_mtl_portals4_isend_request_t*) ptl_base_request;
|
(ompi_mtl_portals4_isend_request_t*) ptl_base_request;
|
||||||
|
|
||||||
|
if (PTL_EVENT_GET == ev->type) {
|
||||||
|
ret = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1);
|
||||||
|
if (ret > 0) {
|
||||||
|
/* wait for other gets */
|
||||||
|
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "PTL_EVENT_GET received now pending_get=%d",ret));
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
assert(ptl_request->pending_get == 0);
|
||||||
|
|
||||||
|
/* last get received */
|
||||||
|
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "PTL_EVENT_GET: PtlMEUnlink is called ptl_request->me_h=%d (pending get=%d)", ptl_request->me_h, ret));
|
||||||
|
|
||||||
|
if (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE)) {
|
||||||
|
ret = PtlMEUnlink(ptl_request->me_h);
|
||||||
|
if (PTL_OK != ret) {
|
||||||
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||||
|
"%s:%d: send callback PtlMEUnlink returned %d",
|
||||||
|
__FILE__, __LINE__, ret);
|
||||||
|
}
|
||||||
|
ptl_request->me_h = PTL_INVALID_HANDLE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
#if OMPI_MTL_PORTALS4_FLOW_CONTROL
|
||||||
if (OPAL_UNLIKELY(ev->ni_fail_type == PTL_NI_PT_DISABLED)) {
|
if (OPAL_UNLIKELY(ev->ni_fail_type == PTL_NI_PT_DISABLED)) {
|
||||||
ompi_mtl_portals4_pending_request_t *pending =
|
ompi_mtl_portals4_pending_request_t *pending =
|
||||||
@ -66,6 +89,7 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
|||||||
"%s:%d: send callback PtlMEUnlink returned %d",
|
"%s:%d: send callback PtlMEUnlink returned %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
}
|
}
|
||||||
|
ptl_request->me_h = PTL_INVALID_HANDLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
|
opal_list_append(&ompi_mtl_portals4.flowctl.pending_sends,
|
||||||
@ -89,11 +113,40 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
|||||||
"send %lu got event of type %d",
|
"send %lu got event of type %d",
|
||||||
ptl_request->opcount, ev->type));
|
ptl_request->opcount, ev->type));
|
||||||
|
|
||||||
|
/* First put achieved successfully (In the Priority List), so it may be necessary to decrement the number of pending get
|
||||||
|
* If the protocol is eager, just decrement pending_get
|
||||||
|
* Else (the protocol is rndv), decrement pending_get only if length % max_msg_size <= eager_limit
|
||||||
|
* (This is the case where the eager part allows to save one get)
|
||||||
|
*/
|
||||||
if ((PTL_EVENT_ACK == ev->type) &&
|
if ((PTL_EVENT_ACK == ev->type) &&
|
||||||
(PTL_PRIORITY_LIST == ev->ptl_list) &&
|
(PTL_PRIORITY_LIST == ev->ptl_list) &&
|
||||||
(eager == ompi_mtl_portals4.protocol) &&
|
(0 < ptl_request->pending_get)) {
|
||||||
|
|
||||||
|
if ((eager == ompi_mtl_portals4.protocol) ||
|
||||||
|
(ptl_request->length % ompi_mtl_portals4.max_msg_size_mtl <= ompi_mtl_portals4.eager_limit)) {
|
||||||
|
val = OPAL_THREAD_ADD32(&(ptl_request->pending_get), -1);
|
||||||
|
}
|
||||||
|
if (0 == val) {
|
||||||
|
add = 2; /* We haven't to wait for any get, so we have to add an extra count to cause the message to complete */
|
||||||
|
if (!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE)) {
|
||||||
|
ret = PtlMEUnlink(ptl_request->me_h);
|
||||||
|
if (PTL_OK != ret) {
|
||||||
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||||
|
"%s:%d: send callback PtlMEUnlink returned %d",
|
||||||
|
__FILE__, __LINE__, ret);
|
||||||
|
}
|
||||||
|
ptl_request->me_h = PTL_INVALID_HANDLE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((PTL_EVENT_ACK == ev->type) &&
|
||||||
|
(PTL_PRIORITY_LIST == ev->ptl_list) &&
|
||||||
|
(ev->mlength == ptl_request->length) &&
|
||||||
(!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE))) {
|
(!PtlHandleIsEqual(ptl_request->me_h, PTL_INVALID_HANDLE))) {
|
||||||
/* long expected messages with the eager protocol won't see a
|
/* long expected messages with the eager protocol
|
||||||
|
(and also with the rndv protocol if the length
|
||||||
|
is less than or equal to eager_limit) won't see a
|
||||||
get event to complete the message. Give them an extra
|
get event to complete the message. Give them an extra
|
||||||
count to cause the message to complete with just the SEND
|
count to cause the message to complete with just the SEND
|
||||||
and ACK events and remove the ME. (we wait for the counter
|
and ACK events and remove the ME. (we wait for the counter
|
||||||
@ -105,10 +158,10 @@ ompi_mtl_portals4_callback(ptl_event_t *ev,
|
|||||||
"%s:%d: send callback PtlMEUnlink returned %d",
|
"%s:%d: send callback PtlMEUnlink returned %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
}
|
}
|
||||||
|
ptl_request->me_h = PTL_INVALID_HANDLE;
|
||||||
add++;
|
add++;
|
||||||
}
|
}
|
||||||
val = OPAL_THREAD_ADD32((int32_t*)&ptl_request->event_count, add);
|
val = OPAL_THREAD_ADD32((int32_t*)&ptl_request->event_count, add);
|
||||||
|
|
||||||
assert(val <= 3);
|
assert(val <= 3);
|
||||||
|
|
||||||
if (val == 3) {
|
if (val == 3) {
|
||||||
@ -191,6 +244,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
|||||||
|
|
||||||
MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length,
|
MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length,
|
||||||
(MCA_PML_BASE_SEND_SYNCHRONOUS == mode) ? 1 : 0);
|
(MCA_PML_BASE_SEND_SYNCHRONOUS == mode) ? 1 : 0);
|
||||||
|
ptl_request->me_h = PTL_INVALID_HANDLE;
|
||||||
|
|
||||||
if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
|
if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
|
||||||
me.start = NULL;
|
me.start = NULL;
|
||||||
@ -217,6 +271,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
|||||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||||
"%s:%d: PtlMEAppend failed: %d",
|
"%s:%d: PtlMEAppend failed: %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
|
ptl_request->me_h = PTL_INVALID_HANDLE;
|
||||||
return ompi_mtl_portals4_get_error(ret);
|
return ompi_mtl_portals4_get_error(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -225,7 +280,6 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
|||||||
ptl_request->opcount, hdr_data, match_bits));
|
ptl_request->opcount, hdr_data, match_bits));
|
||||||
} else {
|
} else {
|
||||||
ptl_request->event_count = 1;
|
ptl_request->event_count = 1;
|
||||||
ptl_request->me_h = PTL_INVALID_HANDLE;
|
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
||||||
"Send %lu short send with hdr_data 0x%lx (0x%lx)",
|
"Send %lu short send with hdr_data 0x%lx (0x%lx)",
|
||||||
@ -236,6 +290,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
|||||||
"Send %lu, start: %p",
|
"Send %lu, start: %p",
|
||||||
ptl_request->opcount, start));
|
ptl_request->opcount, start));
|
||||||
|
|
||||||
|
ptl_request->pending_get = 0;
|
||||||
ret = PtlPut(ompi_mtl_portals4.send_md_h,
|
ret = PtlPut(ompi_mtl_portals4.send_md_h,
|
||||||
(ptl_size_t) start,
|
(ptl_size_t) start,
|
||||||
length,
|
length,
|
||||||
@ -252,6 +307,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
|
|||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
|
if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
|
||||||
PtlMEUnlink(ptl_request->me_h);
|
PtlMEUnlink(ptl_request->me_h);
|
||||||
|
ptl_request->me_h = PTL_INVALID_HANDLE;
|
||||||
}
|
}
|
||||||
return ompi_mtl_portals4_get_error(ret);
|
return ompi_mtl_portals4_get_error(ret);
|
||||||
}
|
}
|
||||||
@ -283,7 +339,6 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
|
|||||||
me.uid = ompi_mtl_portals4.uid;
|
me.uid = ompi_mtl_portals4.uid;
|
||||||
me.options =
|
me.options =
|
||||||
PTL_ME_OP_GET |
|
PTL_ME_OP_GET |
|
||||||
PTL_ME_USE_ONCE |
|
|
||||||
PTL_ME_EVENT_LINK_DISABLE |
|
PTL_ME_EVENT_LINK_DISABLE |
|
||||||
PTL_ME_EVENT_UNLINK_DISABLE;
|
PTL_ME_EVENT_UNLINK_DISABLE;
|
||||||
me.match_id = ptl_proc;
|
me.match_id = ptl_proc;
|
||||||
@ -307,8 +362,32 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
|
|||||||
"Send %lu long send with hdr_data 0x%lx (0x%lx)",
|
"Send %lu long send with hdr_data 0x%lx (0x%lx)",
|
||||||
ptl_request->opcount, hdr_data, match_bits));
|
ptl_request->opcount, hdr_data, match_bits));
|
||||||
|
|
||||||
put_length = (rndv == ompi_mtl_portals4.protocol) ?
|
if (rndv == ompi_mtl_portals4.protocol) {
|
||||||
(ptl_size_t) ompi_mtl_portals4.eager_limit : (ptl_size_t) length;
|
ptl_size_t min = (OPAL_LIKELY (ompi_mtl_portals4.eager_limit < ompi_mtl_portals4.max_msg_size_mtl)) ?
|
||||||
|
ompi_mtl_portals4.eager_limit :
|
||||||
|
ompi_mtl_portals4.max_msg_size_mtl;
|
||||||
|
if ((ptl_size_t) length > (ptl_size_t) min) {
|
||||||
|
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output,
|
||||||
|
"msg truncated by %ld", length - min));
|
||||||
|
put_length = (ptl_size_t) min;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
put_length = (ptl_size_t) length;
|
||||||
|
} else { // eager protocol
|
||||||
|
if (length > ompi_mtl_portals4.max_msg_size_mtl)
|
||||||
|
put_length = (ptl_size_t) ompi_mtl_portals4.max_msg_size_mtl;
|
||||||
|
else
|
||||||
|
put_length = (ptl_size_t) length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We have to wait for some GET events.
|
||||||
|
If the first put falls in the overflow list, the number of GET events is equal to:
|
||||||
|
(length - 1) / ompi_mtl_portals4.max_msg_size_mtl + 1
|
||||||
|
else we will re-calculate this number when we received the first ACK event (with remote overflow list)
|
||||||
|
*/
|
||||||
|
|
||||||
|
ptl_request->pending_get = (length - 1) / ompi_mtl_portals4.max_msg_size_mtl + 1;
|
||||||
|
OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "pending_get=%d", ptl_request->pending_get));
|
||||||
|
|
||||||
ret = PtlPut(ompi_mtl_portals4.send_md_h,
|
ret = PtlPut(ompi_mtl_portals4.send_md_h,
|
||||||
(ptl_size_t) start,
|
(ptl_size_t) start,
|
||||||
@ -324,7 +403,8 @@ ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
|
|||||||
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
|
||||||
"%s:%d: PtlPut failed: %d",
|
"%s:%d: PtlPut failed: %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
PtlMEUnlink(ptl_request->me_h);
|
PtlMEUnlink(ptl_request->me_h);
|
||||||
|
ptl_request->me_h = PTL_INVALID_HANDLE;
|
||||||
return ompi_mtl_portals4_get_error(ret);
|
return ompi_mtl_portals4_get_error(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -355,7 +435,7 @@ ompi_mtl_portals4_pending_list_progress()
|
|||||||
}
|
}
|
||||||
|
|
||||||
pending = (ompi_mtl_portals4_pending_request_t*) item;
|
pending = (ompi_mtl_portals4_pending_request_t*) item;
|
||||||
if (pending->length <= ompi_mtl_portals4.eager_limit) {
|
if (pending->length <= ompi_mtl_portals4.short_limit) {
|
||||||
ret = ompi_mtl_portals4_short_isend(pending->mode,
|
ret = ompi_mtl_portals4_short_isend(pending->mode,
|
||||||
pending->start,
|
pending->start,
|
||||||
pending->length,
|
pending->length,
|
||||||
@ -414,6 +494,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
|
|||||||
|
|
||||||
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1);
|
ptl_request->opcount = OPAL_THREAD_ADD64((int64_t*)&ompi_mtl_portals4.opcount, 1);
|
||||||
ptl_request->buffer_ptr = (free_after) ? start : NULL;
|
ptl_request->buffer_ptr = (free_after) ? start : NULL;
|
||||||
|
ptl_request->length = length;
|
||||||
ptl_request->event_count = 0;
|
ptl_request->event_count = 0;
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
|
||||||
@ -461,7 +542,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl,
|
|||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (length <= ompi_mtl_portals4.eager_limit) {
|
if (length <= ompi_mtl_portals4.short_limit) {
|
||||||
ret = ompi_mtl_portals4_short_isend(mode,
|
ret = ompi_mtl_portals4_short_isend(mode,
|
||||||
start,
|
start,
|
||||||
length,
|
length,
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user