Add some debugging output and fix some places where the output id and
verbosity level were swapped. This commit was SVN r24740.
Этот коммит содержится в:
родитель
8f401a0563
Коммит
b778d785fb
@ -21,6 +21,8 @@
|
|||||||
|
|
||||||
#include <portals4.h>
|
#include <portals4.h>
|
||||||
|
|
||||||
|
#include "orte/util/name_fns.h"
|
||||||
|
#include "ompi/proc/proc.h"
|
||||||
#include "ompi/mca/mtl/mtl.h"
|
#include "ompi/mca/mtl/mtl.h"
|
||||||
#include "opal/class/opal_list.h"
|
#include "opal/class/opal_list.h"
|
||||||
#include "ompi/runtime/ompi_module_exchange.h"
|
#include "ompi/runtime/ompi_module_exchange.h"
|
||||||
@ -67,6 +69,16 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
ptl_process_t *id;
|
ptl_process_t *id;
|
||||||
size_t size;
|
size_t size;
|
||||||
|
|
||||||
|
if (procs[i]->proc_arch != ompi_proc_local()->proc_arch) {
|
||||||
|
opal_output(ompi_mtl_base_output,
|
||||||
|
"Portals 4 MTL does not support heterogeneous operations.");
|
||||||
|
opal_output(ompi_mtl_base_output,
|
||||||
|
"Proc %s architecture %x, mine %x.",
|
||||||
|
ORTE_NAME_PRINT(&procs[i]->proc_name),
|
||||||
|
procs[i]->proc_arch, ompi_proc_local()->proc_arch);
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
mtl_peer_data[i] = malloc(sizeof(struct mca_mtl_base_endpoint_t));
|
mtl_peer_data[i] = malloc(sizeof(struct mca_mtl_base_endpoint_t));
|
||||||
if (NULL == mtl_peer_data[i]) {
|
if (NULL == mtl_peer_data[i]) {
|
||||||
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
|
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
|
||||||
@ -97,6 +109,11 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
|
|||||||
mtl_peer_data[i]->ptl_proc = *id;
|
mtl_peer_data[i]->ptl_proc = *id;
|
||||||
mtl_peer_data[i]->send_count = 0;
|
mtl_peer_data[i]->send_count = 0;
|
||||||
mtl_peer_data[i]->recv_count = 0;
|
mtl_peer_data[i]->recv_count = 0;
|
||||||
|
|
||||||
|
opal_output_verbose(25, ompi_mtl_base_output,
|
||||||
|
"Peer %d: %x,%x", (int) i,
|
||||||
|
(int) mtl_peer_data[i]->ptl_proc.phys.nid,
|
||||||
|
(int) mtl_peer_data[i]->ptl_proc.phys.pid);
|
||||||
}
|
}
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -159,7 +176,7 @@ ompi_mtl_portals4_progress(void)
|
|||||||
while (true) {
|
while (true) {
|
||||||
ret = PtlEQGet(ompi_mtl_portals4.eq_h, &ev);
|
ret = PtlEQGet(ompi_mtl_portals4.eq_h, &ev);
|
||||||
if (PTL_OK == ret) {
|
if (PTL_OK == ret) {
|
||||||
OPAL_OUTPUT_VERBOSE((ompi_mtl_base_output, 50,
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
|
||||||
"Found event of type %d\n", ev.type));
|
"Found event of type %d\n", ev.type));
|
||||||
switch (ev.type) {
|
switch (ev.type) {
|
||||||
case PTL_EVENT_GET:
|
case PTL_EVENT_GET:
|
||||||
|
@ -123,6 +123,18 @@ ompi_mtl_portals4_component_open(void)
|
|||||||
return OMPI_ERR_NOT_SUPPORTED;
|
return OMPI_ERR_NOT_SUPPORTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
|
"Eager limit: %d", (int) ompi_mtl_portals4.eager_limit);
|
||||||
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
|
"Short receive blocks: %d", ompi_mtl_portals4.recv_short_num);
|
||||||
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
|
"Queue size: %d", ompi_mtl_portals4.queue_size);
|
||||||
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
|
"Long protocol: %s",
|
||||||
|
(ompi_mtl_portals4.protocol == eager) ? "Eager" :
|
||||||
|
(ompi_mtl_portals4.protocol == rndv) ? "Rendezvous" :
|
||||||
|
(ompi_mtl_portals4.protocol == triggered) ? "Triggered" : "Other");
|
||||||
|
|
||||||
ompi_mtl_portals4.ni_h = PTL_INVALID_HANDLE;
|
ompi_mtl_portals4.ni_h = PTL_INVALID_HANDLE;
|
||||||
ompi_mtl_portals4.eq_h = PTL_INVALID_HANDLE;
|
ompi_mtl_portals4.eq_h = PTL_INVALID_HANDLE;
|
||||||
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
|
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
|
||||||
|
@ -129,6 +129,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
|||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed"));
|
||||||
ptl_request->super.completion_callback(&ptl_request->super);
|
ptl_request->super.completion_callback(&ptl_request->super);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -162,6 +163,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
|||||||
if (ompi_mtl_portals4.protocol == triggered) {
|
if (ompi_mtl_portals4.protocol == triggered) {
|
||||||
PtlCTFree(ptl_request->ct_h);
|
PtlCTFree(ptl_request->ct_h);
|
||||||
}
|
}
|
||||||
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed"));
|
||||||
ptl_request->super.completion_callback(&ptl_request->super);
|
ptl_request->super.completion_callback(&ptl_request->super);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -230,6 +232,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
|||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed"));
|
||||||
ptl_request->super.completion_callback(&ptl_request->super);
|
ptl_request->super.completion_callback(&ptl_request->super);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@ -338,7 +341,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
remote_proc.phys.nid = PTL_NID_ANY;
|
remote_proc.phys.nid = PTL_NID_ANY;
|
||||||
remote_proc.phys.pid = PTL_PID_ANY;
|
remote_proc.phys.pid = PTL_PID_ANY;
|
||||||
if (ompi_mtl_portals4.protocol == triggered) {
|
if (ompi_mtl_portals4.protocol == triggered) {
|
||||||
printf("Brian broke any_source\n"); abort();
|
printf("Brian broke any_source with triggered rndv\n"); abort();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
|
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
|
||||||
@ -365,6 +368,12 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
|||||||
ptl_request->delivery_len = length;
|
ptl_request->delivery_len = length;
|
||||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
|
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
|
||||||
|
"Recv %d from %x,%x of length %d\n",
|
||||||
|
endpoint->recv_count,
|
||||||
|
endpoint->ptl_proc.phys.nid, endpoint->ptl_proc.phys.pid,
|
||||||
|
(int)length));
|
||||||
|
|
||||||
if (ompi_mtl_portals4.protocol == triggered && length > ompi_mtl_portals4.eager_limit) {
|
if (ompi_mtl_portals4.protocol == triggered && length > ompi_mtl_portals4.eager_limit) {
|
||||||
ptl_md_t md;
|
ptl_md_t md;
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@ ompi_mtl_portals4_short_callback(ptl_event_t *ev, ompi_mtl_portals4_request_t *p
|
|||||||
assert(NULL != ptl_request->super.ompi_req);
|
assert(NULL != ptl_request->super.ompi_req);
|
||||||
|
|
||||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: short send callback ni_fail_type: %d",
|
"%s:%d: short send callback ni_fail_type: %d",
|
||||||
__FILE__, __LINE__, ev->ni_fail_type);
|
__FILE__, __LINE__, ev->ni_fail_type);
|
||||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
||||||
@ -46,6 +46,7 @@ ompi_mtl_portals4_short_callback(ptl_event_t *ev, ompi_mtl_portals4_request_t *p
|
|||||||
free(ptl_request->buffer_ptr);
|
free(ptl_request->buffer_ptr);
|
||||||
}
|
}
|
||||||
PtlMDRelease(ptl_request->md_h);
|
PtlMDRelease(ptl_request->md_h);
|
||||||
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send completed"));
|
||||||
ptl_request->super.completion_callback(&ptl_request->super);
|
ptl_request->super.completion_callback(&ptl_request->super);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -60,7 +61,7 @@ ompi_mtl_portals4_long_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
|
|||||||
assert(NULL != ptl_request->super.ompi_req);
|
assert(NULL != ptl_request->super.ompi_req);
|
||||||
|
|
||||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: long send callback ni_fail_type: %d",
|
"%s:%d: long send callback ni_fail_type: %d",
|
||||||
__FILE__, __LINE__, ev->ni_fail_type);
|
__FILE__, __LINE__, ev->ni_fail_type);
|
||||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
||||||
@ -74,6 +75,7 @@ ompi_mtl_portals4_long_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
|
|||||||
free(ptl_request->buffer_ptr);
|
free(ptl_request->buffer_ptr);
|
||||||
}
|
}
|
||||||
PtlMDRelease(ptl_request->md_h);
|
PtlMDRelease(ptl_request->md_h);
|
||||||
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send completed"));
|
||||||
ptl_request->super.completion_callback(&ptl_request->super);
|
ptl_request->super.completion_callback(&ptl_request->super);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -94,7 +96,7 @@ ompi_mtl_portals4_sync_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
|
|||||||
assert(NULL != ptl_request->super.ompi_req);
|
assert(NULL != ptl_request->super.ompi_req);
|
||||||
|
|
||||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: sync send callback ni_fail_type: %d",
|
"%s:%d: sync send callback ni_fail_type: %d",
|
||||||
__FILE__, __LINE__, ev->ni_fail_type);
|
__FILE__, __LINE__, ev->ni_fail_type);
|
||||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
||||||
@ -108,6 +110,7 @@ ompi_mtl_portals4_sync_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
|
|||||||
free(ptl_request->buffer_ptr);
|
free(ptl_request->buffer_ptr);
|
||||||
}
|
}
|
||||||
PtlMDRelease(ptl_request->md_h);
|
PtlMDRelease(ptl_request->md_h);
|
||||||
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send completed"));
|
||||||
ptl_request->super.completion_callback(&ptl_request->super);
|
ptl_request->super.completion_callback(&ptl_request->super);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,7 +147,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, void *start, int le
|
|||||||
&md,
|
&md,
|
||||||
&ptl_request->md_h);
|
&ptl_request->md_h);
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: PtlMDBind failed: %d",
|
"%s:%d: PtlMDBind failed: %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
return ompi_mtl_portals4_get_error(ret);
|
return ompi_mtl_portals4_get_error(ret);
|
||||||
@ -161,7 +164,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, void *start, int le
|
|||||||
ptl_request,
|
ptl_request,
|
||||||
0);
|
0);
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: PtlPut failed: %d",
|
"%s:%d: PtlPut failed: %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
PtlMDRelease(ptl_request->md_h);
|
PtlMDRelease(ptl_request->md_h);
|
||||||
@ -196,7 +199,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int localra
|
|||||||
&md,
|
&md,
|
||||||
&ptl_request->md_h);
|
&ptl_request->md_h);
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: PtlMDBind failed: %d",
|
"%s:%d: PtlMDBind failed: %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
return ompi_mtl_portals4_get_error(ret);
|
return ompi_mtl_portals4_get_error(ret);
|
||||||
@ -223,7 +226,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int localra
|
|||||||
ptl_request,
|
ptl_request,
|
||||||
&ptl_request->me_h);
|
&ptl_request->me_h);
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: PtlMEAppend failed: %d",
|
"%s:%d: PtlMEAppend failed: %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
PtlMDRelease(ptl_request->md_h);
|
PtlMDRelease(ptl_request->md_h);
|
||||||
@ -265,7 +268,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int localra
|
|||||||
me.match_bits);
|
me.match_bits);
|
||||||
}
|
}
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: PtlPut failed: %d",
|
"%s:%d: PtlPut failed: %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
PtlMEUnlink(ptl_request->me_h);
|
PtlMEUnlink(ptl_request->me_h);
|
||||||
@ -300,7 +303,7 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int localra
|
|||||||
&md,
|
&md,
|
||||||
&ptl_request->md_h);
|
&ptl_request->md_h);
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: PtlMDBind failed: %d",
|
"%s:%d: PtlMDBind failed: %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
return ompi_mtl_portals4_get_error(ret);
|
return ompi_mtl_portals4_get_error(ret);
|
||||||
@ -323,7 +326,7 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int localra
|
|||||||
ptl_request,
|
ptl_request,
|
||||||
&ptl_request->me_h);
|
&ptl_request->me_h);
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: PtlMEAppend failed: %d",
|
"%s:%d: PtlMEAppend failed: %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
PtlMDRelease(ptl_request->md_h);
|
PtlMDRelease(ptl_request->md_h);
|
||||||
@ -341,7 +344,7 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int localra
|
|||||||
ptl_request,
|
ptl_request,
|
||||||
(ptl_hdr_data_t)(uintptr_t)ptl_request);
|
(ptl_hdr_data_t)(uintptr_t)ptl_request);
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
opal_output_verbose(1, ompi_mtl_base_output,
|
||||||
"%s:%d: PtlPut failed: %d",
|
"%s:%d: PtlPut failed: %d",
|
||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
PtlMEUnlink(ptl_request->me_h);
|
PtlMEUnlink(ptl_request->me_h);
|
||||||
@ -381,6 +384,12 @@ ompi_mtl_portals4_isend(struct mca_mtl_base_module_t* mtl,
|
|||||||
|
|
||||||
endpoint->send_count++;
|
endpoint->send_count++;
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
|
||||||
|
"Send %d to %x,%x of length %d\n",
|
||||||
|
endpoint->send_count,
|
||||||
|
endpoint->ptl_proc.phys.nid, endpoint->ptl_proc.phys.pid,
|
||||||
|
(int)length));
|
||||||
|
|
||||||
switch (mode) {
|
switch (mode) {
|
||||||
case MCA_PML_BASE_SEND_STANDARD:
|
case MCA_PML_BASE_SEND_STANDARD:
|
||||||
case MCA_PML_BASE_SEND_READY:
|
case MCA_PML_BASE_SEND_READY:
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user