Add some debugging output and fix some places where the output id and
verbosity level were swapped.

This commit was SVN r24740.
Этот коммит содержится в:
родитель
8f401a0563
Коммит
b778d785fb
@ -21,6 +21,8 @@
|
||||
|
||||
#include <portals4.h>
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
@ -67,6 +69,16 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||
ptl_process_t *id;
|
||||
size_t size;
|
||||
|
||||
if (procs[i]->proc_arch != ompi_proc_local()->proc_arch) {
|
||||
opal_output(ompi_mtl_base_output,
|
||||
"Portals 4 MTL does not support heterogeneous operations.");
|
||||
opal_output(ompi_mtl_base_output,
|
||||
"Proc %s architecture %x, mine %x.",
|
||||
ORTE_NAME_PRINT(&procs[i]->proc_name),
|
||||
procs[i]->proc_arch, ompi_proc_local()->proc_arch);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
mtl_peer_data[i] = malloc(sizeof(struct mca_mtl_base_endpoint_t));
|
||||
if (NULL == mtl_peer_data[i]) {
|
||||
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
|
||||
@ -93,10 +105,15 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
|
||||
__FILE__, __LINE__, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
mtl_peer_data[i]->ptl_proc = *id;
|
||||
mtl_peer_data[i]->send_count = 0;
|
||||
mtl_peer_data[i]->recv_count = 0;
|
||||
|
||||
opal_output_verbose(25, ompi_mtl_base_output,
|
||||
"Peer %d: %x,%x", (int) i,
|
||||
(int) mtl_peer_data[i]->ptl_proc.phys.nid,
|
||||
(int) mtl_peer_data[i]->ptl_proc.phys.pid);
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -159,7 +176,7 @@ ompi_mtl_portals4_progress(void)
|
||||
while (true) {
|
||||
ret = PtlEQGet(ompi_mtl_portals4.eq_h, &ev);
|
||||
if (PTL_OK == ret) {
|
||||
OPAL_OUTPUT_VERBOSE((ompi_mtl_base_output, 50,
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
|
||||
"Found event of type %d\n", ev.type));
|
||||
switch (ev.type) {
|
||||
case PTL_EVENT_GET:
|
||||
|
@ -123,6 +123,18 @@ ompi_mtl_portals4_component_open(void)
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"Eager limit: %d", (int) ompi_mtl_portals4.eager_limit);
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"Short receive blocks: %d", ompi_mtl_portals4.recv_short_num);
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"Queue size: %d", ompi_mtl_portals4.queue_size);
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"Long protocol: %s",
|
||||
(ompi_mtl_portals4.protocol == eager) ? "Eager" :
|
||||
(ompi_mtl_portals4.protocol == rndv) ? "Rendezvous" :
|
||||
(ompi_mtl_portals4.protocol == triggered) ? "Triggered" : "Other");
|
||||
|
||||
ompi_mtl_portals4.ni_h = PTL_INVALID_HANDLE;
|
||||
ompi_mtl_portals4.eq_h = PTL_INVALID_HANDLE;
|
||||
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;
|
||||
|
@ -129,6 +129,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
||||
__FILE__, __LINE__, ret);
|
||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed"));
|
||||
ptl_request->super.completion_callback(&ptl_request->super);
|
||||
break;
|
||||
|
||||
@ -162,6 +163,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
||||
if (ompi_mtl_portals4.protocol == triggered) {
|
||||
PtlCTFree(ptl_request->ct_h);
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed"));
|
||||
ptl_request->super.completion_callback(&ptl_request->super);
|
||||
break;
|
||||
|
||||
@ -230,6 +232,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
|
||||
__FILE__, __LINE__, ret);
|
||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed"));
|
||||
ptl_request->super.completion_callback(&ptl_request->super);
|
||||
|
||||
} else {
|
||||
@ -338,7 +341,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
remote_proc.phys.nid = PTL_NID_ANY;
|
||||
remote_proc.phys.pid = PTL_PID_ANY;
|
||||
if (ompi_mtl_portals4.protocol == triggered) {
|
||||
printf("Brian broke any_source\n"); abort();
|
||||
printf("Brian broke any_source with triggered rndv\n"); abort();
|
||||
}
|
||||
} else {
|
||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
|
||||
@ -365,6 +368,12 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
ptl_request->delivery_len = length;
|
||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
|
||||
"Recv %d from %x,%x of length %d\n",
|
||||
endpoint->recv_count,
|
||||
endpoint->ptl_proc.phys.nid, endpoint->ptl_proc.phys.pid,
|
||||
(int)length));
|
||||
|
||||
if (ompi_mtl_portals4.protocol == triggered && length > ompi_mtl_portals4.eager_limit) {
|
||||
ptl_md_t md;
|
||||
|
||||
|
@ -37,7 +37,7 @@ ompi_mtl_portals4_short_callback(ptl_event_t *ev, ompi_mtl_portals4_request_t *p
|
||||
assert(NULL != ptl_request->super.ompi_req);
|
||||
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: short send callback ni_fail_type: %d",
|
||||
__FILE__, __LINE__, ev->ni_fail_type);
|
||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
||||
@ -46,6 +46,7 @@ ompi_mtl_portals4_short_callback(ptl_event_t *ev, ompi_mtl_portals4_request_t *p
|
||||
free(ptl_request->buffer_ptr);
|
||||
}
|
||||
PtlMDRelease(ptl_request->md_h);
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send completed"));
|
||||
ptl_request->super.completion_callback(&ptl_request->super);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
@ -60,7 +61,7 @@ ompi_mtl_portals4_long_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
|
||||
assert(NULL != ptl_request->super.ompi_req);
|
||||
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: long send callback ni_fail_type: %d",
|
||||
__FILE__, __LINE__, ev->ni_fail_type);
|
||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
||||
@ -74,6 +75,7 @@ ompi_mtl_portals4_long_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
|
||||
free(ptl_request->buffer_ptr);
|
||||
}
|
||||
PtlMDRelease(ptl_request->md_h);
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send completed"));
|
||||
ptl_request->super.completion_callback(&ptl_request->super);
|
||||
}
|
||||
|
||||
@ -94,7 +96,7 @@ ompi_mtl_portals4_sync_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
|
||||
assert(NULL != ptl_request->super.ompi_req);
|
||||
|
||||
if (ev->ni_fail_type != PTL_NI_OK) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: sync send callback ni_fail_type: %d",
|
||||
__FILE__, __LINE__, ev->ni_fail_type);
|
||||
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
|
||||
@ -108,6 +110,7 @@ ompi_mtl_portals4_sync_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
|
||||
free(ptl_request->buffer_ptr);
|
||||
}
|
||||
PtlMDRelease(ptl_request->md_h);
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send completed"));
|
||||
ptl_request->super.completion_callback(&ptl_request->super);
|
||||
}
|
||||
|
||||
@ -144,7 +147,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, void *start, int le
|
||||
&md,
|
||||
&ptl_request->md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlMDBind failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
return ompi_mtl_portals4_get_error(ret);
|
||||
@ -161,7 +164,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, void *start, int le
|
||||
ptl_request,
|
||||
0);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlPut failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
PtlMDRelease(ptl_request->md_h);
|
||||
@ -196,7 +199,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int localra
|
||||
&md,
|
||||
&ptl_request->md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlMDBind failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
return ompi_mtl_portals4_get_error(ret);
|
||||
@ -223,7 +226,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int localra
|
||||
ptl_request,
|
||||
&ptl_request->me_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlMEAppend failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
PtlMDRelease(ptl_request->md_h);
|
||||
@ -265,7 +268,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int localra
|
||||
me.match_bits);
|
||||
}
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlPut failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
PtlMEUnlink(ptl_request->me_h);
|
||||
@ -300,7 +303,7 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int localra
|
||||
&md,
|
||||
&ptl_request->md_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlMDBind failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
return ompi_mtl_portals4_get_error(ret);
|
||||
@ -323,7 +326,7 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int localra
|
||||
ptl_request,
|
||||
&ptl_request->me_h);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlMEAppend failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
PtlMDRelease(ptl_request->md_h);
|
||||
@ -341,7 +344,7 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int localra
|
||||
ptl_request,
|
||||
(ptl_hdr_data_t)(uintptr_t)ptl_request);
|
||||
if (PTL_OK != ret) {
|
||||
opal_output_verbose(ompi_mtl_base_output, 1,
|
||||
opal_output_verbose(1, ompi_mtl_base_output,
|
||||
"%s:%d: PtlPut failed: %d",
|
||||
__FILE__, __LINE__, ret);
|
||||
PtlMEUnlink(ptl_request->me_h);
|
||||
@ -381,6 +384,12 @@ ompi_mtl_portals4_isend(struct mca_mtl_base_module_t* mtl,
|
||||
|
||||
endpoint->send_count++;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
|
||||
"Send %d to %x,%x of length %d\n",
|
||||
endpoint->send_count,
|
||||
endpoint->ptl_proc.phys.nid, endpoint->ptl_proc.phys.pid,
|
||||
(int)length));
|
||||
|
||||
switch (mode) {
|
||||
case MCA_PML_BASE_SEND_STANDARD:
|
||||
case MCA_PML_BASE_SEND_READY:
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user