1
1

Add some debugging output and fix some places where the output id and verbosity level were swapped.

This commit was SVN r24740.
Этот коммит содержится в:
Brian Barrett 2011-06-01 17:20:18 +00:00
родитель 8f401a0563
Коммит b778d785fb
4 изменённых файлов: 61 добавлений и 14 удалений

Просмотреть файл

@ -21,6 +21,8 @@
#include <portals4.h> #include <portals4.h>
#include "orte/util/name_fns.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/mtl/mtl.h" #include "ompi/mca/mtl/mtl.h"
#include "opal/class/opal_list.h" #include "opal/class/opal_list.h"
#include "ompi/runtime/ompi_module_exchange.h" #include "ompi/runtime/ompi_module_exchange.h"
@ -67,6 +69,16 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
ptl_process_t *id; ptl_process_t *id;
size_t size; size_t size;
if (procs[i]->proc_arch != ompi_proc_local()->proc_arch) {
opal_output(ompi_mtl_base_output,
"Portals 4 MTL does not support heterogeneous operations.");
opal_output(ompi_mtl_base_output,
"Proc %s architecture %x, mine %x.",
ORTE_NAME_PRINT(&procs[i]->proc_name),
procs[i]->proc_arch, ompi_proc_local()->proc_arch);
return OMPI_ERROR;
}
mtl_peer_data[i] = malloc(sizeof(struct mca_mtl_base_endpoint_t)); mtl_peer_data[i] = malloc(sizeof(struct mca_mtl_base_endpoint_t));
if (NULL == mtl_peer_data[i]) { if (NULL == mtl_peer_data[i]) {
PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h); PtlMEUnlink(ompi_mtl_portals4.long_overflow_me_h);
@ -97,6 +109,11 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
mtl_peer_data[i]->ptl_proc = *id; mtl_peer_data[i]->ptl_proc = *id;
mtl_peer_data[i]->send_count = 0; mtl_peer_data[i]->send_count = 0;
mtl_peer_data[i]->recv_count = 0; mtl_peer_data[i]->recv_count = 0;
opal_output_verbose(25, ompi_mtl_base_output,
"Peer %d: %x,%x", (int) i,
(int) mtl_peer_data[i]->ptl_proc.phys.nid,
(int) mtl_peer_data[i]->ptl_proc.phys.pid);
} }
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -159,7 +176,7 @@ ompi_mtl_portals4_progress(void)
while (true) { while (true) {
ret = PtlEQGet(ompi_mtl_portals4.eq_h, &ev); ret = PtlEQGet(ompi_mtl_portals4.eq_h, &ev);
if (PTL_OK == ret) { if (PTL_OK == ret) {
OPAL_OUTPUT_VERBOSE((ompi_mtl_base_output, 50, OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
"Found event of type %d\n", ev.type)); "Found event of type %d\n", ev.type));
switch (ev.type) { switch (ev.type) {
case PTL_EVENT_GET: case PTL_EVENT_GET:

Просмотреть файл

@ -123,6 +123,18 @@ ompi_mtl_portals4_component_open(void)
return OMPI_ERR_NOT_SUPPORTED; return OMPI_ERR_NOT_SUPPORTED;
} }
opal_output_verbose(1, ompi_mtl_base_output,
"Eager limit: %d", (int) ompi_mtl_portals4.eager_limit);
opal_output_verbose(1, ompi_mtl_base_output,
"Short receive blocks: %d", ompi_mtl_portals4.recv_short_num);
opal_output_verbose(1, ompi_mtl_base_output,
"Queue size: %d", ompi_mtl_portals4.queue_size);
opal_output_verbose(1, ompi_mtl_base_output,
"Long protocol: %s",
(ompi_mtl_portals4.protocol == eager) ? "Eager" :
(ompi_mtl_portals4.protocol == rndv) ? "Rendezvous" :
(ompi_mtl_portals4.protocol == triggered) ? "Triggered" : "Other");
ompi_mtl_portals4.ni_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.ni_h = PTL_INVALID_HANDLE;
ompi_mtl_portals4.eq_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.eq_h = PTL_INVALID_HANDLE;
ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE; ompi_mtl_portals4.zero_md_h = PTL_INVALID_HANDLE;

Просмотреть файл

@ -129,6 +129,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR; ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
} }
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed"));
ptl_request->super.completion_callback(&ptl_request->super); ptl_request->super.completion_callback(&ptl_request->super);
break; break;
@ -162,6 +163,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
if (ompi_mtl_portals4.protocol == triggered) { if (ompi_mtl_portals4.protocol == triggered) {
PtlCTFree(ptl_request->ct_h); PtlCTFree(ptl_request->ct_h);
} }
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed"));
ptl_request->super.completion_callback(&ptl_request->super); ptl_request->super.completion_callback(&ptl_request->super);
break; break;
@ -230,6 +232,7 @@ ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR; ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
} }
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "recv completed"));
ptl_request->super.completion_callback(&ptl_request->super); ptl_request->super.completion_callback(&ptl_request->super);
} else { } else {
@ -338,7 +341,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
remote_proc.phys.nid = PTL_NID_ANY; remote_proc.phys.nid = PTL_NID_ANY;
remote_proc.phys.pid = PTL_PID_ANY; remote_proc.phys.pid = PTL_PID_ANY;
if (ompi_mtl_portals4.protocol == triggered) { if (ompi_mtl_portals4.protocol == triggered) {
printf("Brian broke any_source\n"); abort(); printf("Brian broke any_source with triggered rndv\n"); abort();
} }
} else { } else {
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
@ -365,6 +368,12 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl,
ptl_request->delivery_len = length; ptl_request->delivery_len = length;
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS; ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_SUCCESS;
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
"Recv %d from %x,%x of length %d\n",
endpoint->recv_count,
endpoint->ptl_proc.phys.nid, endpoint->ptl_proc.phys.pid,
(int)length));
if (ompi_mtl_portals4.protocol == triggered && length > ompi_mtl_portals4.eager_limit) { if (ompi_mtl_portals4.protocol == triggered && length > ompi_mtl_portals4.eager_limit) {
ptl_md_t md; ptl_md_t md;

Просмотреть файл

@ -37,7 +37,7 @@ ompi_mtl_portals4_short_callback(ptl_event_t *ev, ompi_mtl_portals4_request_t *p
assert(NULL != ptl_request->super.ompi_req); assert(NULL != ptl_request->super.ompi_req);
if (ev->ni_fail_type != PTL_NI_OK) { if (ev->ni_fail_type != PTL_NI_OK) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: short send callback ni_fail_type: %d", "%s:%d: short send callback ni_fail_type: %d",
__FILE__, __LINE__, ev->ni_fail_type); __FILE__, __LINE__, ev->ni_fail_type);
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR; ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
@ -46,6 +46,7 @@ ompi_mtl_portals4_short_callback(ptl_event_t *ev, ompi_mtl_portals4_request_t *p
free(ptl_request->buffer_ptr); free(ptl_request->buffer_ptr);
} }
PtlMDRelease(ptl_request->md_h); PtlMDRelease(ptl_request->md_h);
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send completed"));
ptl_request->super.completion_callback(&ptl_request->super); ptl_request->super.completion_callback(&ptl_request->super);
return OMPI_SUCCESS; return OMPI_SUCCESS;
@ -60,7 +61,7 @@ ompi_mtl_portals4_long_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
assert(NULL != ptl_request->super.ompi_req); assert(NULL != ptl_request->super.ompi_req);
if (ev->ni_fail_type != PTL_NI_OK) { if (ev->ni_fail_type != PTL_NI_OK) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: long send callback ni_fail_type: %d", "%s:%d: long send callback ni_fail_type: %d",
__FILE__, __LINE__, ev->ni_fail_type); __FILE__, __LINE__, ev->ni_fail_type);
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR; ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
@ -74,6 +75,7 @@ ompi_mtl_portals4_long_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
free(ptl_request->buffer_ptr); free(ptl_request->buffer_ptr);
} }
PtlMDRelease(ptl_request->md_h); PtlMDRelease(ptl_request->md_h);
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send completed"));
ptl_request->super.completion_callback(&ptl_request->super); ptl_request->super.completion_callback(&ptl_request->super);
} }
@ -94,7 +96,7 @@ ompi_mtl_portals4_sync_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
assert(NULL != ptl_request->super.ompi_req); assert(NULL != ptl_request->super.ompi_req);
if (ev->ni_fail_type != PTL_NI_OK) { if (ev->ni_fail_type != PTL_NI_OK) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: sync send callback ni_fail_type: %d", "%s:%d: sync send callback ni_fail_type: %d",
__FILE__, __LINE__, ev->ni_fail_type); __FILE__, __LINE__, ev->ni_fail_type);
ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR; ptl_request->super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
@ -108,6 +110,7 @@ ompi_mtl_portals4_sync_callback(ptl_event_t *ev, struct ompi_mtl_portals4_reques
free(ptl_request->buffer_ptr); free(ptl_request->buffer_ptr);
} }
PtlMDRelease(ptl_request->md_h); PtlMDRelease(ptl_request->md_h);
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "send completed"));
ptl_request->super.completion_callback(&ptl_request->super); ptl_request->super.completion_callback(&ptl_request->super);
} }
@ -144,7 +147,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, void *start, int le
&md, &md,
&ptl_request->md_h); &ptl_request->md_h);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMDBind failed: %d", "%s:%d: PtlMDBind failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
return ompi_mtl_portals4_get_error(ret); return ompi_mtl_portals4_get_error(ret);
@ -161,7 +164,7 @@ ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode, void *start, int le
ptl_request, ptl_request,
0); 0);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlPut failed: %d", "%s:%d: PtlPut failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
PtlMDRelease(ptl_request->md_h); PtlMDRelease(ptl_request->md_h);
@ -196,7 +199,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int localra
&md, &md,
&ptl_request->md_h); &ptl_request->md_h);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMDBind failed: %d", "%s:%d: PtlMDBind failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
return ompi_mtl_portals4_get_error(ret); return ompi_mtl_portals4_get_error(ret);
@ -223,7 +226,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int localra
ptl_request, ptl_request,
&ptl_request->me_h); &ptl_request->me_h);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMEAppend failed: %d", "%s:%d: PtlMEAppend failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
PtlMDRelease(ptl_request->md_h); PtlMDRelease(ptl_request->md_h);
@ -265,7 +268,7 @@ ompi_mtl_portals4_long_isend(void *start, int length, int contextid, int localra
me.match_bits); me.match_bits);
} }
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlPut failed: %d", "%s:%d: PtlPut failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
PtlMEUnlink(ptl_request->me_h); PtlMEUnlink(ptl_request->me_h);
@ -300,7 +303,7 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int localra
&md, &md,
&ptl_request->md_h); &ptl_request->md_h);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMDBind failed: %d", "%s:%d: PtlMDBind failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
return ompi_mtl_portals4_get_error(ret); return ompi_mtl_portals4_get_error(ret);
@ -323,7 +326,7 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int localra
ptl_request, ptl_request,
&ptl_request->me_h); &ptl_request->me_h);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlMEAppend failed: %d", "%s:%d: PtlMEAppend failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
PtlMDRelease(ptl_request->md_h); PtlMDRelease(ptl_request->md_h);
@ -341,7 +344,7 @@ ompi_mtl_portals4_sync_isend(void *start, int length, int contextid, int localra
ptl_request, ptl_request,
(ptl_hdr_data_t)(uintptr_t)ptl_request); (ptl_hdr_data_t)(uintptr_t)ptl_request);
if (PTL_OK != ret) { if (PTL_OK != ret) {
opal_output_verbose(ompi_mtl_base_output, 1, opal_output_verbose(1, ompi_mtl_base_output,
"%s:%d: PtlPut failed: %d", "%s:%d: PtlPut failed: %d",
__FILE__, __LINE__, ret); __FILE__, __LINE__, ret);
PtlMEUnlink(ptl_request->me_h); PtlMEUnlink(ptl_request->me_h);
@ -381,6 +384,12 @@ ompi_mtl_portals4_isend(struct mca_mtl_base_module_t* mtl,
endpoint->send_count++; endpoint->send_count++;
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
"Send %d to %x,%x of length %d\n",
endpoint->send_count,
endpoint->ptl_proc.phys.nid, endpoint->ptl_proc.phys.pid,
(int)length));
switch (mode) { switch (mode) {
case MCA_PML_BASE_SEND_STANDARD: case MCA_PML_BASE_SEND_STANDARD:
case MCA_PML_BASE_SEND_READY: case MCA_PML_BASE_SEND_READY: