From 47ed214458b985011cc0e88e02054eb92e9ece44 Mon Sep 17 00:00:00 2001
From: Ralph Castain
Date: Tue, 29 Nov 2016 19:17:38 -0800
Subject: [PATCH] Do not resend if max_retries is exceeded. Make a verbose output available to tell us where the intended message was to go.

Signed-off-by: Ralph Castain
---
 orte/mca/oob/base/oob_base_stubs.c | 2 --
 orte/mca/rml/base/rml_base_frame.c | 6 ++++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/orte/mca/oob/base/oob_base_stubs.c b/orte/mca/oob/base/oob_base_stubs.c
index c6e92b8565..be1661285b 100644
--- a/orte/mca/oob/base/oob_base_stubs.c
+++ b/orte/mca/oob/base/oob_base_stubs.c
@@ -125,8 +125,6 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
                 ORTE_OOB_SEND(msg);
                 return;
             }
-            ORTE_OOB_SEND(msg);
-            return;
         }
         msg->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
         ORTE_RML_SEND_COMPLETE(msg);
diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c
index 8b381bb525..254d9fafdf 100644
--- a/orte/mca/rml/base/rml_base_frame.c
+++ b/orte/mca/rml/base/rml_base_frame.c
@@ -24,6 +24,7 @@
 #include "opal/mca/base/mca_base_component_repository.h"
 #include "opal/util/output.h"
 
+#include "orte/mca/errmgr/errmgr.h"
 #include "orte/mca/rml/rml.h"
 #include "orte/mca/state/state.h"
 #include "orte/runtime/orte_wait.h"
@@ -225,6 +226,11 @@ void orte_rml_send_callback(int status, orte_process_name_t *peer,
 {
     OBJ_RELEASE(buffer);
     if (ORTE_SUCCESS != status) {
+        opal_output_verbose(2, orte_rml_base_framework.framework_output,
+                            "%s UNABLE TO SEND MESSAGE TO %s TAG %d: %s",
+                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
+                            ORTE_NAME_PRINT(peer), tag,
+                            ORTE_ERROR_NAME(status));
         ORTE_ACTIVATE_PROC_STATE(peer, ORTE_PROC_STATE_UNABLE_TO_SEND_MSG);
     }
 }