diff --git a/orte/mca/routed/binomial/routed_binomial.c b/orte/mca/routed/binomial/routed_binomial.c index 74d56b9379..9aed118310 100644 --- a/orte/mca/routed/binomial/routed_binomial.c +++ b/orte/mca/routed/binomial/routed_binomial.c @@ -32,6 +32,7 @@ #include "orte/util/nidmap.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" +#include "orte/runtime/orte_quit.h" #include "orte/runtime/runtime.h" #include "orte/runtime/data_type_support/orte_dt_support.h" @@ -830,11 +831,22 @@ static int route_lost(const orte_process_name_t *route) item = opal_list_get_next(item)) { child = (orte_routed_tree_t*)item; if (child->vpid == route->vpid) { + OPAL_OUTPUT_VERBOSE((4, orte_routed_base_output, + "%s routed_binomial: removing route to child daemon %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(route))); opal_list_remove_item(&my_children, item); OBJ_RELEASE(item); return ORTE_SUCCESS; } } + /* if we are the HNP or daemon, AND we are terminating, + * then we want to finalize if all our child daemons + * have left + */ + if (orte_terminating && 0 == opal_list_get_size(&my_children)) { + orte_quit(); + } } /* we don't care about this one, so return success */ diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index af607a00ff..9f23ef5b26 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -743,6 +743,8 @@ int orte_daemon_process_commands(orte_process_name_t* sender, opal_output(0, "%s orted_cmd: received exit cmd", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); } + /* flag that we are terminating */ + orte_terminating = true; /* kill the local procs */ orte_odls.kill_local_procs(NULL); /* trigger our appropriate exit procedure diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index 30d6c7581e..daf3100517 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -52,6 +52,7 @@ */ bool orte_initialized = false; bool orte_finalizing = false; +bool orte_terminating = false; bool orte_debug_flag = false; int orte_debug_verbosity; char *orte_prohibited_session_dirs = NULL; diff --git a/orte/runtime/orte_quit.c b/orte/runtime/orte_quit.c index 29bb060b99..dbbe35eb2f 100644 --- a/orte/runtime/orte_quit.c +++ b/orte/runtime/orte_quit.c @@ -143,6 +143,9 @@ void orte_quit(void) return; } + /* flag that we are finalizing */ + orte_finalizing = true; + /* whack any lingering session directory files from our jobs */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); diff --git a/orte/runtime/runtime.h b/orte/runtime/runtime.h index 60578d71d0..814fe2b3c0 100644 --- a/orte/runtime/runtime.h +++ b/orte/runtime/runtime.h @@ -43,6 +43,7 @@ ORTE_DECLSPEC extern const char orte_version_string[]; */ ORTE_DECLSPEC extern bool orte_initialized; ORTE_DECLSPEC extern bool orte_finalizing; +ORTE_DECLSPEC extern bool orte_terminating; ORTE_DECLSPEC extern int orte_debug_output; ORTE_DECLSPEC extern bool orte_debug_flag;