From 17846411c328f48a6f3416b760e9599d7f9dd51f Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 23 Sep 2014 22:51:10 +0000 Subject: [PATCH] Now that we have an ORTE thread running in apps, we can't just call "exit" during RTE abort as that is happening in a thread, and (at least in some environments) doesn't result in the main thread being immediately terminated. Instead, we wind up going through orte_finalize in the main thread, which isn't what we want. So replace the call to "exit" with the "quick exit" variant "_exit", which causes the entire process to exit immediately. (custom patch has been posted for 1.8.3) This commit was SVN r32780. --- orte/mca/ess/base/ess_base_std_app.c | 6 +++--- orte/mca/ess/pmi/ess_pmi_module.c | 12 +++++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index c3d80c1200..66971c3b32 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -418,7 +418,7 @@ void orte_ess_base_app_abort(int status, bool report) { int fd; char *myfile; - struct timespec tp = {0, 100000}; \ + struct timespec tp = {0, 100000}; /* Exit - do NOT do a normal finalize as this will very likely * hang the process. 
We are aborting due to an abnormal condition @@ -445,7 +445,7 @@ void orte_ess_base_app_abort(int status, bool report) /* now introduce a short delay to allow any pending * messages (e.g., from a call to "show_help") to * have a chance to be sent */ - nanosleep(&tp, NULL); \ + nanosleep(&tp, NULL); } /* - Clean out the global structures @@ -453,7 +453,7 @@ void orte_ess_base_app_abort(int status, bool report) orte_proc_info_finalize(); /* Now Exit */ - exit(status); + _exit(status); } static void* orte_progress_thread_engine(opal_object_t *obj) diff --git a/orte/mca/ess/pmi/ess_pmi_module.c b/orte/mca/ess/pmi/ess_pmi_module.c index 5b6c4185e0..129bf668ba 100644 --- a/orte/mca/ess/pmi/ess_pmi_module.c +++ b/orte/mca/ess/pmi/ess_pmi_module.c @@ -418,6 +418,8 @@ static int rte_finalize(void) static void rte_abort(int status, bool report) { + struct timespec tp = {0, 100000}; + OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, "%s ess:pmi:abort: abort with status %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), @@ -428,10 +430,10 @@ static void rte_abort(int status, bool report) */ opal_pmix.abort(status, "N/A"); - /* - Clean out the global structures - * (not really necessary, but good practice) */ - orte_proc_info_finalize(); - + /* provide a little delay for the PMIx thread to + * get the info out */ + nanosleep(&tp, NULL); + /* Now Exit */ - exit(status); + _exit(status); }