diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.c b/orte/mca/errmgr/default_orted/errmgr_default_orted.c index 301a848241..853e767ed6 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.c @@ -396,7 +396,8 @@ static void proc_errors(int fd, short args, void *cbdata) orte_set_attribute(&jdata->attributes, ORTE_JOB_FAIL_NOTIFIED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL); } /* if the proc has terminated, notify the state machine */ - if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_COMPLETE) && + if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) && + ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID) && !ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_RECORDED)) { ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED); } @@ -493,7 +494,8 @@ static void proc_errors(int fd, short args, void *cbdata) orte_set_attribute(&jdata->attributes, ORTE_JOB_FAIL_NOTIFIED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL); } /* if the proc has terminated, notify the state machine */ - if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_COMPLETE) && + if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) && + ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID) && !ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_RECORDED)) { ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED); } diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 9a56c5906a..66fc7519fd 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -1873,7 +1873,8 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs, /* check for everything complete - this will remove * the child object from our local list */ - if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_COMPLETE)) { + if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) && + ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID)) { ORTE_ACTIVATE_PROC_STATE(&child->name, child->state); } } diff --git a/orte/mca/state/orted/state_orted.c b/orte/mca/state/orted/state_orted.c index 3f2e3d9323..fc41826fcd 100644 --- a/orte/mca/state/orted/state_orted.c +++ b/orte/mca/state/orted/state_orted.c @@ -282,7 +282,7 @@ static void track_procs(int fd, short argc, void *cbdata) if (NULL != orte_iof.close) { orte_iof.close(proc, ORTE_IOF_STDIN); } - if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_COMPLETE) && + if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_WAITPID) && !ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_RECORDED)) { ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED); } @@ -292,7 +292,7 @@ static void track_procs(int fd, short argc, void *cbdata) * successful launch for short-lived procs */ ORTE_FLAG_SET(pdata, ORTE_PROC_FLAG_WAITPID); - if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_COMPLETE) && + if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_IOF_COMPLETE) && !ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_RECORDED)) { ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED); } diff --git a/orte/util/attr.h b/orte/util/attr.h index 8f1957c6ab..0f93493a41 100644 --- a/orte/util/attr.h +++ b/orte/util/attr.h @@ -134,7 +134,6 @@ typedef uint16_t orte_proc_flags_t; #define ORTE_PROC_FLAG_AS_MPI 0x0080 // proc is MPI process #define ORTE_PROC_FLAG_IOF_COMPLETE 0x0100 // IOF has completed #define ORTE_PROC_FLAG_WAITPID 0x0200 // waitpid fired -#define ORTE_PROC_FLAG_COMPLETE 0x0300 // both IOF and waitpid have been reported #define ORTE_PROC_FLAG_RECORDED 0x0400 // termination has been recorded