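Fix a race condition caused by a bad process flag definition: ORTE_PROC_FLAG_COMPLETE (0x0300) combined the IOF_COMPLETE (0x0100) and WAITPID (0x0200) bits, so testing it produced an OR check instead of the intended AND check. The combined flag is removed and both flags are now tested explicitly.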
This commit was SVN r32587.
Этот коммит содержится в:
родитель
039b7acfb5
Коммит
5a13cdb739
@@ -396,7 +396,8 @@ static void proc_errors(int fd, short args, void *cbdata)
|
|||||||
orte_set_attribute(&jdata->attributes, ORTE_JOB_FAIL_NOTIFIED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
|
orte_set_attribute(&jdata->attributes, ORTE_JOB_FAIL_NOTIFIED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
|
||||||
}
|
}
|
||||||
/* if the proc has terminated, notify the state machine */
|
/* if the proc has terminated, notify the state machine */
|
||||||
if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_COMPLETE) &&
|
if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
|
||||||
|
ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID) &&
|
||||||
!ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_RECORDED)) {
|
!ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_RECORDED)) {
|
||||||
ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
|
ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
|
||||||
}
|
}
|
||||||
@@ -493,7 +494,8 @@ static void proc_errors(int fd, short args, void *cbdata)
|
|||||||
orte_set_attribute(&jdata->attributes, ORTE_JOB_FAIL_NOTIFIED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
|
orte_set_attribute(&jdata->attributes, ORTE_JOB_FAIL_NOTIFIED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
|
||||||
}
|
}
|
||||||
/* if the proc has terminated, notify the state machine */
|
/* if the proc has terminated, notify the state machine */
|
||||||
if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_COMPLETE) &&
|
if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
|
||||||
|
ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID) &&
|
||||||
!ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_RECORDED)) {
|
!ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_RECORDED)) {
|
||||||
ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
|
ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
|
||||||
}
|
}
|
||||||
|
@@ -1873,7 +1873,8 @@ int orte_odls_base_default_kill_local_procs(opal_pointer_array_t *procs,
|
|||||||
/* check for everything complete - this will remove
|
/* check for everything complete - this will remove
|
||||||
* the child object from our local list
|
* the child object from our local list
|
||||||
*/
|
*/
|
||||||
if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_COMPLETE)) {
|
if (ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_IOF_COMPLETE) &&
|
||||||
|
ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_WAITPID)) {
|
||||||
ORTE_ACTIVATE_PROC_STATE(&child->name, child->state);
|
ORTE_ACTIVATE_PROC_STATE(&child->name, child->state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -282,7 +282,7 @@ static void track_procs(int fd, short argc, void *cbdata)
|
|||||||
if (NULL != orte_iof.close) {
|
if (NULL != orte_iof.close) {
|
||||||
orte_iof.close(proc, ORTE_IOF_STDIN);
|
orte_iof.close(proc, ORTE_IOF_STDIN);
|
||||||
}
|
}
|
||||||
if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_COMPLETE) &&
|
if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_WAITPID) &&
|
||||||
!ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_RECORDED)) {
|
!ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_RECORDED)) {
|
||||||
ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
|
ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
|
||||||
}
|
}
|
||||||
@@ -292,7 +292,7 @@ static void track_procs(int fd, short argc, void *cbdata)
|
|||||||
* successful launch for short-lived procs
|
* successful launch for short-lived procs
|
||||||
*/
|
*/
|
||||||
ORTE_FLAG_SET(pdata, ORTE_PROC_FLAG_WAITPID);
|
ORTE_FLAG_SET(pdata, ORTE_PROC_FLAG_WAITPID);
|
||||||
if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_COMPLETE) &&
|
if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_IOF_COMPLETE) &&
|
||||||
!ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_RECORDED)) {
|
!ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_RECORDED)) {
|
||||||
ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
|
ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
|
||||||
}
|
}
|
||||||
|
@@ -134,7 +134,6 @@ typedef uint16_t orte_proc_flags_t;
|
|||||||
#define ORTE_PROC_FLAG_AS_MPI 0x0080 // proc is MPI process
|
#define ORTE_PROC_FLAG_AS_MPI 0x0080 // proc is MPI process
|
||||||
#define ORTE_PROC_FLAG_IOF_COMPLETE 0x0100 // IOF has completed
|
#define ORTE_PROC_FLAG_IOF_COMPLETE 0x0100 // IOF has completed
|
||||||
#define ORTE_PROC_FLAG_WAITPID 0x0200 // waitpid fired
|
#define ORTE_PROC_FLAG_WAITPID 0x0200 // waitpid fired
|
||||||
#define ORTE_PROC_FLAG_COMPLETE 0x0300 // both IOF and waitpid have been reported
|
|
||||||
#define ORTE_PROC_FLAG_RECORDED 0x0400 // termination has been recorded
|
#define ORTE_PROC_FLAG_RECORDED 0x0400 // termination has been recorded
|
||||||
|
|
||||||
|
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user