From b8ffa302da62a2552e5a1a95db1cbccf2b179f91 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 27 Feb 2009 10:16:25 +0000 Subject: [PATCH] Separate abnormal job termination from abnormal orted termination so we can continue to use xcast for orted cmds, but can know to turn off reading of stdin as the job is being terminated. This commit was SVN r20650. --- orte/mca/errmgr/default/errmgr_default.c | 4 ++-- orte/mca/iof/hnp/iof_hnp_read.c | 6 +++--- orte/mca/iof/hnp/iof_hnp_receive.c | 1 + orte/runtime/orte_globals.c | 1 + orte/runtime/orte_globals.h | 1 + 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/orte/mca/errmgr/default/errmgr_default.c b/orte/mca/errmgr/default/errmgr_default.c index 74767d8524..211226b017 100644 --- a/orte/mca/errmgr/default/errmgr_default.c +++ b/orte/mca/errmgr/default/errmgr_default.c @@ -66,7 +66,7 @@ void orte_errmgr_default_proc_aborted(orte_process_name_t *name, int exit_code) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(name), exit_code)); - orte_abnormal_term_ordered = true; + orte_job_term_ordered = true; /* indicate that all jobs other than the one containing this * proc have been orted to abort - this is necessary to avoid @@ -131,7 +131,7 @@ void orte_errmgr_default_incomplete_start(orte_jobid_t job, int exit_code) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job), exit_code)); - orte_abnormal_term_ordered = true; + orte_job_term_ordered = true; /* tell the plm to terminate all jobs */ if (ORTE_SUCCESS != (rc = orte_plm.terminate_job(ORTE_JOBID_WILDCARD))) { diff --git a/orte/mca/iof/hnp/iof_hnp_read.c b/orte/mca/iof/hnp/iof_hnp_read.c index 74163a819f..f0c9431545 100644 --- a/orte/mca/iof/hnp/iof_hnp_read.c +++ b/orte/mca/iof/hnp/iof_hnp_read.c @@ -46,7 +46,7 @@ static void restart_stdin(int fd, short event, void *cbdata) { if (NULL != mca_iof_hnp_component.stdinev && - !orte_abnormal_term_ordered) { + !orte_job_term_ordered) { mca_iof_hnp_component.stdinev->active = true; opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0); } @@ -125,10 +125,10 @@ void orte_iof_hnp_read_local_handler(int fd, short event, void *cbdata) /* is this read from our stdin? */ if (ORTE_IOF_STDIN & rev->tag) { - /* if an abnormal termination has occurred, just ignore the + /* if job termination has been ordered, just ignore the * data and delete the read event */ - if (orte_abnormal_term_ordered) { + if (orte_job_term_ordered) { OBJ_RELEASE(mca_iof_hnp_component.stdinev); OPAL_THREAD_UNLOCK(&mca_iof_hnp_component.lock); return; diff --git a/orte/mca/iof/hnp/iof_hnp_receive.c b/orte/mca/iof/hnp/iof_hnp_receive.c index 6cc7ebac40..d3cebf72dc 100644 --- a/orte/mca/iof/hnp/iof_hnp_receive.c +++ b/orte/mca/iof/hnp/iof_hnp_receive.c @@ -70,6 +70,7 @@ static void process_msg(int fd, short event, void *cbdata) if (ORTE_IOF_XON & stream) { /* re-start the stdin read event */ if (NULL != mca_iof_hnp_component.stdinev && + !orte_job_term_ordered && !mca_iof_hnp_component.stdinev->active) { mca_iof_hnp_component.stdinev->active = true; opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0); diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 12b32d19df..b3cc130195 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -81,6 +81,7 @@ orte_trigger_event_t orte_exit, orteds_exit; int orte_exit_status = 0; bool orte_abnormal_term_ordered = false; bool orte_routing_is_enabled = false; +bool orte_job_term_ordered = false; int orte_heartbeat_rate; int orte_startup_timeout; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index c5d61d1857..f70da7cc20 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -460,6 +460,7 @@ ORTE_DECLSPEC extern orte_trigger_event_t orte_exit, orteds_exit; ORTE_DECLSPEC extern int orte_exit_status; ORTE_DECLSPEC extern bool orte_abnormal_term_ordered; ORTE_DECLSPEC extern bool orte_routing_is_enabled; +ORTE_DECLSPEC extern bool orte_job_term_ordered; ORTE_DECLSPEC extern int orte_heartbeat_rate; ORTE_DECLSPEC extern int orte_startup_timeout;