From 891630ae85e539cbe52d46cdd74a82c0315d1e6e Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 14 Nov 2008 15:19:53 +0000 Subject: [PATCH] Handle a race condition between mpirun detecting stdin closed (and releasing the read event), and receiving an xon/xoff notice from a remote orted that detects proc termination and tells mpirun "don't send any more input - the proc is gone". This latter was necessary since we might have hung an infinite source of input on mpirun, while the proc terminated after some point in time. This commit was SVN r19997. --- orte/mca/iof/base/base.h | 1 + orte/mca/iof/hnp/iof_hnp.c | 9 +-------- orte/mca/iof/hnp/iof_hnp_receive.c | 6 ++++-- orte/mca/iof/orted/iof_orted.c | 4 ++++ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/orte/mca/iof/base/base.h b/orte/mca/iof/base/base.h index 4ffbe0e55f..eeca238cc0 100644 --- a/orte/mca/iof/base/base.h +++ b/orte/mca/iof/base/base.h @@ -169,6 +169,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_write_output_t); OPAL_EV_READ | OPAL_EV_PERSIST, \ (cbfunc), rev); \ if ((actv)) { \ + rev->active = true; \ opal_event_add(&rev->ev, 0); \ } \ } while(0); diff --git a/orte/mca/iof/hnp/iof_hnp.c b/orte/mca/iof/hnp/iof_hnp.c index a6a8a16c72..7f372ea932 100644 --- a/orte/mca/iof/hnp/iof_hnp.c +++ b/orte/mca/iof/hnp/iof_hnp.c @@ -232,14 +232,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag, */ ORTE_IOF_READ_EVENT(&mca_iof_hnp_component.stdinev, dst_name, fd, src_tag, - orte_iof_hnp_read_local_handler, false); - - /* flag that it is operational */ - mca_iof_hnp_component.stdinev->active = true; - /* activate it */ - if (OPAL_SUCCESS != (rc = opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0))) { - ORTE_ERROR_LOG(rc); - } + orte_iof_hnp_read_local_handler, true); } } return ORTE_SUCCESS; diff --git a/orte/mca/iof/hnp/iof_hnp_receive.c b/orte/mca/iof/hnp/iof_hnp_receive.c index e5262e53aa..66bc364482 100644 --- a/orte/mca/iof/hnp/iof_hnp_receive.c +++ b/orte/mca/iof/hnp/iof_hnp_receive.c @@ -62,14 +62,16 @@ static void process_msg(int fd, short event, void *cbdata) if (ORTE_IOF_XON & stream) { /* re-start the stdin read event */ - if (!mca_iof_hnp_component.stdinev->active) { + if (NULL != mca_iof_hnp_component.stdinev && + !mca_iof_hnp_component.stdinev->active) { mca_iof_hnp_component.stdinev->active = true; opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0); } goto CLEAN_RETURN; } else if (ORTE_IOF_XOFF & stream) { /* stop the stdin read event */ - if (!mca_iof_hnp_component.stdinev->active) { + if (NULL != mca_iof_hnp_component.stdinev && + !mca_iof_hnp_component.stdinev->active) { opal_event_del(&(mca_iof_hnp_component.stdinev->ev)); mca_iof_hnp_component.stdinev->active = false; } diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index 728a327886..aea03b674a 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -268,6 +268,10 @@ static void stdin_write_handler(int fd, short event, void *cbdata) opal_event_del(&wev->ev); wev->pending = false; /* tell the HNP to stop sending us stuff */ + if (!mca_iof_orted_component.xoff) { + mca_iof_orted_component.xoff = true; + orte_iof_orted_send_xonxoff(ORTE_IOF_XOFF); + } /* tell ourselves to dump anything that arrives */ goto DEPART; } else if (num_written < output->numbytes) {