Handle a race condition between mpirun detecting stdin closed (and releasing the read event), and receiving an xon/xoff notice from a remote orted that detects proc termination and tells mpirun "don't send any more input - the proc is gone". This latter was necessary since we might have hung an infinite source of input on mpirun, while the proc terminated after some point in time.
This commit was SVN r19997.
Этот коммит содержится в:
родитель
101b6fdeb8
Коммит
891630ae85
@ -169,6 +169,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_write_output_t);
|
||||
OPAL_EV_READ | OPAL_EV_PERSIST, \
|
||||
(cbfunc), rev); \
|
||||
if ((actv)) { \
|
||||
rev->active = true; \
|
||||
opal_event_add(&rev->ev, 0); \
|
||||
} \
|
||||
} while(0);
|
||||
|
@ -232,14 +232,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
|
||||
*/
|
||||
ORTE_IOF_READ_EVENT(&mca_iof_hnp_component.stdinev,
|
||||
dst_name, fd, src_tag,
|
||||
orte_iof_hnp_read_local_handler, false);
|
||||
|
||||
/* flag that it is operational */
|
||||
mca_iof_hnp_component.stdinev->active = true;
|
||||
/* activate it */
|
||||
if (OPAL_SUCCESS != (rc = opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
orte_iof_hnp_read_local_handler, true);
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -62,14 +62,16 @@ static void process_msg(int fd, short event, void *cbdata)
|
||||
|
||||
if (ORTE_IOF_XON & stream) {
|
||||
/* re-start the stdin read event */
|
||||
if (!mca_iof_hnp_component.stdinev->active) {
|
||||
if (NULL != mca_iof_hnp_component.stdinev &&
|
||||
!mca_iof_hnp_component.stdinev->active) {
|
||||
mca_iof_hnp_component.stdinev->active = true;
|
||||
opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0);
|
||||
}
|
||||
goto CLEAN_RETURN;
|
||||
} else if (ORTE_IOF_XOFF & stream) {
|
||||
/* stop the stdin read event */
|
||||
if (!mca_iof_hnp_component.stdinev->active) {
|
||||
if (NULL != mca_iof_hnp_component.stdinev &&
|
||||
!mca_iof_hnp_component.stdinev->active) {
|
||||
opal_event_del(&(mca_iof_hnp_component.stdinev->ev));
|
||||
mca_iof_hnp_component.stdinev->active = false;
|
||||
}
|
||||
|
@ -268,6 +268,10 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
||||
opal_event_del(&wev->ev);
|
||||
wev->pending = false;
|
||||
/* tell the HNP to stop sending us stuff */
|
||||
if (!mca_iof_orted_component.xoff) {
|
||||
mca_iof_orted_component.xoff = true;
|
||||
orte_iof_orted_send_xonxoff(ORTE_IOF_XOFF);
|
||||
}
|
||||
/* tell ourselves to dump anything that arrives */
|
||||
goto DEPART;
|
||||
} else if (num_written < output->numbytes) {
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user