Handle a race condition between mpirun detecting stdin closed (and releasing the read event), and receiving an xon/xoff notice from a remote orted that detects proc termination and tells mpirun "don't send any more input - the proc is gone". This latter was necessary since we might have hung an infinite source of input on mpirun, while the proc terminated after some point in time.
This commit was SVN r19997.
Этот коммит содержится в:
родитель
101b6fdeb8
Коммит
891630ae85
@ -169,6 +169,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_write_output_t);
|
|||||||
OPAL_EV_READ | OPAL_EV_PERSIST, \
|
OPAL_EV_READ | OPAL_EV_PERSIST, \
|
||||||
(cbfunc), rev); \
|
(cbfunc), rev); \
|
||||||
if ((actv)) { \
|
if ((actv)) { \
|
||||||
|
rev->active = true; \
|
||||||
opal_event_add(&rev->ev, 0); \
|
opal_event_add(&rev->ev, 0); \
|
||||||
} \
|
} \
|
||||||
} while(0);
|
} while(0);
|
||||||
|
@ -232,14 +232,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
|
|||||||
*/
|
*/
|
||||||
ORTE_IOF_READ_EVENT(&mca_iof_hnp_component.stdinev,
|
ORTE_IOF_READ_EVENT(&mca_iof_hnp_component.stdinev,
|
||||||
dst_name, fd, src_tag,
|
dst_name, fd, src_tag,
|
||||||
orte_iof_hnp_read_local_handler, false);
|
orte_iof_hnp_read_local_handler, true);
|
||||||
|
|
||||||
/* flag that it is operational */
|
|
||||||
mca_iof_hnp_component.stdinev->active = true;
|
|
||||||
/* activate it */
|
|
||||||
if (OPAL_SUCCESS != (rc = opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0))) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
@ -62,14 +62,16 @@ static void process_msg(int fd, short event, void *cbdata)
|
|||||||
|
|
||||||
if (ORTE_IOF_XON & stream) {
|
if (ORTE_IOF_XON & stream) {
|
||||||
/* re-start the stdin read event */
|
/* re-start the stdin read event */
|
||||||
if (!mca_iof_hnp_component.stdinev->active) {
|
if (NULL != mca_iof_hnp_component.stdinev &&
|
||||||
|
!mca_iof_hnp_component.stdinev->active) {
|
||||||
mca_iof_hnp_component.stdinev->active = true;
|
mca_iof_hnp_component.stdinev->active = true;
|
||||||
opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0);
|
opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0);
|
||||||
}
|
}
|
||||||
goto CLEAN_RETURN;
|
goto CLEAN_RETURN;
|
||||||
} else if (ORTE_IOF_XOFF & stream) {
|
} else if (ORTE_IOF_XOFF & stream) {
|
||||||
/* stop the stdin read event */
|
/* stop the stdin read event */
|
||||||
if (!mca_iof_hnp_component.stdinev->active) {
|
if (NULL != mca_iof_hnp_component.stdinev &&
|
||||||
|
!mca_iof_hnp_component.stdinev->active) {
|
||||||
opal_event_del(&(mca_iof_hnp_component.stdinev->ev));
|
opal_event_del(&(mca_iof_hnp_component.stdinev->ev));
|
||||||
mca_iof_hnp_component.stdinev->active = false;
|
mca_iof_hnp_component.stdinev->active = false;
|
||||||
}
|
}
|
||||||
|
@ -268,6 +268,10 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
|
|||||||
opal_event_del(&wev->ev);
|
opal_event_del(&wev->ev);
|
||||||
wev->pending = false;
|
wev->pending = false;
|
||||||
/* tell the HNP to stop sending us stuff */
|
/* tell the HNP to stop sending us stuff */
|
||||||
|
if (!mca_iof_orted_component.xoff) {
|
||||||
|
mca_iof_orted_component.xoff = true;
|
||||||
|
orte_iof_orted_send_xonxoff(ORTE_IOF_XOFF);
|
||||||
|
}
|
||||||
/* tell ourselves to dump anything that arrives */
|
/* tell ourselves to dump anything that arrives */
|
||||||
goto DEPART;
|
goto DEPART;
|
||||||
} else if (num_written < output->numbytes) {
|
} else if (num_written < output->numbytes) {
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user