1
1

Handle a race condition between mpirun detecting stdin closed (and releasing the read event), and receiving an xon/xoff notice from a remote orted that detects proc termination and tells mpirun "don't send any more input - the proc is gone". This latter was necessary since we might have hung an infinite source of input on mpirun, while the proc terminated after some point in time.

This commit was SVN r19997.
Этот коммит содержится в:
Ralph Castain 2008-11-14 15:19:53 +00:00
родитель 101b6fdeb8
Коммит 891630ae85
4 изменённых файлов: 10 добавлений и 10 удалений

Просмотреть файл

@ -169,6 +169,7 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_iof_write_output_t);
OPAL_EV_READ | OPAL_EV_PERSIST, \ OPAL_EV_READ | OPAL_EV_PERSIST, \
(cbfunc), rev); \ (cbfunc), rev); \
if ((actv)) { \ if ((actv)) { \
rev->active = true; \
opal_event_add(&rev->ev, 0); \ opal_event_add(&rev->ev, 0); \
} \ } \
} while(0); } while(0);

Просмотреть файл

@ -232,14 +232,7 @@ static int hnp_push(const orte_process_name_t* dst_name, orte_iof_tag_t src_tag,
*/ */
ORTE_IOF_READ_EVENT(&mca_iof_hnp_component.stdinev, ORTE_IOF_READ_EVENT(&mca_iof_hnp_component.stdinev,
dst_name, fd, src_tag, dst_name, fd, src_tag,
orte_iof_hnp_read_local_handler, false); orte_iof_hnp_read_local_handler, true);
/* flag that it is operational */
mca_iof_hnp_component.stdinev->active = true;
/* activate it */
if (OPAL_SUCCESS != (rc = opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0))) {
ORTE_ERROR_LOG(rc);
}
} }
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;

Просмотреть файл

@ -62,14 +62,16 @@ static void process_msg(int fd, short event, void *cbdata)
if (ORTE_IOF_XON & stream) { if (ORTE_IOF_XON & stream) {
/* re-start the stdin read event */ /* re-start the stdin read event */
if (!mca_iof_hnp_component.stdinev->active) { if (NULL != mca_iof_hnp_component.stdinev &&
!mca_iof_hnp_component.stdinev->active) {
mca_iof_hnp_component.stdinev->active = true; mca_iof_hnp_component.stdinev->active = true;
opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0); opal_event_add(&(mca_iof_hnp_component.stdinev->ev), 0);
} }
goto CLEAN_RETURN; goto CLEAN_RETURN;
} else if (ORTE_IOF_XOFF & stream) { } else if (ORTE_IOF_XOFF & stream) {
/* stop the stdin read event */ /* stop the stdin read event */
if (!mca_iof_hnp_component.stdinev->active) { if (NULL != mca_iof_hnp_component.stdinev &&
!mca_iof_hnp_component.stdinev->active) {
opal_event_del(&(mca_iof_hnp_component.stdinev->ev)); opal_event_del(&(mca_iof_hnp_component.stdinev->ev));
mca_iof_hnp_component.stdinev->active = false; mca_iof_hnp_component.stdinev->active = false;
} }

Просмотреть файл

@ -268,6 +268,10 @@ static void stdin_write_handler(int fd, short event, void *cbdata)
opal_event_del(&wev->ev); opal_event_del(&wev->ev);
wev->pending = false; wev->pending = false;
/* tell the HNP to stop sending us stuff */ /* tell the HNP to stop sending us stuff */
if (!mca_iof_orted_component.xoff) {
mca_iof_orted_component.xoff = true;
orte_iof_orted_send_xonxoff(ORTE_IOF_XOFF);
}
/* tell ourselves to dump anything that arrives */ /* tell ourselves to dump anything that arrives */
goto DEPART; goto DEPART;
} else if (num_written < output->numbytes) { } else if (num_written < output->numbytes) {