Ensure that all ssh sessions are terminated when abnormally terminating the job
Этот коммит содержится в:
родитель
68614a211b
Коммит
920562d9b4
@ -966,7 +966,7 @@ static void process_launch_list(int fd, short args, void *cbdata)
|
|||||||
} else { /* father */
|
} else { /* father */
|
||||||
/* indicate this daemon has been launched */
|
/* indicate this daemon has been launched */
|
||||||
caddy->daemon->state = ORTE_PROC_STATE_RUNNING;
|
caddy->daemon->state = ORTE_PROC_STATE_RUNNING;
|
||||||
/* record the pid */
|
/* record the pid of the ssh fork */
|
||||||
caddy->daemon->pid = pid;
|
caddy->daemon->pid = pid;
|
||||||
|
|
||||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
|
||||||
@ -1289,16 +1289,33 @@ static int rsh_terminate_orteds(void)
|
|||||||
|
|
||||||
static int rsh_finalize(void)
|
static int rsh_finalize(void)
|
||||||
{
|
{
|
||||||
int rc;
|
int rc, i;
|
||||||
|
orte_job_t *jdata;
|
||||||
|
orte_proc_t *proc;
|
||||||
|
|
||||||
/* remove launch event */
|
/* remove launch event */
|
||||||
opal_event_del(&launch_event);
|
opal_event_del(&launch_event);
|
||||||
OBJ_DESTRUCT(&launch_list);
|
OPAL_LIST_DESTRUCT(&launch_list);
|
||||||
|
|
||||||
/* cleanup any pending recvs */
|
/* cleanup any pending recvs */
|
||||||
if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) {
|
if (ORTE_SUCCESS != (rc = orte_plm_base_comm_stop())) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) && orte_abnormal_term_ordered) {
|
||||||
|
/* ensure that any lingering ssh's are gone */
|
||||||
|
if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
for (i=0; i < jdata->procs->size; i++) {
|
||||||
|
if (NULL == (proc = opal_pointer_array_get_item(jdata->procs, i))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (proc->state < ORTE_PROC_STATE_UNTERMINATED && 0 < proc->pid) {
|
||||||
|
kill(proc->pid, SIGKILL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user