1
1

Shifting to an event-driven RTE raises some interesting issues during shutdown. We want the last messages to get thru, but also need to correctly shutdown the virtual machine. This requires a delicate balancing act across event priorities, and the need to check for termination conditions in places where related events get processed.

Change the priority of comm_failure and job_termination events to ensure we process final messages prior to terminating. Check for termination conditions when processing proc termination events as we may order proc termination when the daemon gets an exit command, but we can't see the proc actually terminate until we get out of that message event.

Jeff: probably easiest to review this by testing. I tested it under both Slurm and rsh on v1.7.5 as well as trunk

cmr=v1.7.5:reviewer=jsquyres:subject=resolve event priorities during VM shutdown

This commit was SVN r31042.
Этот коммит содержится в:
Ralph Castain 2014-03-12 16:49:58 +00:00
родитель 51916c5b41
Коммит f56f37d364
8 изменённых файлов: 83 добавлений и 8 удалений

Просмотреть файл

@ -109,6 +109,11 @@ static int init(void)
/* setup state machine to trap job errors */ /* setup state machine to trap job errors */
orte_state.add_job_state(ORTE_JOB_STATE_ERROR, job_errors, ORTE_ERROR_PRI); orte_state.add_job_state(ORTE_JOB_STATE_ERROR, job_errors, ORTE_ERROR_PRI);
/* set the lost connection state to run at MSG priority so
* we can process any last messages from the proc
*/
orte_state.add_proc_state(ORTE_PROC_STATE_COMM_FAILED, proc_errors, ORTE_MSG_PRI);
/* setup state machine to trap proc errors */ /* setup state machine to trap proc errors */
orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI);

Просмотреть файл

@ -112,6 +112,11 @@ static int init(void)
/* setup state machine to trap job errors */ /* setup state machine to trap job errors */
orte_state.add_job_state(ORTE_JOB_STATE_ERROR, job_errors, ORTE_ERROR_PRI); orte_state.add_job_state(ORTE_JOB_STATE_ERROR, job_errors, ORTE_ERROR_PRI);
/* set the lost connection state to run at MSG priority so
* we can process any last messages from the proc
*/
orte_state.add_proc_state(ORTE_PROC_STATE_COMM_FAILED, proc_errors, ORTE_MSG_PRI);
/* setup state machine to trap proc errors */ /* setup state machine to trap proc errors */
orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI); orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI);
@ -290,7 +295,7 @@ static void proc_errors(int fd, short args, void *cbdata)
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
"%s errmgr:default:orted all routes gone - exiting", "%s errmgr:default:orted all routes gone - exiting",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
ORTE_FORCED_TERMINATE(0); ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED);
} else { } else {
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
"%s errmgr:default:orted not exiting, num_routes() == %d", "%s errmgr:default:orted not exiting, num_routes() == %d",

Просмотреть файл

@ -921,7 +921,7 @@ static void clean_abort(int fd, short flags, void *arg)
(which is a Bad Thing), so we can't call it directly. (which is a Bad Thing), so we can't call it directly.
Instead, we have to exit this handler and setup to call Instead, we have to exit this handler and setup to call
job_completed() after this. */ job_completed() after this. */
ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); orte_plm.terminate_orteds();;
} }
static struct timeval current, last={0,0}; static struct timeval current, last={0,0};

Просмотреть файл

@ -152,7 +152,7 @@ static int init(void)
} }
/* add the termination response */ /* add the termination response */
if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_DAEMONS_TERMINATED, if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_DAEMONS_TERMINATED,
orte_quit, ORTE_MSG_PRI))) { orte_quit, ORTE_SYS_PRI))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
/* add a default error response */ /* add a default error response */

Просмотреть файл

@ -23,6 +23,7 @@
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/iof/iof.h" #include "orte/mca/iof/iof.h"
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml.h"
#include "orte/mca/routed/routed.h"
#include "orte/util/session_dir.h" #include "orte/util/session_dir.h"
#include "orte/runtime/orte_quit.h" #include "orte/runtime/orte_quit.h"
@ -108,7 +109,7 @@ static int init(void)
} }
/* add a state for when we are ordered to terminate */ /* add a state for when we are ordered to terminate */
if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_DAEMONS_TERMINATED, if (ORTE_SUCCESS != (rc = orte_state.add_job_state(ORTE_JOB_STATE_DAEMONS_TERMINATED,
orte_quit, ORTE_ERROR_PRI))) { orte_quit, ORTE_SYS_PRI))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
if (5 < opal_output_get_verbosity(orte_state_base_framework.framework_output)) { if (5 < opal_output_get_verbosity(orte_state_base_framework.framework_output)) {
@ -326,8 +327,28 @@ static void track_procs(int fd, short argc, void *cbdata)
orte_rml_send_callback, NULL))) { orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
/* if we are trying to terminate and our routes are
* gone, then terminate ourselves IF no local procs
* remain (might be some from another job)
*/
if (orte_orteds_term_ordered &&
0 == orte_routed.num_routes()) {
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
pptr->alive) {
/* at least one is still alive */
goto moveon;
}
}
/* call our appropriate exit procedure */
OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output,
"%s orted:state: all routes and children gone - exiting",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED);
}
} }
} }
moveon:
/* Release the stdin IOF file descriptor for this child, if one /* Release the stdin IOF file descriptor for this child, if one
* was defined. File descriptors for the other IOF channels - stdout, * was defined. File descriptors for the other IOF channels - stdout,
* stderr, and stddiag - were released when their associated pipes * stderr, and stddiag - were released when their associated pipes
@ -377,6 +398,25 @@ static void track_procs(int fd, short argc, void *cbdata)
orte_rml_send_callback, NULL))) { orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
/* if we are trying to terminate and our routes are
* gone, then terminate ourselves IF no local procs
* remain (might be some from another job)
*/
if (orte_orteds_term_ordered &&
0 == orte_routed.num_routes()) {
for (i=0; i < orte_local_children->size; i++) {
if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
pptr->alive) {
/* at least one is still alive */
goto moveon;
}
}
/* call our appropriate exit procedure */
OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output,
"%s orted:state: all routes and children gone - exiting",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED);
}
} }
} }
} }

Просмотреть файл

@ -14,6 +14,7 @@
* reserved. * reserved.
* Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -85,6 +86,11 @@ static char *get_orted_comm_cmd_str(int command);
static opal_pointer_array_t *procs_prev_ordered_to_terminate = NULL; static opal_pointer_array_t *procs_prev_ordered_to_terminate = NULL;
static void suicide(int sd, short args, void *cbdata)
{
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED);
}
void orte_daemon_recv(int status, orte_process_name_t* sender, void orte_daemon_recv(int status, orte_process_name_t* sender,
opal_buffer_t *buffer, orte_rml_tag_t tag, opal_buffer_t *buffer, orte_rml_tag_t tag,
void* cbdata) void* cbdata)
@ -434,6 +440,8 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
} }
/* kill the local procs */ /* kill the local procs */
orte_odls.kill_local_procs(NULL); orte_odls.kill_local_procs(NULL);
/* flag that orteds were ordered to terminate */
orte_orteds_term_ordered = true;
/* if all my routes and local children are gone, then terminate ourselves */ /* if all my routes and local children are gone, then terminate ourselves */
if (0 == orte_routed.num_routes()) { if (0 == orte_routed.num_routes()) {
for (i=0; i < orte_local_children->size; i++) { for (i=0; i < orte_local_children->size; i++) {
@ -449,6 +457,11 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
} }
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED); ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED);
} else {
/* set a timer to suicide, just in case one of our
* dependent routes fails to terminate
*/
ORTE_TIMER_EVENT(10, 0, suicide, ORTE_ERROR_PRI);
} }
return; return;
break; break;

Просмотреть файл

@ -38,8 +38,12 @@ int main(int argc, char* argv[])
i++; i++;
pi = i / 3.14159256; pi = i / 3.14159256;
if (i == 9995) { if (i == 9995) {
orte_finalize(); if (ORTE_PROC_MY_NAME->vpid == 1) {
exit(ORTE_PROC_MY_NAME->vpid); orte_finalize();
exit(77);
} else {
i=0;
}
} }
} }

Просмотреть файл

@ -17,14 +17,22 @@
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
int i;
float pi;
if (ORTE_SUCCESS != orte_init(&argc, &argv, ORTE_PROC_NON_MPI)) { if (ORTE_SUCCESS != orte_init(&argc, &argv, ORTE_PROC_NON_MPI)) {
fprintf(stderr, "ORTE_INIT FAILED\n"); fprintf(stderr, "ORTE_INIT FAILED\n");
exit(1); exit(1);
} }
opal_output(0, "%s RUNNING", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_output(0, "%s RUNNING", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
while (orte_event_base_active) { i = 0;
opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE); while (1) {
i++;
pi = i / 3.14159256;
if (i == 9995) {
i=0;
}
} }
orte_finalize(); orte_finalize();