Clean up the communications handshake so that orte-submit properly terminates upon job completion and properly sends the terminate command to orte-dvm
Этот коммит содержится в:
родитель
54a4eaa516
Коммит
7299cc3ab9
@ -77,8 +77,7 @@ int orte_ess_base_tool_setup(void)
|
||||
opal_proc_local_set(&orte_process_info.super);
|
||||
|
||||
if (NULL != orte_process_info.my_hnp_uri) {
|
||||
/* if we were given an HNP, then we were launched
|
||||
* by mpirun in some fashion - in this case, we want
|
||||
/* if we were given an HNP, then we want
|
||||
* to look like an application as well as being a tool.
|
||||
* Need to do this before opening the routed framework
|
||||
* so it will do the right things.
|
||||
@ -87,6 +86,7 @@ int orte_ess_base_tool_setup(void)
|
||||
/* get a separate orte event base */
|
||||
orte_event_base = opal_start_progress_thread("orte", true);
|
||||
progress_thread_running = true;
|
||||
orte_event_base_active = true;
|
||||
}
|
||||
|
||||
/* open and setup the state machine */
|
||||
|
@ -109,6 +109,7 @@ static void local_recv(int status, orte_process_name_t* sender,
|
||||
static char **global_mca_env = NULL;
|
||||
static orte_std_cntr_t total_num_apps = 0;
|
||||
static bool want_prefix_by_default = (bool) ORTE_WANT_ORTERUN_PREFIX_BY_DEFAULT;
|
||||
static volatile bool mywait = true;
|
||||
|
||||
/*
|
||||
* Globals
|
||||
@ -485,7 +486,7 @@ int main(int argc, char *argv[])
|
||||
opal_buffer_t *buf;
|
||||
orte_daemon_cmd_flag_t cmd = ORTE_DAEMON_HALT_VM_CMD;
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.pack(buf, &cmd, 1, ORTE_DAEMON_CMD_T);
|
||||
opal_dss.pack(buf, &cmd, 1, ORTE_DAEMON_CMD);
|
||||
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
|
||||
ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL);
|
||||
@ -563,12 +564,10 @@ int main(int argc, char *argv[])
|
||||
rc = orte_plm.spawn(jdata);
|
||||
|
||||
waiting:
|
||||
/* loop the event lib until an exit event is detected */
|
||||
while (orte_event_base_active) {
|
||||
opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE);
|
||||
}
|
||||
ORTE_WAIT_FOR_COMPLETION(mywait);
|
||||
|
||||
DONE:
|
||||
opal_output(0, "FINALIZING");
|
||||
/* cleanup and leave */
|
||||
orte_finalize();
|
||||
|
||||
@ -1445,7 +1444,7 @@ void orte_timeout_wakeup(int sd, short args, void *cbdata)
|
||||
orte_show_help("help-orterun.txt", "orterun:timeout",
|
||||
true, (NULL == tm) ? "NULL" : tm);
|
||||
ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
|
||||
orte_event_base_active = false;
|
||||
exit(orte_exit_status);
|
||||
}
|
||||
|
||||
static void local_recv(int status, orte_process_name_t* sender,
|
||||
@ -1454,7 +1453,7 @@ static void local_recv(int status, orte_process_name_t* sender,
|
||||
{
|
||||
int rc, ret;
|
||||
int32_t cnt;
|
||||
|
||||
|
||||
/* unpack the completion status of the job */
|
||||
cnt = 1;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT))) {
|
||||
@ -1463,6 +1462,6 @@ static void local_recv(int status, orte_process_name_t* sender,
|
||||
/* update our exit status to match */
|
||||
ORTE_UPDATE_EXIT_STATUS(ret);
|
||||
|
||||
/* eject us from the event loop - we are done */
|
||||
exit(orte_exit_status);
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user