Fix single-node operations so that the HNP (head node process) exits correctly when the job completes.
This commit was SVN r18556.
Этот коммит содержится в:
родитель
b456fb2d42
Коммит
95578b0528
@ -117,9 +117,6 @@ int orte_plm_base_orted_exit(void)
|
||||
}
|
||||
procs = (orte_proc_t**)daemons->procs->addr;
|
||||
|
||||
procs[0]->state = ORTE_PROC_STATE_TERMINATED;
|
||||
daemons->num_terminated++;
|
||||
|
||||
/* pack the command */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&cmd, &command, 1, ORTE_DAEMON_CMD))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -141,6 +138,9 @@ int orte_plm_base_orted_exit(void)
|
||||
"%s plm:base:orted_cmd:orted_exit abnormal term ordered",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* be sure I get the command */
|
||||
ORTE_MESSAGE_EVENT(ORTE_PROC_MY_NAME, &cmd, ORTE_RML_TAG_DAEMON, orte_daemon_cmd_processor);
|
||||
|
||||
/* now send the command one daemon at a time using a non-blocking
|
||||
* send - let the callback function keep track of how many
|
||||
* complete - it will delete the event if they all do.
|
||||
|
@ -68,6 +68,7 @@
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
#include "orte/mca/odls/odls.h"
|
||||
#include "orte/mca/plm/plm.h"
|
||||
#include "orte/mca/plm/base/plm_private.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
@ -557,9 +558,24 @@ static int process_commands(orte_process_name_t* sender,
|
||||
/**** EXIT COMMAND ****/
|
||||
case ORTE_DAEMON_EXIT_CMD:
|
||||
if (orte_process_info.hnp) {
|
||||
/* if we are the HNP, do nothing - we will
|
||||
* exit at our own sweet time
|
||||
orte_job_t *daemons;
|
||||
orte_proc_t **procs;
|
||||
/* if we are the HNP, ensure our local procs are terminated */
|
||||
orte_odls.kill_local_procs(ORTE_JOBID_WILDCARD, false);
|
||||
/* now lookup the daemon job object */
|
||||
if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
return ORTE_ERR_NOT_FOUND;
|
||||
}
|
||||
procs = (orte_proc_t**)daemons->procs->addr;
|
||||
/* declare us terminated so things can exit cleanly */
|
||||
procs[0]->state = ORTE_PROC_STATE_TERMINATED;
|
||||
daemons->num_terminated++;
|
||||
/* need to check for job complete as otherwise this doesn't
|
||||
* get triggered in single-daemon systems
|
||||
*/
|
||||
orte_plm_base_check_job_completed(daemons);
|
||||
/* all done! */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
/* eventually, we need to revise this so we only
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user