1
1

Merge pull request #2764 from rhc54/topic/dvm

If a tool sees the HNP it is attached to die (thereby losing connecti…
Этот коммит содержится в:
Ralph Castain 2017-01-19 15:39:30 -08:00 коммит произвёл GitHub
родитель ca50b31de1 19bb64cfb8
Коммит bb132f6d03
2 изменённых файла: 16 добавлений и 5 удалений

Просмотреть файл

@@ -9,7 +9,7 @@
  * reserved.
  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
  * All rights reserved.
- * Copyright (c) 2013 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -103,8 +103,14 @@ static void proc_errors(int fd, short args, void *cbdata)
  return;
  }
- /* all errors require abort */
- orte_errmgr_base_abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
+ /* if we lost our lifeline, then just stop the event loop
+ * so the main program can cleanly terminate */
+ if (ORTE_PROC_STATE_LIFELINE_LOST == caddy->proc_state) {
+ orte_event_base_active = false;
+ } else {
+ /* all other errors require abort */
+ orte_errmgr_base_abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
+ }
  OBJ_RELEASE(caddy);
  }

Просмотреть файл

@@ -14,7 +14,7 @@
  * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
  * Copyright (c) 2007-2016 Los Alamos National Security, LLC. All rights
  * reserved.
- * Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -170,9 +170,14 @@ int orterun(int argc, char *argv[])
  ORTE_UPDATE_EXIT_STATUS(1);
  goto DONE;
  }
- while (1) {
+ while (orte_event_base_active) {
  opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE);
  }
+ /* we are terminated when the DVM master shuts down, thereby
+ * closing our connection to them. This looks like an error,
+ * but is not - so correct our exit status here */
+ orte_exit_status = 0;
+ goto DONE;
  } else {
  /* spawn the job and its daemons */
  memset(&launchst, 0, sizeof(launchst));