Fix a bug that Tim highlighted in which orted dumps core when an orterun is
CTRL-C'd. We were calling orte_finalize recursively, which caused a segv when it tried to use an already-freed framework (orte_rmgr in this case). I added a state flag to orte_universe_info to indicate where we are in the code. This was needed to determine whether we should call orte_abort when shutting down in the tcp oob. This commit was SVN r7160.
Этот коммит содержится в:
родитель
0f797fd40b
Коммит
78da530fd2
@ -37,6 +37,7 @@
|
||||
|
||||
#include "class/orte_proc_table.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "util/univ_info.h"
|
||||
|
||||
#include "mca/gpr/gpr.h"
|
||||
#include "mca/ns/ns.h"
|
||||
@ -469,7 +470,10 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t* peer)
|
||||
|
||||
/* if we lose the connection to the seed - abort */
|
||||
if(memcmp(&peer->peer_name,&mca_oob_name_seed,sizeof(mca_oob_name_seed)) == 0) {
|
||||
orte_errmgr.abort();
|
||||
/* If we are not already inside orte_finalize, then call abort */
|
||||
if (ORTE_UNIVERSE_STATE_FINALIZE > orte_universe_info.state) {
|
||||
orte_errmgr.abort();
|
||||
}
|
||||
}
|
||||
|
||||
mca_oob_tcp_peer_shutdown(peer);
|
||||
|
@ -32,6 +32,10 @@
|
||||
*/
|
||||
int orte_finalize(void)
|
||||
{
|
||||
|
||||
/* We have now entered the finalization stage */
|
||||
orte_universe_info.state = ORTE_UNIVERSE_STATE_FINALIZE;
|
||||
|
||||
/* finalize the orte system */
|
||||
orte_system_finalize();
|
||||
|
||||
|
@ -47,6 +47,9 @@ int orte_init(bool infrastructure)
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/* Since we are now finished with init, change the state to running */
|
||||
orte_universe_info.state = ORTE_UNIVERSE_STATE_RUNNING;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -41,7 +41,7 @@
|
||||
#include "orte/util/univ_info.h"
|
||||
|
||||
orte_universe_t orte_universe_info = {
|
||||
/* .init = */ false,
|
||||
/* .state = */ ORTE_UNIVERSE_STATE_PRE_INIT,
|
||||
/* .name = */ NULL,
|
||||
/* .host = */ NULL,
|
||||
/* .uid = */ NULL,
|
||||
@ -59,7 +59,7 @@ int orte_univ_info(void)
|
||||
int id, tmp;
|
||||
char *tmpname=NULL, *tptr, *ptr;
|
||||
|
||||
if (!orte_universe_info.init) {
|
||||
if (ORTE_UNIVERSE_STATE_PRE_INIT == orte_universe_info.state) {
|
||||
id = mca_base_param_register_string("universe", NULL, NULL, NULL, NULL);
|
||||
mca_base_param_lookup_string(id, &tmpname);
|
||||
|
||||
@ -119,7 +119,7 @@ int orte_univ_info(void)
|
||||
id = mca_base_param_register_string("universe", "script", NULL, NULL, orte_universe_info.scriptfile);
|
||||
mca_base_param_lookup_string(id, &(orte_universe_info.scriptfile));
|
||||
|
||||
orte_universe_info.init = true;
|
||||
orte_universe_info.state = ORTE_UNIVERSE_STATE_INIT;
|
||||
}
|
||||
|
||||
return(ORTE_SUCCESS);
|
||||
@ -158,7 +158,7 @@ int orte_univ_info_finalize(void)
|
||||
orte_universe_info.scriptfile = NULL;
|
||||
}
|
||||
|
||||
orte_universe_info.init = false;
|
||||
orte_universe_info.state = ORTE_UNIVERSE_STATE_PRE_INIT;
|
||||
orte_universe_info.persistence = false;
|
||||
orte_universe_info.console = false;
|
||||
orte_universe_info.console_connected = false;
|
||||
|
@ -37,12 +37,21 @@
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* States for the orte_universe_t.state field.
 *
 * The enumerators are declared in lifecycle order, and that order is
 * significant: callers compare states with relational operators (for
 * example "ORTE_UNIVERSE_STATE_FINALIZE > state" to test that finalization
 * has not started yet), so new states must be inserted at the position
 * matching their place in the lifecycle. */
|
||||
enum orte_universe_state_t {
|
||||
ORTE_UNIVERSE_STATE_PRE_INIT, /* Before initialization; static default, restored by orte_univ_info_finalize() */
|
||||
ORTE_UNIVERSE_STATE_INIT, /* In initialization; set by orte_univ_info() once the universe info is filled in */
|
||||
ORTE_UNIVERSE_STATE_RUNNING, /* After initialization; set at the end of a successful orte_init() */
|
||||
ORTE_UNIVERSE_STATE_FINALIZE /* In finalization; set on entry to orte_finalize() */
|
||||
};
|
||||
typedef enum orte_universe_state_t orte_universe_state_t;
|
||||
|
||||
/* Define the info structure underlying the Open MPI universe system
|
||||
* instanced in ompi_rte_init.c */
|
||||
|
||||
* instanced in ompi_rte_init.c */
|
||||
|
||||
struct orte_universe_t {
|
||||
bool init;
|
||||
orte_universe_state_t state; /**< Indicates state of the universe */
|
||||
char *name;
|
||||
char *host;
|
||||
char *uid;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user