From 348bf8e13f3c5b5b2d7169cde939b3e395dc4cd1 Mon Sep 17 00:00:00 2001 From: Aurelien Bouteiller Date: Wed, 24 Oct 2018 02:03:24 -0400 Subject: [PATCH] Prevent errmgr invocation from crashing in finalize Signed-off-by: Aurelien Bouteiller --- orte/mca/errmgr/default_app/errmgr_default_app.c | 9 +++++---- orte/mca/ess/base/ess_base_std_orted.c | 7 +++++-- orte/mca/ess/hnp/ess_hnp_module.c | 9 ++++++--- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/orte/mca/errmgr/default_app/errmgr_default_app.c b/orte/mca/errmgr/default_app/errmgr_default_app.c index 065f2de28e..2c7605c310 100644 --- a/orte/mca/errmgr/default_app/errmgr_default_app.c +++ b/orte/mca/errmgr/default_app/errmgr_default_app.c @@ -4,7 +4,7 @@ * * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. * - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. 
@@ -101,14 +101,14 @@ static void notify_cbfunc(int status, state = ORTE_PROC_STATE_TERMINATED; } + /* push it into our event base */ + ORTE_ACTIVATE_PROC_STATE((orte_process_name_t*)source, state); + /* let the caller know we processed this, but allow the * chain to continue */ if (NULL != cbfunc) { cbfunc(ORTE_SUCCESS, NULL, NULL, NULL, cbdata); } - - /* push it into our event base */ - ORTE_ACTIVATE_PROC_STATE((orte_process_name_t*)source, state); } /************************ @@ -143,6 +143,7 @@ static int finalize(void) { if (SIZE_MAX != myerrhandle) { opal_pmix.deregister_evhandler(myerrhandle, NULL, NULL); + myerrhandle = SIZE_MAX; } return ORTE_SUCCESS; } diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 9711bd64fd..164099ade4 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University + * Copyright (c) 2004-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -669,7 +669,9 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_filem_base_framework); (void) mca_base_framework_close(&orte_grpcomm_base_framework); (void) mca_base_framework_close(&orte_iof_base_framework); - (void) mca_base_framework_close(&orte_errmgr_base_framework); + /* first stage shutdown of the errmgr, deregister the handler but keep + * the required facilities until the rml and oob are offline */ + orte_errmgr.finalize(); (void) mca_base_framework_close(&orte_plm_base_framework); /* make sure our local procs are dead */ orte_odls.kill_local_procs(NULL); @@ -678,6 +680,7 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_routed_base_framework); (void) mca_base_framework_close(&orte_rml_base_framework); (void) mca_base_framework_close(&orte_oob_base_framework); + (void) mca_base_framework_close(&orte_errmgr_base_framework); (void) mca_base_framework_close(&orte_state_base_framework); /* remove our use of the session directory tree */ orte_session_dir_finalize(ORTE_PROC_MY_NAME); diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index e8e811645e..e5b844d475 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University + * Copyright (c) 2004-2018 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -799,8 +799,9 @@ static int rte_finalize(void) (void) mca_base_framework_close(&orte_grpcomm_base_framework); (void) mca_base_framework_close(&orte_routed_base_framework); (void) mca_base_framework_close(&orte_plm_base_framework); - (void) mca_base_framework_close(&orte_errmgr_base_framework); - (void) mca_base_framework_close(&orte_state_base_framework); + /* first stage shutdown of the errmgr, deregister the handler but keep + * the required facilities until the rml and oob are offline */ + orte_errmgr.finalize(); /* cleanup the pstat stuff */ (void) mca_base_framework_close(&opal_pstat_base_framework); @@ -816,6 +817,8 @@ static int rte_finalize(void) /* shutdown the messaging frameworks */ (void) mca_base_framework_close(&orte_rml_base_framework); (void) mca_base_framework_close(&orte_oob_base_framework); + (void) mca_base_framework_close(&orte_errmgr_base_framework); + (void) mca_base_framework_close(&orte_state_base_framework); /* remove our use of the session directory tree */ orte_session_dir_finalize(ORTE_PROC_MY_NAME);