diff --git a/orte/mca/ess/alps/Makefile.am b/orte/mca/ess/alps/Makefile.am index b2312df2f6..fed1f2df22 100644 --- a/orte/mca/ess/alps/Makefile.am +++ b/orte/mca/ess/alps/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ # @@ -50,4 +51,3 @@ libmca_ess_alps_la_SOURCES =$(sources) libmca_ess_alps_la_CPPFLAGS = $(ess_alps_CPPFLAGS) libmca_ess_alps_la_LDFLAGS = -module -avoid-version $(ess_alps_LDFLAGS) libmca_ess_alps_la_LIBADD = $(ess_alps_LIBS) - diff --git a/orte/mca/ess/base/ess_base_std_app.c b/orte/mca/ess/base/ess_base_std_app.c index c1e3bd3ab3..7addd036dc 100644 --- a/orte/mca/ess/base/ess_base_std_app.c +++ b/orte/mca/ess/base/ess_base_std_app.c @@ -45,7 +45,6 @@ #include "opal/util/output.h" #include "opal/util/proc.h" #include "opal/runtime/opal.h" -#include "opal/runtime/opal_cr.h" #include "orte/mca/rml/base/base.h" #include "orte/mca/routed/base/base.h" @@ -58,17 +57,12 @@ #include "orte/mca/odls/odls_types.h" #include "orte/mca/filem/base/base.h" #include "orte/mca/errmgr/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#include "orte/mca/sstore/base/base.h" -#endif #include "orte/mca/state/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" #include "orte/util/show_help.h" -#include "orte/runtime/orte_cr.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -285,44 +279,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) goto error; } -#if OPAL_ENABLE_FT_CR == 1 - /* - * Setup the SnapC - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_select"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_select"; - goto error; - } - /* apps need the OPAL CR stuff */ - opal_cr_set_enabled(true); -#else - opal_cr_set_enabled(false); -#endif - /* Initalize the CR setup - * Note: Always do this, even in non-FT builds. - * If we don't some user level tools may hang. - */ - if (ORTE_SUCCESS != (ret = orte_cr_init())) { - ORTE_ERROR_LOG(ret); - error = "orte_cr_init"; - goto error; - } /* open the distributed file system */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -344,13 +300,6 @@ int orte_ess_base_app_setup(bool db_restrict_local) int orte_ess_base_app_finalize(void) { - orte_cr_finalize(); - -#if OPAL_ENABLE_FT_CR == 1 - (void) mca_base_framework_close(&orte_snapc_base_framework); - (void) mca_base_framework_close(&orte_sstore_base_framework); -#endif - /* release the conduits */ orte_rml.close_conduit(orte_mgmt_conduit); orte_rml.close_conduit(orte_coll_conduit); @@ -414,8 +363,7 @@ void orte_ess_base_app_abort(int status, bool report) * clean environment. Taken from orte_finalize(): * - Assume errmgr cleans up child processes before we exit. */ - /* CRS cleanup since it may have a named pipe and thread active */ - orte_cr_finalize(); + /* If we were asked to report this termination, do so. * Since singletons don't start an HNP unless necessary, and * direct-launched procs don't have daemons at all, only send diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index b848f78745..7f50533880 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -38,11 +38,11 @@ #include "opal/dss/dss.h" #include "opal/mca/event/event.h" #include "opal/runtime/opal.h" -#include "opal/runtime/opal_cr.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/pmix/base/base.h" #include "opal/mca/pstat/base/base.h" #include "opal/util/arch.h" +#include "opal/util/opal_environ.h" #include "opal/util/os_path.h" #include "opal/util/proc.h" @@ -61,10 +61,6 @@ #include "orte/mca/regx/base/base.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#include "orte/mca/sstore/base/base.h" -#endif #include "orte/mca/filem/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" @@ -73,7 +69,6 @@ #include "orte/mca/errmgr/base/base.h" #include "orte/mca/state/base/base.h" #include "orte/mca/state/state.h" -#include "orte/runtime/orte_cr.h" #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_quit.h" @@ -626,46 +621,6 @@ int orte_ess_base_orted_setup(void) goto error; } -#if OPAL_ENABLE_FT_CR == 1 - /* - * Setup the SnapC - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_snapc_base_select(!ORTE_PROC_IS_HNP, ORTE_PROC_IS_DAEMON))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_select"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_select"; - goto error; - } - - /* For daemons, ORTE doesn't need the OPAL CR stuff */ - opal_cr_set_enabled(false); -#else - opal_cr_set_enabled(false); -#endif - /* - * Initalize the CR setup - * Note: Always do this, even in non-FT builds. - * If we don't some user level tools may hang. - */ - if (ORTE_SUCCESS != (ret = orte_cr_init())) { - ORTE_ERROR_LOG(ret); - error = "orte_cr_init"; - goto error; - } /* setup the DFS framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -680,7 +635,7 @@ int orte_ess_base_orted_setup(void) return ORTE_SUCCESS; - error: + error: orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", true, error, ORTE_ERROR_NAME(ret), ret); diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index 478de19aa1..f3ca7baa3f 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. * * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. @@ -37,25 +37,24 @@ #include "opal/mca/event/event.h" #include "opal/mca/pmix/base/base.h" #include "opal/runtime/opal.h" -#include "opal/runtime/opal_cr.h" #include "opal/runtime/opal_progress_threads.h" #include "opal/util/arch.h" +#include "opal/util/opal_environ.h" #include "opal/util/argv.h" #include "opal/util/proc.h" +#include "orte/mca/iof/base/base.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/plm/base/base.h" #include "orte/mca/rml/base/base.h" #include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/routed/base/base.h" #include "orte/mca/errmgr/base/base.h" -#include "orte/mca/iof/base/base.h" #include "orte/mca/state/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/show_help.h" -#include "orte/runtime/orte_cr.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" @@ -96,8 +95,6 @@ int orte_ess_base_tool_setup(opal_list_t *flags) int ret; char *error = NULL; opal_list_t transports; - orte_jobid_t jobid; - orte_vpid_t vpid; opal_list_t info; opal_value_t *kv, *knext, val; opal_pmix_query_t *q; @@ -123,65 +120,17 @@ int orte_ess_base_tool_setup(opal_list_t *flags) } if (NULL == opal_pmix.tool_init) { /* we no longer support non-pmix tools */ + orte_show_help("help-ess-base.txt", + "legacy-tool", true); + ret = ORTE_ERR_SILENT; error = "opal_pmix.tool_init"; - ret = ORTE_ERR_NOT_SUPPORTED; goto error; } /* set the event base for the pmix component code */ opal_pmix_base_set_evbase(orte_event_base); - /* we have to define our name here */ - if (NULL != orte_ess_base_jobid && - NULL != orte_ess_base_vpid) { - opal_output_verbose(2, orte_ess_base_framework.framework_output, - "ess:tool:obtaining name from environment"); - if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) { - return(ret); - } - ORTE_PROC_MY_NAME->jobid = jobid; - if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) { - return(ret); - } - ORTE_PROC_MY_NAME->vpid = vpid; - } else { - /* If we are a tool with no name, then define it here */ - uint16_t jobfam; - uint32_t hash32; - uint32_t bias; - - opal_output_verbose(2, orte_ess_base_framework.framework_output, - "ess:tool:computing name"); - /* hash the nodename */ - OPAL_HASH_STR(orte_process_info.nodename, hash32); - bias = (uint32_t)orte_process_info.pid; - /* fold in the bias */ - hash32 = hash32 ^ bias; - - /* now compress to 16-bits */ - jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32)); - - /* set the name */ - ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16); - ORTE_PROC_MY_NAME->vpid = 0; - } - /* my name is set, xfer it to the OPAL layer */ - orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME; - - /* initialize - PMIx may set our name here if we attach to - * a PMIx server */ + /* initialize */ OBJ_CONSTRUCT(&info, opal_list_t); - /* pass our name so the PMIx layer can use it */ - kv = OBJ_NEW(opal_value_t); - kv->key = strdup(OPAL_PMIX_TOOL_NSPACE); - orte_util_convert_jobid_to_string(&kv->data.string, ORTE_PROC_MY_NAME->jobid); - kv->type = OPAL_STRING; - opal_list_append(&info, &kv->super); - /* ditto for our rank */ - kv = OBJ_NEW(opal_value_t); - kv->key = strdup(OPAL_PMIX_TOOL_RANK); - kv->data.name.vpid = ORTE_PROC_MY_NAME->vpid; - kv->type = OPAL_VPID; - opal_list_append(&info, &kv->super); if (NULL != flags) { /* pass along any directives */ OPAL_LIST_FOREACH_SAFE(kv, knext, flags, opal_value_t) { @@ -196,9 +145,9 @@ int orte_ess_base_tool_setup(opal_list_t *flags) goto error; } OPAL_LIST_DESTRUCT(&info); + /* the PMIx server set our name - record it here */ ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid; ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid; - orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename); orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL; orte_process_info.super.proc_arch = opal_local_arch; @@ -294,7 +243,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags) } /* setup I/O forwarding system - must come after we init routes */ - if (NULL != orte_process_info.my_hnp_uri) { + if (NULL != orte_process_info.my_hnp_uri && NULL == opal_pmix.server_iof_push) { /* extract the name */ if (ORTE_SUCCESS != orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, ORTE_PROC_MY_HNP, NULL)) { orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri); @@ -374,13 +323,14 @@ int orte_ess_base_tool_finalize(void) * a very small subset of orte_init - ensure that * I only back those elements out */ - if (NULL != orte_process_info.my_hnp_uri) { + if (NULL != orte_process_info.my_hnp_uri && NULL == opal_pmix.server_iof_push) { (void) mca_base_framework_close(&orte_iof_base_framework); } (void) mca_base_framework_close(&orte_routed_base_framework); (void) mca_base_framework_close(&orte_rml_base_framework); (void) mca_base_framework_close(&orte_errmgr_base_framework); + opal_pmix.finalize(); (void) mca_base_framework_close(&opal_pmix_base_framework); return ORTE_SUCCESS; diff --git a/orte/mca/ess/base/help-ess-base.txt b/orte/mca/ess/base/help-ess-base.txt index 0d4907b565..89d98ccb39 100644 --- a/orte/mca/ess/base/help-ess-base.txt +++ b/orte/mca/ess/base/help-ess-base.txt @@ -10,7 +10,7 @@ # University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. -# Copyright (c) 2017 Intel, Inc. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -89,3 +89,9 @@ when OMPI was not configured --with-alps and we weren't able to discover an ALPS installation in the usual places. Please configure as appropriate and try again. +# +[legacy-tool] +We no longer support non-PMIx-based tools, and require a +minimum level of PMIx v2.0. + +Please update the tool and/or the PMIx version you are using. diff --git a/orte/mca/ess/env/Makefile.am b/orte/mca/ess/env/Makefile.am index 905bf86fe1..76f9801cfc 100644 --- a/orte/mca/ess/env/Makefile.am +++ b/orte/mca/ess/env/Makefile.am @@ -11,6 +11,7 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/orte/mca/ess/env/ess_env_module.c b/orte/mca/ess/env/ess_env_module.c index ae793fd848..df55650e4a 100644 --- a/orte/mca/ess/env/ess_env_module.c +++ b/orte/mca/ess/env/ess_env_module.c @@ -56,9 +56,6 @@ #include "orte/mca/plm/base/base.h" #include "orte/mca/rmaps/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#endif #include "orte/mca/filem/base/base.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" @@ -68,7 +65,6 @@ #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_cr.h" #include "orte/mca/ess/ess.h" #include "orte/mca/ess/base/base.h" #include "orte/mca/ess/env/ess_env.h" @@ -78,19 +74,11 @@ static int env_set_name(void); static int rte_init(void); static int rte_finalize(void); -#if OPAL_ENABLE_FT_CR == 1 -static int rte_ft_event(int state); -#endif - orte_ess_base_module_t orte_ess_env_module = { rte_init, rte_finalize, orte_ess_base_app_abort, -#if OPAL_ENABLE_FT_CR == 1 - rte_ft_event -#else NULL -#endif }; static int rte_init(void) @@ -175,201 +163,3 @@ static int env_set_name(void) return ORTE_SUCCESS; } - -#if OPAL_ENABLE_FT_CR == 1 -static int rte_ft_event(int state) -{ - int ret, exit_status = ORTE_SUCCESS; - orte_proc_type_t svtype; - - /******** Checkpoint Prep ********/ - if(OPAL_CRS_CHECKPOINT == state) { - /* - * Notify SnapC - */ - if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_CHECKPOINT))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Notify Routed - */ - if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_CHECKPOINT))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Notify RML -> OOB - */ - if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_CHECKPOINT))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - /******** Continue Recovery ********/ - else if (OPAL_CRS_CONTINUE == state ) { - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, - "ess:env ft_event(%2d) - %s is Continuing", - state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* - * Notify RML -> OOB - */ - if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_CONTINUE))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Notify Routed - */ - if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_CONTINUE))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Notify SnapC - */ - if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_CONTINUE))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if (opal_cr_continue_like_restart) { - /* - * Barrier to make all processes have been successfully restarted before - * we try to remove some restart only files. - */ - opal_pmix.fence(NULL, 0); - - if( orte_cr_flush_restart_files ) { - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, - "ess:env ft_event(%2d): %s " - "Cleanup restart files...", - state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - opal_crs_base_cleanup_flush(); - } - } - } - /******** Restart Recovery ********/ - else if (OPAL_CRS_RESTART == state ) { - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, - "ess:env ft_event(%2d) - %s is Restarting", - state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* - * This should follow the ess init() function - */ - - /* - * - Reset Contact information - */ - if( ORTE_SUCCESS != (ret = env_set_name() ) ) { - exit_status = ret; - } - - /* - * Notify RML -> OOB - */ - if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_RESTART))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Restart the routed framework - * JJH: Lie to the finalize function so it does not try to contact the daemon. - */ - svtype = orte_process_info.proc_type; - orte_process_info.proc_type = ORTE_PROC_TOOL; - if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - orte_process_info.proc_type = svtype; - if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Restart the PLM - Does nothing at the moment, but included for completeness - */ - if (ORTE_SUCCESS != (ret = orte_plm.finalize())) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if (ORTE_SUCCESS != (ret = orte_plm.init())) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * RML - Enable communications - */ - if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Notify Routed - */ - if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_RESTART))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - /* - * Barrier to make all processes have been successfully restarted before - * we try to remove some restart only files. - */ - opal_pmix.fence(NULL, 0); - - if( orte_cr_flush_restart_files ) { - OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output, - "ess:env ft_event(%2d): %s " - "Cleanup restart files...", - state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - opal_crs_base_cleanup_flush(); - } - - /* - * Session directory re-init - */ - if (orte_create_session_dirs) { - if (ORTE_SUCCESS != (ret = orte_session_dir(true, - orte_process_info.tmpdir_base, - orte_process_info.nodename, - NULL, /* Batch ID -- Not used */ - ORTE_PROC_MY_NAME))) { - exit_status = ret; - } - - opal_output_set_output_file_info(orte_process_info.proc_session_dir, - "output-", NULL, NULL); - } - - /* - * Notify SnapC - */ - if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_RESTART))) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - else if (OPAL_CRS_TERM == state ) { - /* Nothing */ - } - else { - /* Error state = Nothing */ - } - - return exit_status; -} -#endif diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 11e3eb6457..ec954b9932 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -42,13 +42,13 @@ #include "opal/class/opal_list.h" #include "opal/mca/event/event.h" #include "opal/runtime/opal.h" -#include "opal/runtime/opal_cr.h" #include "opal/util/arch.h" #include "opal/util/argv.h" #include "opal/util/if.h" #include "opal/util/os_path.h" #include "opal/util/output.h" +#include "opal/util/opal_environ.h" #include "opal/util/malloc.h" #include "opal/util/basename.h" #include "opal/util/fd.h" @@ -72,10 +72,6 @@ #include "orte/mca/plm/plm.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/rmaps/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 -#include "orte/mca/snapc/base/base.h" -#include "orte/mca/sstore/base/base.h" -#endif #include "orte/mca/filem/base/base.h" #include "orte/mca/state/base/base.h" #include "orte/mca/state/state.h" @@ -95,7 +91,6 @@ #include "orte/runtime/orte_wait.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_quit.h" -#include "orte/runtime/orte_cr.h" #include "orte/runtime/orte_locks.h" #include "orte/mca/ess/ess.h" @@ -150,6 +145,7 @@ static int rte_init(void) orte_topology_t *t; opal_list_t transports; orte_ess_base_signal_t *sig; + opal_value_t val; /* run the prolog */ if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) { @@ -473,6 +469,22 @@ static int rte_init(void) proc->pid = orte_process_info.pid; orte_oob_base_get_addr(&proc->rml_uri); orte_process_info.my_hnp_uri = strdup(proc->rml_uri); + /* store it in the local PMIx repo for later retrieval */ + OBJ_CONSTRUCT(&val, opal_value_t); + val.key = OPAL_PMIX_PROC_URI; + val.type = OPAL_STRING; + val.data.string = proc->rml_uri; + if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &val))) { + ORTE_ERROR_LOG(ret); + val.key = NULL; + val.data.string = NULL; + OBJ_DESTRUCT(&val); + error = "store uri"; + goto error; + } + val.key = NULL; + val.data.string = NULL; + OBJ_DESTRUCT(&val); /* we are also officially a daemon, so better update that field too */ orte_process_info.my_daemon_uri = strdup(proc->rml_uri); proc->state = ORTE_PROC_STATE_RUNNING; @@ -684,46 +696,7 @@ static int rte_init(void) error = "orte_filem_base_select"; goto error; } -#if OPAL_ENABLE_FT_CR == 1 - /* - * Setup the SnapC - */ - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) { - ORTE_ERROR_LOG(ret); - error = "orte_snapc_base_select"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_sstore_base_select"; - goto error; - } - /* For HNP, ORTE doesn't need the OPAL CR stuff */ - opal_cr_set_enabled(false); -#else - opal_cr_set_enabled(false); -#endif - /* - * Initalize the CR setup - * Note: Always do this, even in non-FT builds. - * If we don't some user level tools may hang. - */ - if (ORTE_SUCCESS != (ret = orte_cr_init())) { - ORTE_ERROR_LOG(ret); - error = "orte_cr_init"; - goto error; - } /* setup the dfs framework */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) { ORTE_ERROR_LOG(ret); @@ -773,7 +746,7 @@ static int rte_init(void) opal_progress_set_yield_when_idle(false); return ORTE_SUCCESS; - error: + error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", @@ -898,8 +871,6 @@ static void rte_abort(int status, bool report) * - Assume errmgr cleans up child processes before we exit. */ - /* CRS cleanup since it may have a named pipe and thread active */ - orte_cr_finalize(); /* ensure we scrub the session directory tree */ orte_session_dir_cleanup(ORTE_JOBID_WILDCARD); /* - Clean out the global structures diff --git a/orte/mca/ess/lsf/Makefile.am b/orte/mca/ess/lsf/Makefile.am index dfcf60b8de..98dccbc4b1 100644 --- a/orte/mca/ess/lsf/Makefile.am +++ b/orte/mca/ess/lsf/Makefile.am @@ -11,6 +11,7 @@ # All rights reserved. # Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/orte/mca/ess/pmi/Makefile.am b/orte/mca/ess/pmi/Makefile.am index 4cea859710..3d53270285 100644 --- a/orte/mca/ess/pmi/Makefile.am +++ b/orte/mca/ess/pmi/Makefile.am @@ -2,7 +2,7 @@ # Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2013 Los Alamos National Security, LLC. # All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved +# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. # $COPYRIGHT$ # diff --git a/orte/mca/ess/singleton/Makefile.am b/orte/mca/ess/singleton/Makefile.am index 54c0f92c10..c68d191e30 100644 --- a/orte/mca/ess/singleton/Makefile.am +++ b/orte/mca/ess/singleton/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/orte/mca/ess/slurm/Makefile.am b/orte/mca/ess/slurm/Makefile.am index 6a098bb1cd..36e09eea2b 100644 --- a/orte/mca/ess/slurm/Makefile.am +++ b/orte/mca/ess/slurm/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/orte/mca/ess/tm/Makefile.am b/orte/mca/ess/tm/Makefile.am index 20bcac4914..e0e443210c 100644 --- a/orte/mca/ess/tm/Makefile.am +++ b/orte/mca/ess/tm/Makefile.am @@ -10,6 +10,7 @@ # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/orte/mca/ess/tool/Makefile.am b/orte/mca/ess/tool/Makefile.am index 7226862775..cf3b819a49 100644 --- a/orte/mca/ess/tool/Makefile.am +++ b/orte/mca/ess/tool/Makefile.am @@ -11,6 +11,7 @@ # All rights reserved. # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2017-2018 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow diff --git a/orte/mca/ess/tool/ess_tool_module.c b/orte/mca/ess/tool/ess_tool_module.c index a5a1ae3929..b734643c52 100644 --- a/orte/mca/ess/tool/ess_tool_module.c +++ b/orte/mca/ess/tool/ess_tool_module.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,7 +40,6 @@ #include "orte/mca/plm/plm.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/proc_info.h" -#include "orte/runtime/orte_cr.h" #include "orte/mca/ess/ess.h" #include "orte/mca/ess/base/base.h" @@ -125,7 +124,6 @@ static int rte_init(void) opal_list_append(&flags, &val->super); } - /* do the standard tool init */ if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(&flags))) { ORTE_ERROR_LOG(ret); @@ -137,7 +135,7 @@ static int rte_init(void) return ORTE_SUCCESS; - error: + error: if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) { orte_show_help("help-orte-runtime.txt", "orte_init:startup:internal-failure", @@ -176,9 +174,6 @@ static void rte_abort(int status, bool report) * - Assume errmgr cleans up child processes before we exit. */ - /* CRS cleanup since it may have a named pipe and thread active */ - orte_cr_finalize(); - /* - Clean out the global structures * (not really necessary, but good practice) */