1
1
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2018-01-25 08:43:44 -08:00
родитель 9fb80bd239
Коммит d1071397ac
15 изменённых файлов: 50 добавлений и 429 удалений

Просмотреть файл

@ -10,6 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
@ -50,4 +51,3 @@ libmca_ess_alps_la_SOURCES =$(sources)
libmca_ess_alps_la_CPPFLAGS = $(ess_alps_CPPFLAGS)
libmca_ess_alps_la_LDFLAGS = -module -avoid-version $(ess_alps_LDFLAGS)
libmca_ess_alps_la_LIBADD = $(ess_alps_LIBS)

Просмотреть файл

@ -45,7 +45,6 @@
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_cr.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/routed/base/base.h"
@ -58,17 +57,12 @@
#include "orte/mca/odls/odls_types.h"
#include "orte/mca/filem/base/base.h"
#include "orte/mca/errmgr/base/base.h"
#if OPAL_ENABLE_FT_CR == 1
#include "orte/mca/snapc/base/base.h"
#include "orte/mca/sstore/base/base.h"
#endif
#include "orte/mca/state/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_cr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
@ -285,44 +279,6 @@ int orte_ess_base_app_setup(bool db_restrict_local)
goto error;
}
#if OPAL_ENABLE_FT_CR == 1
/*
* Setup the SnapC
*/
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_snapc_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_sstore_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) {
ORTE_ERROR_LOG(ret);
error = "orte_snapc_base_select";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_sstore_base_select";
goto error;
}
/* apps need the OPAL CR stuff */
opal_cr_set_enabled(true);
#else
opal_cr_set_enabled(false);
#endif
/* Initalize the CR setup
* Note: Always do this, even in non-FT builds.
* If we don't some user level tools may hang.
*/
if (ORTE_SUCCESS != (ret = orte_cr_init())) {
ORTE_ERROR_LOG(ret);
error = "orte_cr_init";
goto error;
}
/* open the distributed file system */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
@ -344,13 +300,6 @@ int orte_ess_base_app_setup(bool db_restrict_local)
int orte_ess_base_app_finalize(void)
{
orte_cr_finalize();
#if OPAL_ENABLE_FT_CR == 1
(void) mca_base_framework_close(&orte_snapc_base_framework);
(void) mca_base_framework_close(&orte_sstore_base_framework);
#endif
/* release the conduits */
orte_rml.close_conduit(orte_mgmt_conduit);
orte_rml.close_conduit(orte_coll_conduit);
@ -414,8 +363,7 @@ void orte_ess_base_app_abort(int status, bool report)
* clean environment. Taken from orte_finalize():
* - Assume errmgr cleans up child processes before we exit.
*/
/* CRS cleanup since it may have a named pipe and thread active */
orte_cr_finalize();
/* If we were asked to report this termination, do so.
* Since singletons don't start an HNP unless necessary, and
* direct-launched procs don't have daemons at all, only send

Просмотреть файл

@ -38,11 +38,11 @@
#include "opal/dss/dss.h"
#include "opal/mca/event/event.h"
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_cr.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/pmix/base/base.h"
#include "opal/mca/pstat/base/base.h"
#include "opal/util/arch.h"
#include "opal/util/opal_environ.h"
#include "opal/util/os_path.h"
#include "opal/util/proc.h"
@ -61,10 +61,6 @@
#include "orte/mca/regx/base/base.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/base/base.h"
#if OPAL_ENABLE_FT_CR == 1
#include "orte/mca/snapc/base/base.h"
#include "orte/mca/sstore/base/base.h"
#endif
#include "orte/mca/filem/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
@ -73,7 +69,6 @@
#include "orte/mca/errmgr/base/base.h"
#include "orte/mca/state/base/base.h"
#include "orte/mca/state/state.h"
#include "orte/runtime/orte_cr.h"
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_quit.h"
@ -626,46 +621,6 @@ int orte_ess_base_orted_setup(void)
goto error;
}
#if OPAL_ENABLE_FT_CR == 1
/*
* Setup the SnapC
*/
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_snapc_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_sstore_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(!ORTE_PROC_IS_HNP, ORTE_PROC_IS_DAEMON))) {
ORTE_ERROR_LOG(ret);
error = "orte_snapc_base_select";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_sstore_base_select";
goto error;
}
/* For daemons, ORTE doesn't need the OPAL CR stuff */
opal_cr_set_enabled(false);
#else
opal_cr_set_enabled(false);
#endif
/*
* Initalize the CR setup
* Note: Always do this, even in non-FT builds.
* If we don't some user level tools may hang.
*/
if (ORTE_SUCCESS != (ret = orte_cr_init())) {
ORTE_ERROR_LOG(ret);
error = "orte_cr_init";
goto error;
}
/* setup the DFS framework */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
@ -680,7 +635,7 @@ int orte_ess_base_orted_setup(void)
return ORTE_SUCCESS;
error:
error:
orte_show_help("help-orte-runtime.txt",
"orte_init:startup:internal-failure",
true, error, ORTE_ERROR_NAME(ret), ret);

Просмотреть файл

@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
*
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
@ -37,25 +37,24 @@
#include "opal/mca/event/event.h"
#include "opal/mca/pmix/base/base.h"
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_cr.h"
#include "opal/runtime/opal_progress_threads.h"
#include "opal/util/arch.h"
#include "opal/util/opal_environ.h"
#include "opal/util/argv.h"
#include "opal/util/proc.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/plm/base/base.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/routed/base/base.h"
#include "orte/mca/errmgr/base/base.h"
#include "orte/mca/iof/base/base.h"
#include "orte/mca/state/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
#include "orte/util/show_help.h"
#include "orte/runtime/orte_cr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
@ -96,8 +95,6 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
int ret;
char *error = NULL;
opal_list_t transports;
orte_jobid_t jobid;
orte_vpid_t vpid;
opal_list_t info;
opal_value_t *kv, *knext, val;
opal_pmix_query_t *q;
@ -123,65 +120,17 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
}
if (NULL == opal_pmix.tool_init) {
/* we no longer support non-pmix tools */
orte_show_help("help-ess-base.txt",
"legacy-tool", true);
ret = ORTE_ERR_SILENT;
error = "opal_pmix.tool_init";
ret = ORTE_ERR_NOT_SUPPORTED;
goto error;
}
/* set the event base for the pmix component code */
opal_pmix_base_set_evbase(orte_event_base);
/* we have to define our name here */
if (NULL != orte_ess_base_jobid &&
NULL != orte_ess_base_vpid) {
opal_output_verbose(2, orte_ess_base_framework.framework_output,
"ess:tool:obtaining name from environment");
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_jobid(&jobid, orte_ess_base_jobid))) {
return(ret);
}
ORTE_PROC_MY_NAME->jobid = jobid;
if (ORTE_SUCCESS != (ret = orte_util_convert_string_to_vpid(&vpid, orte_ess_base_vpid))) {
return(ret);
}
ORTE_PROC_MY_NAME->vpid = vpid;
} else {
/* If we are a tool with no name, then define it here */
uint16_t jobfam;
uint32_t hash32;
uint32_t bias;
opal_output_verbose(2, orte_ess_base_framework.framework_output,
"ess:tool:computing name");
/* hash the nodename */
OPAL_HASH_STR(orte_process_info.nodename, hash32);
bias = (uint32_t)orte_process_info.pid;
/* fold in the bias */
hash32 = hash32 ^ bias;
/* now compress to 16-bits */
jobfam = (uint16_t)(((0x0000ffff & (0xffff0000 & hash32) >> 16)) ^ (0x0000ffff & hash32));
/* set the name */
ORTE_PROC_MY_NAME->jobid = 0xffff0000 & ((uint32_t)jobfam << 16);
ORTE_PROC_MY_NAME->vpid = 0;
}
/* my name is set, xfer it to the OPAL layer */
orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
/* initialize - PMIx may set our name here if we attach to
* a PMIx server */
/* initialize */
OBJ_CONSTRUCT(&info, opal_list_t);
/* pass our name so the PMIx layer can use it */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_TOOL_NSPACE);
orte_util_convert_jobid_to_string(&kv->data.string, ORTE_PROC_MY_NAME->jobid);
kv->type = OPAL_STRING;
opal_list_append(&info, &kv->super);
/* ditto for our rank */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_TOOL_RANK);
kv->data.name.vpid = ORTE_PROC_MY_NAME->vpid;
kv->type = OPAL_VPID;
opal_list_append(&info, &kv->super);
if (NULL != flags) {
/* pass along any directives */
OPAL_LIST_FOREACH_SAFE(kv, knext, flags, opal_value_t) {
@ -196,9 +145,9 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
goto error;
}
OPAL_LIST_DESTRUCT(&info);
/* the PMIx server set our name - record it here */
ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
orte_process_info.super.proc_arch = opal_local_arch;
@ -294,7 +243,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
}
/* setup I/O forwarding system - must come after we init routes */
if (NULL != orte_process_info.my_hnp_uri) {
if (NULL != orte_process_info.my_hnp_uri && NULL == opal_pmix.server_iof_push) {
/* extract the name */
if (ORTE_SUCCESS != orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, ORTE_PROC_MY_HNP, NULL)) {
orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
@ -374,13 +323,14 @@ int orte_ess_base_tool_finalize(void)
* a very small subset of orte_init - ensure that
* I only back those elements out
*/
if (NULL != orte_process_info.my_hnp_uri) {
if (NULL != orte_process_info.my_hnp_uri && NULL == opal_pmix.server_iof_push) {
(void) mca_base_framework_close(&orte_iof_base_framework);
}
(void) mca_base_framework_close(&orte_routed_base_framework);
(void) mca_base_framework_close(&orte_rml_base_framework);
(void) mca_base_framework_close(&orte_errmgr_base_framework);
opal_pmix.finalize();
(void) mca_base_framework_close(&opal_pmix_base_framework);
return ORTE_SUCCESS;

Просмотреть файл

@ -10,7 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2017 Intel, Inc. All rights reserved.
# Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -89,3 +89,9 @@ when OMPI was not configured --with-alps and we weren't able
to discover an ALPS installation in the usual places.
Please configure as appropriate and try again.
#
[legacy-tool]
We no longer support non-PMIx-based tools, and require a
minimum level of PMIx v2.0.
Please update the tool and/or the PMIx version you are using.

1
orte/mca/ess/env/Makefile.am поставляемый
Просмотреть файл

@ -11,6 +11,7 @@
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow

210
orte/mca/ess/env/ess_env_module.c поставляемый
Просмотреть файл

@ -56,9 +56,6 @@
#include "orte/mca/plm/base/base.h"
#include "orte/mca/rmaps/base/base.h"
#if OPAL_ENABLE_FT_CR == 1
#include "orte/mca/snapc/base/base.h"
#endif
#include "orte/mca/filem/base/base.h"
#include "orte/util/proc_info.h"
#include "orte/util/session_dir.h"
@ -68,7 +65,6 @@
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_cr.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/ess/env/ess_env.h"
@ -78,19 +74,11 @@ static int env_set_name(void);
static int rte_init(void);
static int rte_finalize(void);
#if OPAL_ENABLE_FT_CR == 1
static int rte_ft_event(int state);
#endif
orte_ess_base_module_t orte_ess_env_module = {
rte_init,
rte_finalize,
orte_ess_base_app_abort,
#if OPAL_ENABLE_FT_CR == 1
rte_ft_event
#else
NULL
#endif
};
static int rte_init(void)
@ -175,201 +163,3 @@ static int env_set_name(void)
return ORTE_SUCCESS;
}
#if OPAL_ENABLE_FT_CR == 1
static int rte_ft_event(int state)
{
int ret, exit_status = ORTE_SUCCESS;
orte_proc_type_t svtype;
/******** Checkpoint Prep ********/
if(OPAL_CRS_CHECKPOINT == state) {
/*
* Notify SnapC
*/
if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_CHECKPOINT))) {
ORTE_ERROR_LOG(ret);
return ret;
}
/*
* Notify Routed
*/
if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_CHECKPOINT))) {
ORTE_ERROR_LOG(ret);
return ret;
}
/*
* Notify RML -> OOB
*/
if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_CHECKPOINT))) {
ORTE_ERROR_LOG(ret);
return ret;
}
}
/******** Continue Recovery ********/
else if (OPAL_CRS_CONTINUE == state ) {
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
"ess:env ft_event(%2d) - %s is Continuing",
state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/*
* Notify RML -> OOB
*/
if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_CONTINUE))) {
ORTE_ERROR_LOG(ret);
return ret;
}
/*
* Notify Routed
*/
if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_CONTINUE))) {
ORTE_ERROR_LOG(ret);
return ret;
}
/*
* Notify SnapC
*/
if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_CONTINUE))) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (opal_cr_continue_like_restart) {
/*
* Barrier to make all processes have been successfully restarted before
* we try to remove some restart only files.
*/
opal_pmix.fence(NULL, 0);
if( orte_cr_flush_restart_files ) {
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
"ess:env ft_event(%2d): %s "
"Cleanup restart files...",
state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
opal_crs_base_cleanup_flush();
}
}
}
/******** Restart Recovery ********/
else if (OPAL_CRS_RESTART == state ) {
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
"ess:env ft_event(%2d) - %s is Restarting",
state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/*
* This should follow the ess init() function
*/
/*
* - Reset Contact information
*/
if( ORTE_SUCCESS != (ret = env_set_name() ) ) {
exit_status = ret;
}
/*
* Notify RML -> OOB
*/
if( ORTE_SUCCESS != (ret = orte_rml.ft_event(OPAL_CRS_RESTART))) {
ORTE_ERROR_LOG(ret);
return ret;
}
/*
* Restart the routed framework
* JJH: Lie to the finalize function so it does not try to contact the daemon.
*/
svtype = orte_process_info.proc_type;
orte_process_info.proc_type = ORTE_PROC_TOOL;
if (ORTE_SUCCESS != (ret = orte_routed.finalize()) ) {
ORTE_ERROR_LOG(ret);
return ret;
}
orte_process_info.proc_type = svtype;
if (ORTE_SUCCESS != (ret = orte_routed.initialize()) ) {
ORTE_ERROR_LOG(ret);
return ret;
}
/*
* Restart the PLM - Does nothing at the moment, but included for completeness
*/
if (ORTE_SUCCESS != (ret = orte_plm.finalize())) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (ORTE_SUCCESS != (ret = orte_plm.init())) {
ORTE_ERROR_LOG(ret);
return ret;
}
/*
* RML - Enable communications
*/
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
ORTE_ERROR_LOG(ret);
return ret;
}
/*
* Notify Routed
*/
if( ORTE_SUCCESS != (ret = orte_routed.ft_event(OPAL_CRS_RESTART))) {
ORTE_ERROR_LOG(ret);
return ret;
}
/*
* Barrier to make all processes have been successfully restarted before
* we try to remove some restart only files.
*/
opal_pmix.fence(NULL, 0);
if( orte_cr_flush_restart_files ) {
OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
"ess:env ft_event(%2d): %s "
"Cleanup restart files...",
state, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
opal_crs_base_cleanup_flush();
}
/*
* Session directory re-init
*/
if (orte_create_session_dirs) {
if (ORTE_SUCCESS != (ret = orte_session_dir(true,
orte_process_info.tmpdir_base,
orte_process_info.nodename,
NULL, /* Batch ID -- Not used */
ORTE_PROC_MY_NAME))) {
exit_status = ret;
}
opal_output_set_output_file_info(orte_process_info.proc_session_dir,
"output-", NULL, NULL);
}
/*
* Notify SnapC
*/
if( ORTE_SUCCESS != (ret = orte_snapc.ft_event(OPAL_CRS_RESTART))) {
ORTE_ERROR_LOG(ret);
return ret;
}
}
else if (OPAL_CRS_TERM == state ) {
/* Nothing */
}
else {
/* Error state = Nothing */
}
return exit_status;
}
#endif

Просмотреть файл

@ -42,13 +42,13 @@
#include "opal/class/opal_list.h"
#include "opal/mca/event/event.h"
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_cr.h"
#include "opal/util/arch.h"
#include "opal/util/argv.h"
#include "opal/util/if.h"
#include "opal/util/os_path.h"
#include "opal/util/output.h"
#include "opal/util/opal_environ.h"
#include "opal/util/malloc.h"
#include "opal/util/basename.h"
#include "opal/util/fd.h"
@ -72,10 +72,6 @@
#include "orte/mca/plm/plm.h"
#include "orte/mca/odls/base/base.h"
#include "orte/mca/rmaps/base/base.h"
#if OPAL_ENABLE_FT_CR == 1
#include "orte/mca/snapc/base/base.h"
#include "orte/mca/sstore/base/base.h"
#endif
#include "orte/mca/filem/base/base.h"
#include "orte/mca/state/base/base.h"
#include "orte/mca/state/state.h"
@ -95,7 +91,6 @@
#include "orte/runtime/orte_wait.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_quit.h"
#include "orte/runtime/orte_cr.h"
#include "orte/runtime/orte_locks.h"
#include "orte/mca/ess/ess.h"
@ -150,6 +145,7 @@ static int rte_init(void)
orte_topology_t *t;
opal_list_t transports;
orte_ess_base_signal_t *sig;
opal_value_t val;
/* run the prolog */
if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
@ -473,6 +469,22 @@ static int rte_init(void)
proc->pid = orte_process_info.pid;
orte_oob_base_get_addr(&proc->rml_uri);
orte_process_info.my_hnp_uri = strdup(proc->rml_uri);
/* store it in the local PMIx repo for later retrieval */
OBJ_CONSTRUCT(&val, opal_value_t);
val.key = OPAL_PMIX_PROC_URI;
val.type = OPAL_STRING;
val.data.string = proc->rml_uri;
if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_NAME, &val))) {
ORTE_ERROR_LOG(ret);
val.key = NULL;
val.data.string = NULL;
OBJ_DESTRUCT(&val);
error = "store uri";
goto error;
}
val.key = NULL;
val.data.string = NULL;
OBJ_DESTRUCT(&val);
/* we are also officially a daemon, so better update that field too */
orte_process_info.my_daemon_uri = strdup(proc->rml_uri);
proc->state = ORTE_PROC_STATE_RUNNING;
@ -684,46 +696,7 @@ static int rte_init(void)
error = "orte_filem_base_select";
goto error;
}
#if OPAL_ENABLE_FT_CR == 1
/*
* Setup the SnapC
*/
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_snapc_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
error = "orte_sstore_base_open";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) {
ORTE_ERROR_LOG(ret);
error = "orte_snapc_base_select";
goto error;
}
if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) {
ORTE_ERROR_LOG(ret);
error = "orte_sstore_base_select";
goto error;
}
/* For HNP, ORTE doesn't need the OPAL CR stuff */
opal_cr_set_enabled(false);
#else
opal_cr_set_enabled(false);
#endif
/*
* Initalize the CR setup
* Note: Always do this, even in non-FT builds.
* If we don't some user level tools may hang.
*/
if (ORTE_SUCCESS != (ret = orte_cr_init())) {
ORTE_ERROR_LOG(ret);
error = "orte_cr_init";
goto error;
}
/* setup the dfs framework */
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
ORTE_ERROR_LOG(ret);
@ -773,7 +746,7 @@ static int rte_init(void)
opal_progress_set_yield_when_idle(false);
return ORTE_SUCCESS;
error:
error:
if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
orte_show_help("help-orte-runtime.txt",
"orte_init:startup:internal-failure",
@ -898,8 +871,6 @@ static void rte_abort(int status, bool report)
* - Assume errmgr cleans up child processes before we exit.
*/
/* CRS cleanup since it may have a named pipe and thread active */
orte_cr_finalize();
/* ensure we scrub the session directory tree */
orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
/* - Clean out the global structures

Просмотреть файл

@ -11,6 +11,7 @@
# All rights reserved.
# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow

Просмотреть файл

@ -2,7 +2,7 @@
# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC.
# All rights reserved.
# Copyright (c) 2014 Intel, Inc. All rights reserved
# Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#

Просмотреть файл

@ -10,6 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow

Просмотреть файл

@ -10,6 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow

Просмотреть файл

@ -10,6 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow

Просмотреть файл

@ -11,6 +11,7 @@
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -40,7 +40,6 @@
#include "orte/mca/plm/plm.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/proc_info.h"
#include "orte/runtime/orte_cr.h"
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
@ -125,7 +124,6 @@ static int rte_init(void)
opal_list_append(&flags, &val->super);
}
/* do the standard tool init */
if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(&flags))) {
ORTE_ERROR_LOG(ret);
@ -137,7 +135,7 @@ static int rte_init(void)
return ORTE_SUCCESS;
error:
error:
if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
orte_show_help("help-orte-runtime.txt",
"orte_init:startup:internal-failure",
@ -176,9 +174,6 @@ static void rte_abort(int status, bool report)
* - Assume errmgr cleans up child processes before we exit.
*/
/* CRS cleanup since it may have a named pipe and thread active */
orte_cr_finalize();
/* - Clean out the global structures
* (not really necessary, but good practice)
*/