Enable singletons to run without any active OOB module until they attempt to comm_spawn
Этот коммит содержится в:
родитель
e4f6f83b9d
Коммит
3e44d3c9e3
@ -1176,9 +1176,9 @@ static int open_port(char *port_name, orte_rml_tag_t given_tag)
|
||||
*/
|
||||
if ((orte_process_info.proc_type & ORTE_PROC_SINGLETON) &&
|
||||
!orte_routing_is_enabled) {
|
||||
if (ORTE_SUCCESS != orte_plm_base_fork_hnp()) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_FATAL);
|
||||
return ORTE_ERR_FATAL;
|
||||
if (ORTE_SUCCESS != (rc = orte_plm_base_fork_hnp())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
orte_routing_is_enabled = true;
|
||||
/* need to init_routes again to redirect messages
|
||||
|
@ -239,21 +239,23 @@ static int rte_init(void)
|
||||
}
|
||||
OBJ_DESTRUCT(&kvn);
|
||||
|
||||
/* construct the RTE string */
|
||||
/* construct the RTE string, if we have one */
|
||||
param = orte_rml.get_contact_info();
|
||||
/* push it out for others to use */
|
||||
OBJ_CONSTRUCT(&kvn, opal_value_t);
|
||||
kvn.key = strdup(OPAL_DSTORE_URI);
|
||||
kvn.type = OPAL_STRING;
|
||||
kvn.data.string = strdup(param);
|
||||
free(param);
|
||||
if (ORTE_SUCCESS != (rc = opal_pmix.put(PMIX_GLOBAL, &kvn))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
if (NULL != param) {
|
||||
/* push it out for others to use */
|
||||
OBJ_CONSTRUCT(&kvn, opal_value_t);
|
||||
kvn.key = strdup(OPAL_DSTORE_URI);
|
||||
kvn.type = OPAL_STRING;
|
||||
kvn.data.string = strdup(param);
|
||||
free(param);
|
||||
if (ORTE_SUCCESS != (rc = opal_pmix.put(PMIX_GLOBAL, &kvn))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&kvn);
|
||||
return rc;
|
||||
}
|
||||
OBJ_DESTRUCT(&kvn);
|
||||
return rc;
|
||||
}
|
||||
OBJ_DESTRUCT(&kvn);
|
||||
|
||||
|
||||
/* push our local rank */
|
||||
OBJ_CONSTRUCT(&kvn, opal_value_t);
|
||||
kvn.key = strdup(OPAL_DSTORE_LOCALRANK);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -115,8 +115,10 @@ int orte_oob_base_select(void)
|
||||
}
|
||||
}
|
||||
|
||||
if (0 == opal_list_get_size(&orte_oob_base.actives)) {
|
||||
/* no support available means we really cannot run */
|
||||
if (0 == opal_list_get_size(&orte_oob_base.actives) &&
|
||||
!orte_standalone_operation) {
|
||||
/* no support available means we really cannot run unless
|
||||
* we are a singleton */
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output,
|
||||
"mca:oob:select: Init failed to return any available transports");
|
||||
orte_show_help("help-oob-base.txt", "no-interfaces-avail", true);
|
||||
|
@ -10,6 +10,7 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -155,3 +156,9 @@ you should use --tune instead. The --tune option allows one to specify mca
|
||||
parameters as well as environment variables from within a file using the same
|
||||
command line syntax (e.g. -mca var val -mca var "val" -x var=val -x var).
|
||||
The --am option will be removed in a future release.
|
||||
#
|
||||
[no-oob]
|
||||
A call was made to launch additional processes, but this process has
|
||||
no active out-of-band transports and therefore cannot execute this call.
|
||||
Please check to see if you have the "oob" MCA parameter set and ensure
|
||||
that it is either unset or at least includes the tcp transport.
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -35,10 +35,12 @@
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/orted/pmix/pmix_server.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
@ -109,6 +111,13 @@ int orte_plm_proxy_spawn(orte_job_t *jdata)
|
||||
"%s plm:base:proxy spawn child job",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* if we don't have any active OOB modules, then abort */
|
||||
if (0 == opal_list_get_size(&orte_oob_base.actives)) {
|
||||
orte_show_help("help-plm-base.txt", "no-oob", true);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERR_SILENT);
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
/* if we are a singleton and the supporting HNP hasn't
|
||||
* been spawned, then do so now
|
||||
*/
|
||||
@ -229,6 +238,13 @@ int orte_plm_base_fork_hnp(void)
|
||||
int rc;
|
||||
orte_jobid_t jobid;
|
||||
|
||||
/* if we don't have any active OOB modules, then abort */
|
||||
if (0 == opal_list_get_size(&orte_oob_base.actives)) {
|
||||
orte_show_help("help-plm-base.txt", "no-oob", true);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERR_SILENT);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
|
||||
/* A pipe is used to communicate between the parent and child to
|
||||
indicate whether the exec ultimately succeeded or failed. The
|
||||
child sets the pipe to be close-on-exec; the child only ever
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user