1
1

Enable singletons to run without any active OOB module until they attempt to comm_spawn

Этот коммит содержится в:
Ralph Castain 2015-04-10 14:06:42 -07:00
родитель e4f6f83b9d
Коммит 3e44d3c9e3
5 изменённых файлов: 46 добавлений и 19 удалений

Просмотреть файл

@@ -1176,9 +1176,9 @@ static int open_port(char *port_name, orte_rml_tag_t given_tag)
*/
if ((orte_process_info.proc_type & ORTE_PROC_SINGLETON) &&
!orte_routing_is_enabled) {
if (ORTE_SUCCESS != orte_plm_base_fork_hnp()) {
ORTE_ERROR_LOG(ORTE_ERR_FATAL);
return ORTE_ERR_FATAL;
if (ORTE_SUCCESS != (rc = orte_plm_base_fork_hnp())) {
ORTE_ERROR_LOG(rc);
return OMPI_ERROR;
}
orte_routing_is_enabled = true;
/* need to init_routes again to redirect messages

Просмотреть файл

@@ -239,21 +239,23 @@ static int rte_init(void)
}
OBJ_DESTRUCT(&kvn);
/* construct the RTE string */
/* construct the RTE string, if we have one */
param = orte_rml.get_contact_info();
/* push it out for others to use */
OBJ_CONSTRUCT(&kvn, opal_value_t);
kvn.key = strdup(OPAL_DSTORE_URI);
kvn.type = OPAL_STRING;
kvn.data.string = strdup(param);
free(param);
if (ORTE_SUCCESS != (rc = opal_pmix.put(PMIX_GLOBAL, &kvn))) {
ORTE_ERROR_LOG(rc);
if (NULL != param) {
/* push it out for others to use */
OBJ_CONSTRUCT(&kvn, opal_value_t);
kvn.key = strdup(OPAL_DSTORE_URI);
kvn.type = OPAL_STRING;
kvn.data.string = strdup(param);
free(param);
if (ORTE_SUCCESS != (rc = opal_pmix.put(PMIX_GLOBAL, &kvn))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kvn);
return rc;
}
OBJ_DESTRUCT(&kvn);
return rc;
}
OBJ_DESTRUCT(&kvn);
/* push our local rank */
OBJ_CONSTRUCT(&kvn, opal_value_t);
kvn.key = strdup(OPAL_DSTORE_LOCALRANK);

Просмотреть файл

@@ -12,7 +12,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -115,8 +115,10 @@ int orte_oob_base_select(void)
}
}
if (0 == opal_list_get_size(&orte_oob_base.actives)) {
/* no support available means we really cannot run */
if (0 == opal_list_get_size(&orte_oob_base.actives) &&
!orte_standalone_operation) {
/* no support available means we really cannot run unless
* we are a singleton */
opal_output_verbose(5, orte_oob_base_framework.framework_output,
"mca:oob:select: Init failed to return any available transports");
orte_show_help("help-oob-base.txt", "no-interfaces-avail", true);

Просмотреть файл

@@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2015 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@@ -155,3 +156,9 @@ you should use --tune instead. The --tune option allows one to specify mca
parameters as well as environment variables from within a file using the same
command line syntax (e.g. -mca var val -mca var "val" -x var=val -x var).
The --am option will be removed in a future release.
#
[no-oob]
A call was made to launch additional processes, but this process has
no active out-of-band transports and therefore cannot execute this call.
Please check to see if you have the "oob" MCA parameter set and ensure
that it is either unset or at least includes the tcp transport.

Просмотреть файл

@@ -12,7 +12,7 @@
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@@ -35,10 +35,12 @@
#include "orte/util/name_fns.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/oob/base/base.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/rml/rml_types.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/routed/routed.h"
#include "orte/mca/state/state.h"
#include "orte/orted/pmix/pmix_server.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
@@ -109,6 +111,13 @@ int orte_plm_proxy_spawn(orte_job_t *jdata)
"%s plm:base:proxy spawn child job",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* if we don't have any active OOB modules, then abort */
if (0 == opal_list_get_size(&orte_oob_base.actives)) {
orte_show_help("help-plm-base.txt", "no-oob", true);
ORTE_FORCED_TERMINATE(ORTE_ERR_SILENT);
goto CLEANUP;
}
/* if we are a singleton and the supporting HNP hasn't
* been spawned, then do so now
*/
@@ -229,6 +238,13 @@ int orte_plm_base_fork_hnp(void)
int rc;
orte_jobid_t jobid;
/* if we don't have any active OOB modules, then abort */
if (0 == opal_list_get_size(&orte_oob_base.actives)) {
orte_show_help("help-plm-base.txt", "no-oob", true);
ORTE_FORCED_TERMINATE(ORTE_ERR_SILENT);
return ORTE_ERR_SILENT;
}
/* A pipe is used to communicate between the parent and child to
indicate whether the exec ultimately succeeded or failed. The
child sets the pipe to be close-on-exec; the child only ever