Add an MCA param "hnp_on_smgmt_node" that mpirun can use to tell the orteds to ignore its topology signature, because mpirun is executing on a system management node and hence has a different topology than the compute nodes
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
568b58af75
Коммит
e9bc2934be
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2009 Institut National de Recherche en Informatique
|
||||
* et Automatique. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 IBM Corporation. All rights reserved.
|
||||
@ -1341,6 +1341,12 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv,
|
||||
opal_argv_append(argc, argv, "1");
|
||||
}
|
||||
|
||||
if (orte_hnp_on_smgmt_node) {
|
||||
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
|
||||
opal_argv_append(argc, argv, "orte_hnp_on_smgmt_node");
|
||||
opal_argv_append(argc, argv, "1");
|
||||
}
|
||||
|
||||
if (orte_map_stddiag_to_stderr) {
|
||||
opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID);
|
||||
opal_argv_append(argc, argv, "orte_map_stddiag_to_stderr");
|
||||
|
@ -16,7 +16,7 @@
|
||||
* Copyright (c) 2009 Institut National de Recherche en Informatique
|
||||
* et Automatique. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -129,6 +129,7 @@ static struct {
|
||||
bool tree_spawn;
|
||||
char *hnp_topo_sig;
|
||||
bool test_suicide;
|
||||
bool hnp_on_smgmt_node;
|
||||
} orted_globals;
|
||||
|
||||
/*
|
||||
@ -216,6 +217,10 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = {
|
||||
&orted_globals.hnp_topo_sig, OPAL_CMD_LINE_TYPE_STRING,
|
||||
"Topology signature of HNP" },
|
||||
|
||||
{ "orte_hnp_on_smgmt_node", '\0', NULL, "hnp-on-smgmt-node", 0,
|
||||
&orted_globals.hnp_on_smgmt_node, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Mpirun is executing on a system mgmt node whose topology is different from the compute nodes [Default = false]" },
|
||||
|
||||
/* End of list */
|
||||
{ NULL, '\0', NULL, NULL, 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_NULL, NULL }
|
||||
@ -767,7 +772,7 @@ int orte_daemon(int argc, char *argv[])
|
||||
/* add the local topology, if different from the HNP's or user directed us to,
|
||||
* but always if we are the first daemon to ensure we get a compute node */
|
||||
if (1 == ORTE_PROC_MY_NAME->vpid || orte_hetero_nodes ||
|
||||
0 != strcmp(orte_topo_signature, orted_globals.hnp_topo_sig)) {
|
||||
(!orted_globals.hnp_on_smgmt_node && 0 != strcmp(orte_topo_signature, orted_globals.hnp_topo_sig))) {
|
||||
tflag = 1;
|
||||
if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &tflag, 1, OPAL_UINT8))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
|
@ -92,6 +92,7 @@ int orted_debug_failure = -1;
|
||||
int orted_debug_failure_delay = -1;
|
||||
bool orte_hetero_apps = false;
|
||||
bool orte_hetero_nodes = false;
|
||||
bool orte_hnp_on_smgmt_node = false;
|
||||
bool orte_never_launched = false;
|
||||
bool orte_devel_level_output = false;
|
||||
bool orte_display_topo_with_map = false;
|
||||
|
@ -475,6 +475,7 @@ ORTE_DECLSPEC extern int orted_debug_failure_delay;
|
||||
/* homogeneity flags */
|
||||
ORTE_DECLSPEC extern bool orte_hetero_apps;
|
||||
ORTE_DECLSPEC extern bool orte_hetero_nodes;
|
||||
ORTE_DECLSPEC extern bool orte_hnp_on_smgmt_node;
|
||||
|
||||
ORTE_DECLSPEC extern bool orte_never_launched;
|
||||
ORTE_DECLSPEC extern bool orte_devel_level_output;
|
||||
|
@ -767,5 +767,13 @@ int orte_register_params(void)
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &orte_mgmt_transport);
|
||||
|
||||
orte_hnp_on_smgmt_node = false;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "hnp_on_smgmt_node",
|
||||
"Mpirun is executing on a system mgmt node whose topology is different from the compute nodes [Default = false]",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_hnp_on_smgmt_node);
|
||||
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user