
Merge pull request #3999 from hppritcha/topic/slurmd_controls_them_all

SLURM: launch all processes via slurmd
This commit is contained in:
Howard Pritchard 2017-08-03 15:33:44 -06:00 committed by GitHub
parent c27beea3a1 d08be74573
commit 897c62756b
4 changed files with 47 additions and 25 deletions

NEWS
View file

@@ -65,6 +65,11 @@ Master (not on release branches yet)
   via --enable-mpi-cxx.
 - Removed embedded VampirTrace. It is in maintenance mode since 2013.
   Please consider Score-P (score-p.org) as an external replacement.
+- Add a mca parameter ras_base_launch_orted_on_hn to allow for launching
+  MPI processes on the same node where mpirun is executing using a separate
+  orte daemon, rather than the mpirun process. This may be useful to set to
+  true when using SLURM, as it improves interoperability with SLURM's signal
+  propagation tools. By default it is set to false, except for Cray XC systems.
 3.0.0 -- July, 2017
 -------------------
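For reference, a usage sketch (assuming the standard Open MPI --mca and OMPI_MCA_ mechanisms; the process count and ./my_app are placeholders): the new parameter can be toggled at run time like any other MCA variable, either on the mpirun command line or in the environment.

    mpirun --mca ras_base_launch_orted_on_hn 1 -np 4 ./my_app
    # equivalently, set it in the environment before launching:
    export OMPI_MCA_ras_base_launch_orted_on_hn=1

The full parameter name follows from the ras/base registration in the ras_register() change below.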

View file

@@ -51,6 +51,7 @@ typedef struct orte_ras_base_t {
     orte_ras_base_module_t *active_module;
     int total_slots_alloc;
     int multiplier;
+    bool launch_orted_on_hn;
 } orte_ras_base_t;
 ORTE_DECLSPEC extern orte_ras_base_t orte_ras_base;

View file

@@ -59,6 +59,31 @@ static int ras_register(mca_base_register_flag_t flags)
                           NULL, 0, 0,
                           OPAL_INFO_LVL_9,
                           MCA_BASE_VAR_SCOPE_READONLY, &orte_ras_base.multiplier);
+#if SLURM_CRAY_ENV
+    /*
+     * If we are in a Cray-SLURM environment, then we cannot
+     * launch procs local to the HNP. The problem
+     * is the MPI processes launched on the head node (where the
+     * ORTE_PROC_IS_HNP evaluates to true) get launched by a daemon
+     * (mpirun) which is not a child of a slurmd daemon. This
+     * means that any RDMA credentials obtained via the odls/alps
+     * local launcher are incorrect. Test for this condition. If
+     * found, then take steps to ensure we launch a daemon on
+     * the same node as mpirun and that it gets used to fork
+     * local procs instead of mpirun so they get the proper
+     * credential */
+    orte_ras_base.launch_orted_on_hn = true;
+#else
+    orte_ras_base.launch_orted_on_hn = false;
+#endif
+    mca_base_var_register("orte", "ras", "base", "launch_orted_on_hn",
+                          "Launch an orte daemon on the head node",
+                          MCA_BASE_VAR_TYPE_BOOL,
+                          NULL, 0, 0,
+                          OPAL_INFO_LVL_9,
+                          MCA_BASE_VAR_SCOPE_READONLY, &orte_ras_base.launch_orted_on_hn);
     return ORTE_SUCCESS;
 }
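With this registration in place, the effective default (true in a Cray-SLURM build, false otherwise) should be reportable through ompi_info like any other MCA variable; a quick check, assuming the usual ompi_info parameter reporting:

    ompi_info --param ras base --level 9 | grep launch_orted_on_hn

Because the variable is registered at OPAL_INFO_LVL_9, it stays hidden unless a sufficiently high --level is requested.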

View file

@@ -9,7 +9,7 @@
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
- * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
+ * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
 * reserved.
 * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
 * Copyright (c) 2015 Research Organization for Information Science
@@ -78,33 +78,24 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
     /* get the hnp node's info */
     hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
-#if SLURM_CRAY_ENV
-    /* if we are in a Cray-SLURM environment, then we cannot
-     * launch procs local to the HNP. The problem
-     * is the MPI processes launched on the head node (where the
-     * ORTE_PROC_IS_HNP evalues to true) get launched by a daemon
-     * (mpirun) which is not a child of a slurmd daemon. This
-     * means that any RDMA credentials obtained via the odls/alps
-     * local launcher are incorrect. Test for this condition. If
-     * found, then take steps to ensure we launch a daemon on
-     * the same node as mpirun and that it gets used to fork
-     * local procs instead of mpirun so they get the proper
-     * credential */
-    if (NULL != hnp_node) {
-        OPAL_LIST_FOREACH(node, nodes, orte_node_t) {
-            if (orte_ifislocal(node->name)) {
-                orte_hnp_is_allocated = true;
-                break;
-            }
-        }
-        if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) {
-            hnp_node->name = strdup("mpirun");
-            skiphnp = true;
-            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
-        }
-    }
-#endif
+    if ((orte_ras_base.launch_orted_on_hn == true) &&
+        (orte_managed_allocation)) {
+        if (NULL != hnp_node) {
+            OPAL_LIST_FOREACH(node, nodes, orte_node_t) {
+                if (orte_ifislocal(node->name)) {
+                    orte_hnp_is_allocated = true;
+                    break;
+                }
+            }
+            if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) &
+                ORTE_MAPPING_NO_USE_LOCAL)) {
+                hnp_node->name = strdup("mpirun");
+                skiphnp = true;
+                ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
+            }
+        }
+    }
     /* cycle through the list */
     while (NULL != (item = opal_list_remove_first(nodes))) {