Merge pull request #3467 from rhc54/topic/slurm
Enable full operations under SLURM on Cray systems
This commit is contained in:
Commit ee4ce13e16
@@ -13,6 +13,7 @@
 # Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
 # Copyright (c) 2016 Los Alamos National Security, LLC. All rights
 # reserved.
+# Copyright (c) 2017 Intel, Inc. All rights reserved.
 # $COPYRIGHT$
 #
 # Additional copyrights may follow
@@ -68,6 +69,15 @@ AC_DEFUN([ORTE_CHECK_SLURM],[
                    [orte_check_slurm_happy="yes"],
                    [orte_check_slurm_happy="no"])])

+    # check to see if this is a Cray nativized slurm env.
+
+    slurm_cray_env=0
+    OPAL_CHECK_ALPS([orte_slurm_cray],
+                    [slurm_cray_env=1])
+
+    AC_DEFINE_UNQUOTED([SLURM_CRAY_ENV],[$slurm_cray_env],
+                       [defined to 1 if slurm cray env, 0 otherwise])
+
     OPAL_SUMMARY_ADD([[Resource Managers]],[[Slurm]],[$1],[$orte_check_slurm_happy])
     fi
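Note: AC_DEFINE_UNQUOTED emits a preprocessor define into the generated config header, which is why the C sources changed below can test the value with plain #if SLURM_CRAY_ENV rather than #ifdef. A hedged sketch of the generated header line on a system where OPAL_CHECK_ALPS succeeds (non-Cray builds get the value 0):

    /* defined to 1 if slurm cray env, 0 otherwise */
    #define SLURM_CRAY_ENV 1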
@@ -13,6 +13,7 @@
 # Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
 # Copyright (c) 2011-2016 Los Alamos National Security, LLC.
 # All rights reserved.
+# Copyright (c) 2017 Intel, Inc. All rights reserved.
 # $COPYRIGHT$
 #
 # Additional copyrights may follow
@@ -38,12 +39,4 @@ AC_DEFUN([MCA_orte_plm_slurm_CONFIG],[
     AC_SUBST([plm_slurm_LDFLAGS])
     AC_SUBST([plm_slurm_LIBS])

-    # check to see if this is a Cray nativized slurm env.
-
-    slurm_cray_env=0
-    OPAL_CHECK_ALPS([plm_slurm_cray],
-                    [slurm_cray_env=1])
-
-    AC_DEFINE_UNQUOTED([SLURM_CRAY_ENV],[$slurm_cray_env],
-                       [defined to 1 if slurm cray env, 0 otherwise])
 ])dnl
@@ -49,18 +49,3 @@ are running.

 Please consult with your system administrator about obtaining
 such support.
-[no-local-support]
-The SLURM process starter cannot start processes local to
-mpirun when executing under a Cray environment. The problem
-is that mpirun is not itself a child of a slurmd daemon. Thus,
-any processes mpirun itself starts will inherit incorrect
-RDMA credentials.
-
-Your application will be mapped and run (assuming adequate
-resources) on the remaining allocated nodes. If adequate
-resources are not available, you will need to exit and obtain
-a larger allocation.
-
-This situation will be fixed in a future release. Meantime,
-you can turn "off" this warning by setting the plm_slurm_warning
-MCA param to 0.
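Note: the [no-local-support] tag removed above is an ini-style topic key in the help file; it was resolved at runtime by the orte_show_help() call that the next hunk removes from the SLURM plm module:

    /* args: help file, topic tag, and whether to prepend the error header */
    orte_show_help("help-plm-slurm.txt", "no-local-support", true);

With the warning path gone, the help topic has no remaining callers.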
@@ -193,25 +193,6 @@ static void launch_daemons(int fd, short args, void *cbdata)
                          "%s plm:slurm: LAUNCH DAEMONS CALLED",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

-#if SLURM_CRAY_ENV
-    /* if we are in a Cray-SLURM environment, then we cannot
-     * launch procs local to the HNP. The problem
-     * is the MPI processes launched on the head node (where the
-     * ORTE_PROC_IS_HNP evaluates to true) get launched by a daemon
-     * (mpirun) which is not a child of a slurmd daemon. This
-     * means that any RDMA credentials obtained via the odls/alps
-     * local launcher are incorrect. So warn the user and set
-     * the envar for no_schedule_local if mpirun is not on a
-     * system management node (i.e. is part of the allocation)
-     * and the "no_use_local" flag hasn't been set */
-    if (mca_plm_slurm_component.slurm_warning_msg &&
-        (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL))) {
-        orte_show_help("help-plm-slurm.txt", "no-local-support", true);
-        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
-        mca_plm_slurm_component.slurm_warning_msg = false;  // only do this once
-    }
-#endif
-
     /* if we are launching debugger daemons, then just go
      * do it - no new daemons will be launched
      */
@@ -11,7 +11,7 @@
  * All rights reserved.
  * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
  * reserved.
- * Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  * Copyright (c) 2015 Research Organization for Information Science
  * and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
@@ -30,6 +30,7 @@
 #include "opal/util/if.h"

 #include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/rmaps/base/base.h"
 #include "orte/util/name_fns.h"
 #include "orte/runtime/orte_globals.h"

@@ -46,7 +47,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
     int rc, i;
     orte_node_t *node, *hnp_node, *nptr;
     char *ptr;
-    bool hnp_alone = true;
+    bool hnp_alone = true, skiphnp = false;
     orte_attribute_t *kv;
     char **alias=NULL, **nalias;

@@ -77,6 +78,33 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)

     /* get the hnp node's info */
     hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
+#if SLURM_CRAY_ENV
+    /* if we are in a Cray-SLURM environment, then we cannot
+     * launch procs local to the HNP. The problem
+     * is the MPI processes launched on the head node (where the
+     * ORTE_PROC_IS_HNP evaluates to true) get launched by a daemon
+     * (mpirun) which is not a child of a slurmd daemon. This
+     * means that any RDMA credentials obtained via the odls/alps
+     * local launcher are incorrect. Test for this condition. If
+     * found, then take steps to ensure we launch a daemon on
+     * the same node as mpirun and that it gets used to fork
+     * local procs instead of mpirun so they get the proper
+     * credential */
+    if (NULL != hnp_node) {
+        OPAL_LIST_FOREACH(node, nodes, orte_node_t) {
+            if (orte_ifislocal(node->name)) {
+                orte_hnp_is_allocated = true;
+                break;
+            }
+        }
+        if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) {
+            hnp_node->name = strdup("mpirun");
+            skiphnp = true;
+            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
+        }
+    }
+#endif
+

     /* cycle through the list */
     while (NULL != (item = opal_list_remove_first(nodes))) {
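The guard and update above follow ORTE's usual mapping-directive bit-flag pattern. A minimal self-contained sketch of that pattern, using an illustrative flag value rather than ORTE's real ORTE_MAPPING_NO_USE_LOCAL constant:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* illustrative stand-in; the real directive bits live in ORTE's
     * rmaps headers and are read/set via ORTE_GET/SET_MAPPING_DIRECTIVE */
    #define MAPPING_NO_USE_LOCAL 0x0100u

    int main(void)
    {
        uint16_t mapping = 0;          /* stands in for the mapping policy word */
        bool hnp_is_allocated = true;  /* outcome of the orte_ifislocal() scan */

        if (hnp_is_allocated && !(mapping & MAPPING_NO_USE_LOCAL)) {
            mapping |= MAPPING_NO_USE_LOCAL;  /* force procs off the HNP node */
            printf("local launch disabled; a daemon will fork local procs\n");
        }
        return 0;
    }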
@@ -86,7 +114,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
          * first position since it is the first one entered. We need to check to see
          * if this node is the same as the HNP's node so we don't double-enter it
          */
-        if (NULL != hnp_node && orte_ifislocal(node->name)) {
+        if (!skiphnp && NULL != hnp_node && orte_ifislocal(node->name)) {
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                 "%s ras:base:node_insert updating HNP [%s] info to %ld slots",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -189,7 +217,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
     * ensure we don't have any domain info in the node record
     * for the hnp
     */
-    if (!orte_have_fqdn_allocation && !hnp_alone) {
+    if (NULL != hnp_node && !orte_have_fqdn_allocation && !hnp_alone) {
        if (NULL != (ptr = strchr(hnp_node->name, '.'))) {
            *ptr = '\0';
        }
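The added NULL check protects the existing domain-stripping step when no HNP node record is present. The stripping itself is a plain strchr() truncation at the first dot; a standalone sketch with a hypothetical hostname:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char name[] = "login1.example.com";  /* hypothetical node name */
        char *ptr = strchr(name, '.');
        if (NULL != ptr) {
            *ptr = '\0';                     /* drop the domain part */
        }
        printf("%s\n", name);                /* prints "login1" */
        return 0;
    }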