From 7b138ec6d976545d138524a01fe28da5abfdd069 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 14 Dec 2020 17:34:55 -0800 Subject: [PATCH] Update Slurm launch support Assign all CPUs on the node to the daemon Signed-off-by: Ralph Castain (cherry picked from commit 7bac7eed6ef423e47fe980b4c32eae36b8e1d4cb) --- orte/mca/plm/slurm/plm_slurm_module.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/orte/mca/plm/slurm/plm_slurm_module.c b/orte/mca/plm/slurm/plm_slurm_module.c index f8de6509e9..f05ea44e8d 100644 --- a/orte/mca/plm/slurm/plm_slurm_module.c +++ b/orte/mca/plm/slurm/plm_slurm_module.c @@ -12,7 +12,7 @@ * Copyright (c) 2006-2019 Cisco Systems, Inc. All rights reserved * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -267,6 +267,9 @@ static void launch_daemons(int fd, short args, void *cbdata) /* start one orted on each node */ opal_argv_append(&argc, &argv, "--ntasks-per-node=1"); + /* ensure Slurm adds all CPUs to this task */ + putenv("SLURM_WHOLE=1"); + if (!orte_enable_recovery) { /* kill the job if any orteds die */ opal_argv_append(&argc, &argv, "--kill-on-bad-exit");