diff --git a/orte/mca/ras/slurm/ras_slurm.h b/orte/mca/ras/slurm/ras_slurm.h
index 50f601e5cf..9dd7b93e01 100644
--- a/orte/mca/ras/slurm/ras_slurm.h
+++ b/orte/mca/ras/slurm/ras_slurm.h
@@ -11,6 +11,7 @@
  *                         All rights reserved.
  * Copyright (c) 2012-2013 Los Alamos National Security, LLC.
  *                         All rights reserved.
+ * Copyright (c) 2015      Intel, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -37,6 +38,7 @@ typedef struct {
     bool dyn_alloc_enabled;
     char *config_file;
     bool rolling_alloc;
+    bool use_all;
 } orte_ras_slurm_component_t;
 ORTE_DECLSPEC extern orte_ras_slurm_component_t mca_ras_slurm_component;
diff --git a/orte/mca/ras/slurm/ras_slurm_component.c b/orte/mca/ras/slurm/ras_slurm_component.c
index 3667560c52..2eb47b5366 100644
--- a/orte/mca/ras/slurm/ras_slurm_component.c
+++ b/orte/mca/ras/slurm/ras_slurm_component.c
@@ -106,6 +106,14 @@ static int ras_slurm_register(void)
                                             MCA_BASE_VAR_SCOPE_READONLY,
                                             &mca_ras_slurm_component.rolling_alloc);
 
+    mca_ras_slurm_component.use_all = false;
+    (void) mca_base_component_var_register (component, "use_entire_allocation",
+                                            "Use entire allocation (not just job step nodes) for this application",
+                                            MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
+                                            OPAL_INFO_LVL_9,
+                                            MCA_BASE_VAR_SCOPE_READONLY,
+                                            &mca_ras_slurm_component.use_all);
+
     return ORTE_SUCCESS;
 }
diff --git a/orte/mca/ras/slurm/ras_slurm_module.c b/orte/mca/ras/slurm/ras_slurm_module.c
index 5221d0973b..a90fe9b44c 100644
--- a/orte/mca/ras/slurm/ras_slurm_module.c
+++ b/orte/mca/ras/slurm/ras_slurm_module.c
@@ -280,39 +280,63 @@ static int orte_ras_slurm_allocate(orte_job_t *jdata, opal_list_t *nodes)
         ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
         return ORTE_ERR_OUT_OF_RESOURCE;
     }
-
-    /* get the number of process slots we were assigned on each node */
-    tasks_per_node = getenv("SLURM_TASKS_PER_NODE");
-    if (NULL == tasks_per_node) {
-        /* couldn't find any version - abort */
-        orte_show_help("help-ras-slurm.txt", "slurm-env-var-not-found", 1,
-                       "SLURM_TASKS_PER_NODE");
-        free(regexp);
-        return ORTE_ERR_NOT_FOUND;
-    }
-    node_tasks = strdup(tasks_per_node);
-    if(NULL == node_tasks) {
-        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
-        free(regexp);
-        return ORTE_ERR_OUT_OF_RESOURCE;
-    }
-
-    /* get the number of CPUs per task that the user provided to slurm */
-    tmp = getenv("SLURM_CPUS_PER_TASK");
-    if(NULL != tmp) {
-        cpus_per_task = atoi(tmp);
-        if(0 >= cpus_per_task) {
-            opal_output(0, "ras:slurm:allocate: Got bad value from SLURM_CPUS_PER_TASK. "
-                        "Variable was: %s\n", tmp);
-            ORTE_ERROR_LOG(ORTE_ERROR);
-            free(node_tasks);
+    if (mca_ras_slurm_component.use_all) {
+        /* this is an oddball case required for debug situations where
+         * a tool is started that will then call mpirun. In this case,
+         * Slurm will assign only 1 task per node to the tool, but
+         * we want mpirun to use the entire allocation. They don't give
+         * us a specific variable for this purpose, so we have to fudge
+         * a bit - but this is a special edge case, and we'll live with it */
+        tasks_per_node = getenv("SLURM_JOB_CPUS_PER_NODE");
+        if (NULL == tasks_per_node) {
+            /* couldn't find any version - abort */
+            orte_show_help("help-ras-slurm.txt", "slurm-env-var-not-found", 1,
+                           "SLURM_JOB_CPUS_PER_NODE");
             free(regexp);
-            return ORTE_ERROR;
+            return ORTE_ERR_NOT_FOUND;
+        }
+        node_tasks = strdup(tasks_per_node);
+        if (NULL == node_tasks) {
+            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+            free(regexp);
+            return ORTE_ERR_OUT_OF_RESOURCE;
         }
-    } else {
         cpus_per_task = 1;
+    } else {
+        /* get the number of process slots we were assigned on each node */
+        tasks_per_node = getenv("SLURM_TASKS_PER_NODE");
+        if (NULL == tasks_per_node) {
+            /* couldn't find any version - abort */
+            orte_show_help("help-ras-slurm.txt", "slurm-env-var-not-found", 1,
+                           "SLURM_TASKS_PER_NODE");
+            free(regexp);
+            return ORTE_ERR_NOT_FOUND;
+        }
+        node_tasks = strdup(tasks_per_node);
+        if (NULL == node_tasks) {
+            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+            free(regexp);
+            return ORTE_ERR_OUT_OF_RESOURCE;
+        }
+
+        /* get the number of CPUs per task that the user provided to slurm */
+        tmp = getenv("SLURM_CPUS_PER_TASK");
+        if(NULL != tmp) {
+            cpus_per_task = atoi(tmp);
+            if(0 >= cpus_per_task) {
+                opal_output(0, "ras:slurm:allocate: Got bad value from SLURM_CPUS_PER_TASK. "
+                            "Variable was: %s\n", tmp);
+                ORTE_ERROR_LOG(ORTE_ERROR);
+                free(node_tasks);
+                free(regexp);
+                return ORTE_ERROR;
+            }
+        } else {
+            cpus_per_task = 1;
+        }
     }
-
+
     ret = orte_ras_slurm_discover(regexp, node_tasks, nodes);
     free(regexp);
     free(node_tasks);
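Note: the new behavior is opt-in via the MCA variable registered above. Assuming Open MPI's usual <framework>_<component>_<variable> naming for MCA parameters, it could be enabled at launch with something like:

    mpirun -mca ras_slurm_use_entire_allocation 1 ...

which makes the SLURM RAS build the node list from SLURM_JOB_CPUS_PER_NODE (the per-node CPU counts of the whole allocation) instead of SLURM_TASKS_PER_NODE, the per-job-step task counts used in the default path.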