1
1

Okay, get srun to play nice. The problem was that everything worked fine as long as the user ran "salloc" with an argument requesting a specific number of nodes. However, if the user instead specified a number of processes, we launched that number of daemons, resulting in multiple daemons per node. Not good.

So force things to behave correctly either way.

This commit was SVN r25792.
Этот коммит содержится в:
Ralph Castain 2012-01-26 19:58:57 +00:00
родитель ef94e606c7
Коммит 07f3a91075

Просмотреть файл

@@ -257,9 +257,6 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
/* add the srun command */ /* add the srun command */
opal_argv_append(&argc, &argv, "srun"); opal_argv_append(&argc, &argv, "srun");
/* ensure we only launch one daemon/node */
opal_argv_append(&argc, &argv, "--ntasks-per-node=1");
/* alert us if any orteds die during startup */ /* alert us if any orteds die during startup */
opal_argv_append(&argc, &argv, "--kill-on-bad-exit"); opal_argv_append(&argc, &argv, "--kill-on-bad-exit");
@@ -308,15 +305,16 @@ static int plm_slurm_launch_job(orte_job_t *jdata)
opal_argv_append(&argc, &argv, tmp); opal_argv_append(&argc, &argv, tmp);
free(tmp); free(tmp);
asprintf(&tmp, "--ntasks=%lu", (unsigned long)map->num_new_daemons);
opal_argv_append(&argc, &argv, tmp);
free(tmp);
asprintf(&tmp, "--nodelist=%s", nodelist_flat); asprintf(&tmp, "--nodelist=%s", nodelist_flat);
opal_argv_append(&argc, &argv, tmp); opal_argv_append(&argc, &argv, tmp);
free(tmp); free(tmp);
} }
/* tell srun how many tasks to run */
asprintf(&tmp, "--ntasks=%lu", (unsigned long)map->num_new_daemons);
opal_argv_append(&argc, &argv, tmp);
free(tmp);
OPAL_OUTPUT_VERBOSE((2, orte_plm_globals.output, OPAL_OUTPUT_VERBOSE((2, orte_plm_globals.output,
"%s plm:slurm: launching on nodes %s", "%s plm:slurm: launching on nodes %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodelist_flat)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nodelist_flat));