1
1

Ensure that the PMIx server system-level rendezvous file is written only by the HNP, because (at least for Slurm on Cray) a daemon can be colocated with the HNP and would overwrite the file. Update the scaling.pl script to use only the system-level rendezvous so that its connection is not rejected by a colocated daemon.

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-10-14 10:16:49 -07:00
родитель b75ed83d4b
Коммит 6ffb0d0507
2 изменённых файла: 7 добавлений и 5 удалений

Просмотреть файл

@@ -27,7 +27,7 @@ my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op);
my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1"); my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1");
my @starterlist = qw(mpirun prun srun aprun); my @starterlist = qw(mpirun prun srun aprun);
my @starteroptionlist = (" --novm", my @starteroptionlist = (" --novm",
"", " --system-server-only",
" --distribution=cyclic --ntasks-per-node=", " --distribution=cyclic --ntasks-per-node=",
" -N"); " -N");
@@ -267,7 +267,7 @@ foreach $starter (@starters) {
# if we are going to use the dvm, then we # if we are going to use the dvm, then we
if ($starter eq "prun") { if ($starter eq "prun") {
# need to start it # need to start it
$cmd = "orte-dvm -mca pmix_system_server 1 2>&1 &"; $cmd = "orte-dvm --system_server 2>&1 &";
if ($myresults) { if ($myresults) {
print FILE "\n\n$cmd\n"; print FILE "\n\n$cmd\n";
} }
@@ -341,7 +341,7 @@ foreach $starter (@starters) {
} }
if ($havedvm) { if ($havedvm) {
if (!$SHOWME) { if (!$SHOWME) {
$cmd = "prun --terminate"; $cmd = "prun --system-server-only --terminate";
system($cmd); system($cmd);
} }
$havedvm = 0; $havedvm = 0;

Просмотреть файл

@@ -273,8 +273,10 @@ int pmix_server_init(void)
} }
/* if requested, tell the server to drop a system-level /* if requested, tell the server to drop a system-level
* PMIx connection point */ * PMIx connection point - only do this for the HNP as, in
if (orte_pmix_server_globals.system_server) { * at least one case, a daemon can be colocated with the
* HNP and would overwrite the server rendezvous file */
if (orte_pmix_server_globals.system_server && ORTE_PROC_IS_HNP) {
kv = OBJ_NEW(opal_value_t); kv = OBJ_NEW(opal_value_t);
kv->key = strdup(OPAL_PMIX_SERVER_SYSTEM_SUPPORT); kv->key = strdup(OPAL_PMIX_SERVER_SYSTEM_SUPPORT);
kv->type = OPAL_BOOL; kv->type = OPAL_BOOL;