Ensure that the PMIx server's system-level rendezvous file is written only by the HNP, because (at least for Slurm on Cray) a daemon could be colocated with the HNP and overwrite the file. Update the scaling.pl script to use only the system-level rendezvous so that it doesn't get rejected by a colocated daemon.
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
b75ed83d4b
Коммит
6ffb0d0507
@ -27,7 +27,7 @@ my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op);
|
|||||||
my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1");
|
my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1");
|
||||||
my @starterlist = qw(mpirun prun srun aprun);
|
my @starterlist = qw(mpirun prun srun aprun);
|
||||||
my @starteroptionlist = (" --novm",
|
my @starteroptionlist = (" --novm",
|
||||||
"",
|
" --system-server-only",
|
||||||
" --distribution=cyclic --ntasks-per-node=",
|
" --distribution=cyclic --ntasks-per-node=",
|
||||||
" -N");
|
" -N");
|
||||||
|
|
||||||
@ -267,7 +267,7 @@ foreach $starter (@starters) {
|
|||||||
# if we are going to use the dvm, then we
|
# if we are going to use the dvm, then we
|
||||||
if ($starter eq "prun") {
|
if ($starter eq "prun") {
|
||||||
# need to start it
|
# need to start it
|
||||||
$cmd = "orte-dvm -mca pmix_system_server 1 2>&1 &";
|
$cmd = "orte-dvm --system_server 2>&1 &";
|
||||||
if ($myresults) {
|
if ($myresults) {
|
||||||
print FILE "\n\n$cmd\n";
|
print FILE "\n\n$cmd\n";
|
||||||
}
|
}
|
||||||
@ -341,7 +341,7 @@ foreach $starter (@starters) {
|
|||||||
}
|
}
|
||||||
if ($havedvm) {
|
if ($havedvm) {
|
||||||
if (!$SHOWME) {
|
if (!$SHOWME) {
|
||||||
$cmd = "prun --terminate";
|
$cmd = "prun --system-server-only --terminate";
|
||||||
system($cmd);
|
system($cmd);
|
||||||
}
|
}
|
||||||
$havedvm = 0;
|
$havedvm = 0;
|
||||||
|
@ -273,8 +273,10 @@ int pmix_server_init(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* if requested, tell the server to drop a system-level
|
/* if requested, tell the server to drop a system-level
|
||||||
* PMIx connection point */
|
* PMIx connection point - only do this for the HNP as, in
|
||||||
if (orte_pmix_server_globals.system_server) {
|
* at least one case, a daemon can be colocated with the
|
||||||
|
* HNP and would overwrite the server rendezvous file */
|
||||||
|
if (orte_pmix_server_globals.system_server && ORTE_PROC_IS_HNP) {
|
||||||
kv = OBJ_NEW(opal_value_t);
|
kv = OBJ_NEW(opal_value_t);
|
||||||
kv->key = strdup(OPAL_PMIX_SERVER_SYSTEM_SUPPORT);
|
kv->key = strdup(OPAL_PMIX_SERVER_SYSTEM_SUPPORT);
|
||||||
kv->type = OPAL_BOOL;
|
kv->type = OPAL_BOOL;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user