diff --git a/contrib/scaling/scaling.pl b/contrib/scaling/scaling.pl index 710f036a45..947146d1b4 100755 --- a/contrib/scaling/scaling.pl +++ b/contrib/scaling/scaling.pl @@ -24,7 +24,7 @@ my $ppn = 1; my @csvrow; my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op); -my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1"); +my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca pmix_base_collect_data 0", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1 -mca pmix_base_collect_data 0"); my @starterlist = qw(mpirun prun srun aprun); my @starteroptionlist = (" --novm", " --system-server-only", @@ -87,6 +87,7 @@ my $option; my $havedvm = 0; my @starters; my @starteroptions; +my $pid; # if they explicitly requested specific starters, then # only use those @@ -267,12 +268,17 @@ foreach $starter (@starters) { # if we are going to use the dvm, then we if ($starter eq "prun") { # need to start it - $cmd = "orte-dvm --system_server 2>&1 &"; if ($myresults) { - print FILE "\n\n$cmd\n"; + print FILE "\n\norte-dvm --system-server\n"; } if (!$SHOWME) { - system($cmd); + unless ($pid = fork) { + unless (fork) { + exec "orte-dvm --system-server 2>&1"; + die "no exec"; + } + exit 0; + } $havedvm = 1; } # give it a couple of seconds to start @@ -297,7 +303,7 @@ foreach $starter (@starters) { # pre-position the executable $cmd = $starter . $starteroptions[$index] . " $test 2>&1"; my $error; - $error = system($cmd); + $error = `$cmd`; if (0 != $error) { if ($myresults) { print FILE "Command $cmd returned error $error\n"; @@ -342,7 +348,8 @@ foreach $starter (@starters) { if ($havedvm) { if (!$SHOWME) { $cmd = "prun --system-server-only --terminate"; - system($cmd); + my $rc = `$cmd`; + waitpid($pid, 0); } $havedvm = 0; } diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index a36dabc08d..c012a803ef 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -59,7 +59,7 @@ #include "opal/mca/rcache/rcache.h" #include "opal/mca/mpool/base/base.h" #include "opal/mca/btl/base/base.h" -#include "opal/mca/pmix/pmix.h" +#include "opal/mca/pmix/base/base.h" #include "opal/util/timings.h" #include "opal/util/opal_environ.h" @@ -366,8 +366,8 @@ static int ompi_register_mca_variables(void) static void fence_release(int status, void *cbdata) { - volatile bool *active = (volatile bool*)cbdata; - *active = false; + opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata; + OPAL_PMIX_WAKEUP_THREAD(lock); } int ompi_mpi_init(int argc, char **argv, int requested, int *provided) @@ -377,9 +377,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) size_t nprocs; char *error = NULL; ompi_errhandler_errtrk_t errtrk; - volatile bool active; opal_list_t info; opal_value_t *kv; + opal_pmix_lock_t lock; + bool background_fence = false; OMPI_TIMING_INIT(32); @@ -682,24 +683,20 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { /* execute the fence_nb in the background to collect * the data */ - if (!ompi_async_mpi_init) { - /* we are going to execute a barrier at the - * end of MPI_Init. We can only have ONE fence - * operation with the identical involved procs - * at a time, so we will need to wait when we - * get there */ - active = true; - opal_pmix.fence_nb(NULL, true, fence_release, (void*)&active); - } else { - opal_pmix.fence_nb(NULL, true, NULL, NULL); - } + background_fence = true; + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + opal_pmix.fence_nb(NULL, true, fence_release, (void*)&lock); } else if (!opal_pmix_base_async_modex) { - active = true; + /* we want to do the modex */ + OPAL_PMIX_CONSTRUCT_LOCK(&lock); opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data, - fence_release, (void*)&active); - OMPI_LAZY_WAIT_FOR_COMPLETION(active); + fence_release, (void*)&lock); + /* cannot just wait on thread as we need to call opal_progress */ + OMPI_LAZY_WAIT_FOR_COMPLETION(lock.active); + OPAL_PMIX_DESTRUCT_LOCK(&lock); } - } else { + /* otherwise, we don't want to do the modex, so fall thru */ + } else if (!opal_pmix_base_async_modex || opal_pmix_collect_all_data) { opal_pmix.fence(NULL, opal_pmix_collect_all_data); } @@ -866,24 +863,24 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Next timing measurement */ OMPI_TIMING_NEXT("modex-barrier"); - /* wait for everyone to reach this point - this is a hard - * barrier requirement at this time, though we hope to relax - * it at a later point */ - if (!ompi_async_mpi_init) { - /* if we executed the above fence in the background, then - * we have to wait here for it to complete. However, there - * is no reason to do two barriers! */ - if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) { - OMPI_LAZY_WAIT_FOR_COMPLETION(active); + /* if we executed the above fence in the background, then + * we have to wait here for it to complete. However, there + * is no reason to do two barriers! */ + if (background_fence) { + OMPI_LAZY_WAIT_FOR_COMPLETION(lock.active); + OPAL_PMIX_DESTRUCT_LOCK(&lock); + } else if (!ompi_async_mpi_init) { + /* wait for everyone to reach this point - this is a hard + * barrier requirement at this time, though we hope to relax + * it at a later point */ + if (NULL != opal_pmix.fence_nb) { + OPAL_PMIX_CONSTRUCT_LOCK(&lock); + opal_pmix.fence_nb(NULL, false, + fence_release, (void*)&lock); + OMPI_LAZY_WAIT_FOR_COMPLETION(lock.active); + OPAL_PMIX_DESTRUCT_LOCK(&lock); } else { - active = true; - if (NULL != opal_pmix.fence_nb) { - opal_pmix.fence_nb(NULL, false, - fence_release, (void*)&active); - OMPI_LAZY_WAIT_FOR_COMPLETION(active); - } else { - opal_pmix.fence(NULL, false); - } + opal_pmix.fence(NULL, false); } } diff --git a/orte/tools/prun/prun.c b/orte/tools/prun/prun.c index 6a31e4bb90..a8b81df918 100644 --- a/orte/tools/prun/prun.c +++ b/orte/tools/prun/prun.c @@ -781,7 +781,8 @@ static int create_app(int argc, char* argv[], /* Grab all MCA environment variables */ app->env = opal_argv_copy(*app_env); for (i=0; NULL != environ[i]; i++) { - if (0 == strncmp("PMIX_", environ[i], 5)) { + if (0 == strncmp("PMIX_", environ[i], 5) || + 0 == strncmp("OMPI_", environ[i], 5)) { /* check for duplicate in app->env - this * would have been placed there by the * cmd line processor. By convention, we