Merge pull request #4382 from rhc54/topic/scaling
Update the scaling script to avoid use of the "system" command
Этот коммит содержится в:
Коммит
70c455938b
@ -24,7 +24,7 @@ my $ppn = 1;
|
|||||||
my @csvrow;
|
my @csvrow;
|
||||||
|
|
||||||
my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op);
|
my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op);
|
||||||
my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1");
|
my @options = ("", "", "", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca pmix_base_collect_data 0", "-mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1 -mca pmix_base_collect_data 0");
|
||||||
my @starterlist = qw(mpirun prun srun aprun);
|
my @starterlist = qw(mpirun prun srun aprun);
|
||||||
my @starteroptionlist = (" --novm",
|
my @starteroptionlist = (" --novm",
|
||||||
" --system-server-only",
|
" --system-server-only",
|
||||||
@ -87,6 +87,7 @@ my $option;
|
|||||||
my $havedvm = 0;
|
my $havedvm = 0;
|
||||||
my @starters;
|
my @starters;
|
||||||
my @starteroptions;
|
my @starteroptions;
|
||||||
|
my $pid;
|
||||||
|
|
||||||
# if they explicitly requested specific starters, then
|
# if they explicitly requested specific starters, then
|
||||||
# only use those
|
# only use those
|
||||||
@ -267,12 +268,17 @@ foreach $starter (@starters) {
|
|||||||
# if we are going to use the dvm, then we
|
# if we are going to use the dvm, then we
|
||||||
if ($starter eq "prun") {
|
if ($starter eq "prun") {
|
||||||
# need to start it
|
# need to start it
|
||||||
$cmd = "orte-dvm --system_server 2>&1 &";
|
|
||||||
if ($myresults) {
|
if ($myresults) {
|
||||||
print FILE "\n\n$cmd\n";
|
print FILE "\n\norte-dvm --system-server\n";
|
||||||
}
|
}
|
||||||
if (!$SHOWME) {
|
if (!$SHOWME) {
|
||||||
system($cmd);
|
unless ($pid = fork) {
|
||||||
|
unless (fork) {
|
||||||
|
exec "orte-dvm --system-server 2>&1";
|
||||||
|
die "no exec";
|
||||||
|
}
|
||||||
|
exit 0;
|
||||||
|
}
|
||||||
$havedvm = 1;
|
$havedvm = 1;
|
||||||
}
|
}
|
||||||
# give it a couple of seconds to start
|
# give it a couple of seconds to start
|
||||||
@ -297,7 +303,7 @@ foreach $starter (@starters) {
|
|||||||
# pre-position the executable
|
# pre-position the executable
|
||||||
$cmd = $starter . $starteroptions[$index] . " $test 2>&1";
|
$cmd = $starter . $starteroptions[$index] . " $test 2>&1";
|
||||||
my $error;
|
my $error;
|
||||||
$error = system($cmd);
|
$error = `$cmd`;
|
||||||
if (0 != $error) {
|
if (0 != $error) {
|
||||||
if ($myresults) {
|
if ($myresults) {
|
||||||
print FILE "Command $cmd returned error $error\n";
|
print FILE "Command $cmd returned error $error\n";
|
||||||
@ -342,7 +348,8 @@ foreach $starter (@starters) {
|
|||||||
if ($havedvm) {
|
if ($havedvm) {
|
||||||
if (!$SHOWME) {
|
if (!$SHOWME) {
|
||||||
$cmd = "prun --system-server-only --terminate";
|
$cmd = "prun --system-server-only --terminate";
|
||||||
system($cmd);
|
my $rc = `$cmd`;
|
||||||
|
waitpid($pid, 0);
|
||||||
}
|
}
|
||||||
$havedvm = 0;
|
$havedvm = 0;
|
||||||
}
|
}
|
||||||
|
@ -59,7 +59,7 @@
|
|||||||
#include "opal/mca/rcache/rcache.h"
|
#include "opal/mca/rcache/rcache.h"
|
||||||
#include "opal/mca/mpool/base/base.h"
|
#include "opal/mca/mpool/base/base.h"
|
||||||
#include "opal/mca/btl/base/base.h"
|
#include "opal/mca/btl/base/base.h"
|
||||||
#include "opal/mca/pmix/pmix.h"
|
#include "opal/mca/pmix/base/base.h"
|
||||||
#include "opal/util/timings.h"
|
#include "opal/util/timings.h"
|
||||||
#include "opal/util/opal_environ.h"
|
#include "opal/util/opal_environ.h"
|
||||||
|
|
||||||
@ -366,8 +366,8 @@ static int ompi_register_mca_variables(void)
|
|||||||
|
|
||||||
static void fence_release(int status, void *cbdata)
|
static void fence_release(int status, void *cbdata)
|
||||||
{
|
{
|
||||||
volatile bool *active = (volatile bool*)cbdata;
|
opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
|
||||||
*active = false;
|
OPAL_PMIX_WAKEUP_THREAD(lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||||
@ -377,9 +377,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
size_t nprocs;
|
size_t nprocs;
|
||||||
char *error = NULL;
|
char *error = NULL;
|
||||||
ompi_errhandler_errtrk_t errtrk;
|
ompi_errhandler_errtrk_t errtrk;
|
||||||
volatile bool active;
|
|
||||||
opal_list_t info;
|
opal_list_t info;
|
||||||
opal_value_t *kv;
|
opal_value_t *kv;
|
||||||
|
opal_pmix_lock_t lock;
|
||||||
|
bool background_fence = false;
|
||||||
|
|
||||||
OMPI_TIMING_INIT(32);
|
OMPI_TIMING_INIT(32);
|
||||||
|
|
||||||
@ -682,24 +683,20 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) {
|
if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) {
|
||||||
/* execute the fence_nb in the background to collect
|
/* execute the fence_nb in the background to collect
|
||||||
* the data */
|
* the data */
|
||||||
if (!ompi_async_mpi_init) {
|
background_fence = true;
|
||||||
/* we are going to execute a barrier at the
|
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
|
||||||
* end of MPI_Init. We can only have ONE fence
|
opal_pmix.fence_nb(NULL, true, fence_release, (void*)&lock);
|
||||||
* operation with the identical involved procs
|
|
||||||
* at a time, so we will need to wait when we
|
|
||||||
* get there */
|
|
||||||
active = true;
|
|
||||||
opal_pmix.fence_nb(NULL, true, fence_release, (void*)&active);
|
|
||||||
} else {
|
|
||||||
opal_pmix.fence_nb(NULL, true, NULL, NULL);
|
|
||||||
}
|
|
||||||
} else if (!opal_pmix_base_async_modex) {
|
} else if (!opal_pmix_base_async_modex) {
|
||||||
active = true;
|
/* we want to do the modex */
|
||||||
|
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
|
||||||
opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
|
opal_pmix.fence_nb(NULL, opal_pmix_collect_all_data,
|
||||||
fence_release, (void*)&active);
|
fence_release, (void*)&lock);
|
||||||
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
|
/* cannot just wait on thread as we need to call opal_progress */
|
||||||
|
OMPI_LAZY_WAIT_FOR_COMPLETION(lock.active);
|
||||||
|
OPAL_PMIX_DESTRUCT_LOCK(&lock);
|
||||||
}
|
}
|
||||||
} else {
|
/* otherwise, we don't want to do the modex, so fall through */
|
||||||
|
} else if (!opal_pmix_base_async_modex || opal_pmix_collect_all_data) {
|
||||||
opal_pmix.fence(NULL, opal_pmix_collect_all_data);
|
opal_pmix.fence(NULL, opal_pmix_collect_all_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -866,24 +863,24 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
/* Next timing measurement */
|
/* Next timing measurement */
|
||||||
OMPI_TIMING_NEXT("modex-barrier");
|
OMPI_TIMING_NEXT("modex-barrier");
|
||||||
|
|
||||||
/* wait for everyone to reach this point - this is a hard
|
/* if we executed the above fence in the background, then
|
||||||
* barrier requirement at this time, though we hope to relax
|
* we have to wait here for it to complete. However, there
|
||||||
* it at a later point */
|
* is no reason to do two barriers! */
|
||||||
if (!ompi_async_mpi_init) {
|
if (background_fence) {
|
||||||
/* if we executed the above fence in the background, then
|
OMPI_LAZY_WAIT_FOR_COMPLETION(lock.active);
|
||||||
* we have to wait here for it to complete. However, there
|
OPAL_PMIX_DESTRUCT_LOCK(&lock);
|
||||||
* is no reason to do two barriers! */
|
} else if (!ompi_async_mpi_init) {
|
||||||
if (opal_pmix_base_async_modex && opal_pmix_collect_all_data) {
|
/* wait for everyone to reach this point - this is a hard
|
||||||
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
|
* barrier requirement at this time, though we hope to relax
|
||||||
|
* it at a later point */
|
||||||
|
if (NULL != opal_pmix.fence_nb) {
|
||||||
|
OPAL_PMIX_CONSTRUCT_LOCK(&lock);
|
||||||
|
opal_pmix.fence_nb(NULL, false,
|
||||||
|
fence_release, (void*)&lock);
|
||||||
|
OMPI_LAZY_WAIT_FOR_COMPLETION(lock.active);
|
||||||
|
OPAL_PMIX_DESTRUCT_LOCK(&lock);
|
||||||
} else {
|
} else {
|
||||||
active = true;
|
opal_pmix.fence(NULL, false);
|
||||||
if (NULL != opal_pmix.fence_nb) {
|
|
||||||
opal_pmix.fence_nb(NULL, false,
|
|
||||||
fence_release, (void*)&active);
|
|
||||||
OMPI_LAZY_WAIT_FOR_COMPLETION(active);
|
|
||||||
} else {
|
|
||||||
opal_pmix.fence(NULL, false);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -781,7 +781,8 @@ static int create_app(int argc, char* argv[],
|
|||||||
/* Grab all MCA environment variables */
|
/* Grab all MCA environment variables */
|
||||||
app->env = opal_argv_copy(*app_env);
|
app->env = opal_argv_copy(*app_env);
|
||||||
for (i=0; NULL != environ[i]; i++) {
|
for (i=0; NULL != environ[i]; i++) {
|
||||||
if (0 == strncmp("PMIX_", environ[i], 5)) {
|
if (0 == strncmp("PMIX_", environ[i], 5) ||
|
||||||
|
0 == strncmp("OMPI_", environ[i], 5)) {
|
||||||
/* check for duplicate in app->env - this
|
/* check for duplicate in app->env - this
|
||||||
* would have been placed there by the
|
* would have been placed there by the
|
||||||
* cmd line processor. By convention, we
|
* cmd line processor. By convention, we
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user