1
1
openmpi/contrib/scaling/scaling.pl

337 строки
11 KiB
Perl
Исходник Обычный вид История

#!/usr/bin/env perl
#
# Copyright (c) 2012 Los Alamos National Security, Inc.
# All rights reserved.
2016-10-12 09:28:16 +03:00
# Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
use strict;
use Getopt::Long;

# ---- globals ----

# Number of nodes to scale across; this default is overwritten later by
# the count obtained from an actual launch.
my $num_nodes = 2;
my $my_arg;
# Number of times each timed command is repeated (--reps)
my $reps = 5;
# Starter selection flags, set from the command line.  Each one is
# declared exactly once: a second "my" for the same name would silently
# mask the first declaration.
my $usedvm = 0;
my $usesrun = 0;
my $usempirun = 0;
my $useaprun = 0;
# Optional user-supplied application appended to the test list (--myapp)
my $myapp;
# True when every starter was requested (--all)
my $runall = 0;
# True when the raw output of each run should be copied to the results file
my $rawoutput = 0;
# Name of the results file (--results)
my $myresults = "myresults";
# Processes per node (--ppn)
my $ppn = 1;
# Values collected for the current line of the results file
my @csvrow;
# Executables to be timed; a test that does not exist on disk is skipped
my @tests = qw(/bin/true ./orte_no_op ./mpi_no_op ./mpi_no_op ./mpi_no_op);
Create an alternative mapping method that pushes responsibility onto the backend daemons. By default, let mpirun only pack the app_context info and send that to the backend daemons where the mapping will be done. This significantly reduces the computational time on mpirun as it isn't running up/down the topology tree computing thousands of binding locations, and it reduces the launch message to a very small number of bytes. When running -novm, fall back to the old way of doing things where mpirun computes the entire map and binding, and then sends the full info to the backend daemon. Add a new cmd line option/mca param --fwd-mpirun-port that allows mpirun to dynamically select a port, but then passes that back to all the other daemons so they will use that port as a static port for their own wireup. In this mode, we no longer "phone home" directly to mpirun, but instead use the static port to wireup at daemon start. We then use the routing tree to rollup the initial launch report, and limit the number of open sockets on mpirun's node. Update ras simulator to track the new nidmap code Cleanup some bugs in the nidmap regex code, and enhance the error message for not enough slots to include the host on which the problem is found. Update gadget platform file Initialize the range count when starting a new range Fix the no-np case in managed allocation Ensure DVM node usage gets cleaned up after each job Update scaling.pl script to use --fwd-mpirun-port. Pre-connect the daemon to its parent during launch while we are otherwise waiting for the daemon's children to send their "phone home" rollup messages Signed-off-by: Ralph Castain <rhc@open-mpi.org>
2017-02-02 03:33:14 +03:00
# Extra command-line options for each test: entry N is used when running
# the Nth entry of the test list.
my @options = (
    "",
    "",
    "",
    "--fwd-mpirun-port -mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1",
    "--fwd-mpirun-port -mca mpi_add_procs_cutoff 0 -mca pmix_base_async_modex 1 -mca async_mpi_init 1 -mca async_mpi_finalize 1",
);

# Launchers the script knows how to drive, and the base option string for
# each one.  The two lists are parallel: entry N of the option list
# belongs to entry N of the starter list.
my @starterlist = ("mpirun", "orterun", "srun", "aprun");
my @starteroptionlist = (
    "--novm",
    "--hnp file:dvm_uri",
    "--distribution=cyclic -N",
    "-N",
);
# Set to true if the script should merely print the cmds
# it would run, without actually running them
2016-10-12 09:28:16 +03:00
my $SHOWME = 0;
# Set to true to suppress most informational messages.
my $QUIET = 0;
# Set to true if we just want to see the help message
my $HELP = 0;

# Parse the command line.  --reps and --ppn are declared as integers so
# that a non-numeric value is rejected here, at parse time, instead of
# being used later as a loop bound or pasted into a launch command.
GetOptions(
    "help"      => \$HELP,
    "quiet"     => \$QUIET,
    "showme"    => \$SHOWME,
    "reps=i"    => \$reps,
    "dvm"       => \$usedvm,
    "srun"      => \$usesrun,
    "aprun"     => \$useaprun,
    "mpirun"    => \$usempirun,
    "myapp=s"   => \$myapp,
    "all"       => \$runall,
    "results=s" => \$myresults,
    "rawout"    => \$rawoutput,
    "ppn=i"     => \$ppn,
) or die "unable to parse options, stopped";
# Print the usage summary and exit when help was requested.  The option
# names listed here must match the names registered with GetOptions
# (the aprun entry was previously misspelled).
if ($HELP) {
    print "$0 [options]
--help | -h This help message
--quiet | -q Only output critical messages to stdout
--showme Show the actual commands without executing them
--reps=s Number of times to run each test (for statistics)
--mpirun Use mpirun (or its equivalent orterun)
--dvm Use orte-dvm to execute the test
--srun Use srun (if available) to execute the test
--aprun Use aprun (if available) to execute the test
--myapp=s In addition to the standard tests, run this specific application (including any args)
--all Use all available start commands [default]
--results=file File where results are to be stored in comma-separated value format
--rawout Provide raw timing output to the file
--ppn=n Run n procs/node
";
    exit(0);
}
# Working variables shared between the main loop and runcmd()

# node count recorded for the current run
my $n = 1;
# command being executed, the starter and test it was built from, and
# the text captured from running it
my ($cmd, $starter, $test, $output);
# captured output broken into lines, and the line being examined
my (@lines, $line);
# one line broken into fields, and the field being examined
my (@results, $res);
# general-purpose position counter, and the options for the current test
my ($idx, $option);
# true once a DVM has been started
my $havedvm = 0;
# the starters that will actually be used, with their full option strings
my (@starters, @starteroptions);
# If they asked for all, then set all starters to requested.  The help
# text documents --all as the default, so selecting no starter at all is
# treated the same way; otherwise a bare invocation would select
# nothing and stop with "No available starters".
if ($runall || !($usedvm || $usesrun || $usempirun || $useaprun)) {
    $useaprun = 1;
    $usempirun = 1;
    $usesrun = 1;
    $usedvm = 1;
}
# see which starters are available
my @path = split(":", $ENV{PATH});
my $exists = 0;
my $opt;
# For each known starter: the flag that requests it, and the text that
# goes between its base options and the procs-per-node value.
my %selection = (
    "orterun" => [$usedvm,    " --npernode "],
    "mpirun"  => [$usempirun, " --npernode "],
    "aprun"   => [$useaprun,  " "],
    "srun"    => [$usesrun,   " "],
);
$idx = 0;
for my $candidate (@starterlist) {
    # a starter is available when some PATH directory holds an
    # executable file of that name
    $exists = (grep { -x "$_/$candidate" } @path) ? 1 : 0;
    my $rule = $selection{$candidate};
    if ($exists && $rule && $rule->[0]) {
        # $idx tracks the position in the starter list, which is also the
        # position of this starter's base options
        $opt = $starteroptionlist[$idx] . $rule->[1] . $ppn;
        push @starters, $candidate;
        push @starteroptions, $opt;
    }
    $idx++;
}
# bozo check - nothing to do if no requested starter was found
unless (@starters) {
    print "No available starters\n";
    exit;
}
# if they gave us an app, add it to the list of tests
push(@tests, $myapp) if ($myapp);
if ($myresults) {
    # Open the results file for writing.  The low-precedence "or" is
    # required here: with "||" the die attaches to the file name
    # expression and a failed open goes unnoticed.
    open(FILE, ">", $myresults)
        or die "file could not be opened: $myresults: $!";
}
# Determine the number of nodes by launching one "hostname" per node and
# counting the lines that come back - doesn't matter which starter we use
$cmd = "mpirun --pernode hostname";
$output = qx{$cmd};
@lines = split(/\n/, $output);
$num_nodes = scalar(@lines);
# get the local date and time
my ($sec,$min,$hour,$day,$month,$yr19,@rest) = localtime(time);
# localtime() numbers the months from 0, so convert to 1..12 - exactly
# once, so that the screen and the results file show the same date
$month = $month + 1;
my $datestamp = sprintf("%d-%d-%d %02d:%02d:%02d",
                        $day, $month, $yr19 + 1900, $hour, $min, $sec);
my $pstarts = join(", ", @starters);
# start by printing out the resulting configuration
print "\n--------------------------------------------------\n";
print "\nTest configuration:\n";
print "\tDate:\t" . $datestamp . "\n";
print "\tNum nodes:\t" . $num_nodes . "\n";
print "\tStarters:\t" . $pstarts . "\n";
print "\n--------------------------------------------------\n";
# and tag the output file as well
if ($myresults) {
    print FILE "Test configuration:\n";
    print FILE "Date:\t" . $datestamp . "\n";
    print FILE "Num nodes:\t" . $num_nodes . "\n";
    print FILE "Starters:\t" . $pstarts . "\n";
}
# position of the current starter in @starters / @starteroptions
my $index = 0;
# Extract the timing value associated with one field of a timing line.
#
# Arguments: a reference to the list of fields on the line, and the
# position of the field to examine.
# Returns: an empty list when the field mentions neither "real" nor
# "elapsed"; otherwise a one-element list holding the value (which can
# be undef when the expected neighbouring field does not exist).
sub _timing_value
{
    my ($fields, $pos) = @_;
    my $token = $fields->[$pos];
    # we are only interested in the real or elapsed time
    foreach my $tag ("real", "elapsed") {
        my $strloc = index($token, $tag);
        next if ($strloc < 0);
        # some systems put the number in front of this word, and some
        # append the word to the number - consider both cases
        if (0 < $strloc) {
            # take the portion of the string up to the tag
            return (substr($token, 0, $strloc));
        }
        # the field starts with the word: the number must be in the next
        # location when the word comes first, else in the prior location
        return ($fields->[(0 == $pos) ? 1 : $pos - 1]);
    }
    return ();
}

# Run the current command ($cmd) $reps times and record the real/elapsed
# time reported by each run.
#
# Takes no arguments and returns nothing useful; it works on the
# file-level variables: the timing values are appended to @csvrow, and
# once all repetitions are done the row is written to the results file
# as one comma-separated line (when a results file is in use) and then
# cleared.  With --rawout, the complete output of every run is also
# copied to the results file, prefixed by the current node count.
sub runcmd
{
    for (1..$reps) {
        $output = `$cmd`;
        if ($myresults && $rawoutput) {
            print FILE $n . " " . $output . "\n";
        }
        @lines = split(/\n/, $output);
        foreach my $timing (@lines) {
            # only lines containing an item of interest are examined
            next unless (0 <= index($timing, "real") ||
                         0 <= index($timing, "elapsed"));
            # get rid of any leading whitespace, then break the line into
            # fields on any whitespace so that tab-separated output
            # ("real<TAB>0m0.003s") is handled like space-separated output
            $timing =~ s/^\s+//;
            my @fields = split(/\s+/, $timing);
            for my $pos (0 .. $#fields) {
                # pushes nothing when the field holds no timing tag
                push @csvrow, _timing_value(\@fields, $pos);
            }
        }
    }
    # we have now completed all the reps, so log the results
    if ($myresults) {
        print FILE join(",", @csvrow) . "\n";
    }
    # clear the row even when nothing was logged, so values from one set
    # of runs never pile up behind the next
    @csvrow = ();
    print "\n";
}
# Main loop: for every selected starter, run every test that exists on
# disk, timing it at node counts of 1, 2, 4, ... up to $num_nodes.
foreach $starter (@starters) {
    print "STARTER: $starter\n";
    # if we are going to use the dvm, then we need to start it
    if ($starter eq "orterun") {
        # remove any rendezvous file left over from an earlier run
        if (-e "dvm_uri") {
            system("rm -f dvm_uri");
        }
        $cmd = "orte-dvm --report-uri dvm_uri 2>&1 &";
        if ($myresults) {
            print FILE "\n\n$cmd\n";
        }
        if (!$SHOWME) {
            system($cmd);
            # wait for the rendezvous file to appear
            # NOTE(review): this loop has no timeout, so it never ends if
            # orte-dvm fails to start
            while (! -e "dvm_uri") {
                sleep(1);
            }
            $havedvm = 1;
        }
    }
    if ($myresults) {
        print FILE "$starter $starteroptions[$index]\n\n";
    }
    my $testnum = 0;
    foreach $test (@tests) {
        # a test added with --myapp has no matching entry in @options,
        # so fall back to "no extra options"
        $option = defined($options[$testnum]) ? $options[$testnum] : "";
        if (-e $test) {
            if ($myresults) {
                print FILE "#nodes,$test,$option\n";
            }
            if (!$SHOWME) {
                # pre-position the executable (the starter and its
                # options must be separated by a space)
                $cmd = $starter . " " . $starteroptions[$index] . " $test 2>&1";
                system($cmd);
            }
            # NOTE(review): $n starts each CSV row, but it is not part of
            # the command line built below - confirm that is intended
            $n = 1;
            while ($n <= $num_nodes) {
                push @csvrow,$n;
                $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option $test 2>&1";
                print $cmd . "\n";
                if (!$SHOWME) {
                    runcmd();
                }
                $n = 2 * $n;
            }
            # NOTE(review): "!=" binds tighter than "&", so this is
            # evaluated as (0 != $num_nodes) & $n.  $n is even whenever
            # $num_nodes is non-zero, which makes the result 0 and leaves
            # this branch unreachable.  Presumably it was meant to add a
            # final run when $num_nodes is not a power of two - confirm
            # the intent before changing the condition.
            if (0 != $num_nodes & $n) {
                $cmd = "time " . $starter . " " . $starteroptions[$index] . " $option $test 2>&1";
                print $cmd . "\n";
                if (!$SHOWME) {
                    runcmd();
                }
            }
            print "\n--------------------------------------------------\n";
        } else {
            print "Test " . $test . " was not found - test skipped\n";
            print "\n--------------------------------------------------\n";
        }
        $testnum = $testnum + 1;
    }
    if ($havedvm) {
        if (!$SHOWME) {
            $cmd = "orterun --hnp file:dvm_uri --terminate";
            system($cmd);
        }
        if (-e "dvm_uri") {
            system("rm -f dvm_uri");
        }
        # the DVM is gone now - forget it, so that the starters that
        # follow do not try to terminate it again
        $havedvm = 0;
    }
    $index = $index + 1;
}
if ($myresults) {
    close(FILE);
}