1
1
openmpi/contrib/ompi-time.sh
Ralph Castain a2919174d0 Bring the RML modifications across. This is the first step in a revamp of the ORTE messaging subsystem to support fabric-based communications during launch and wireup phases. When completed, the grpcomm and plm frameworks will each have their own "conduit" for communication - each conduit corresponds to a particular RML messaging transport. This can be the active OOB-based component, or a provider from within the RML/OFI component. Messages sent down the conduit will flow across the associated transport.
Multiple conduits can exist at the same time, and can even point to the same base transport. Each conduit can have its own characteristics (e.g., flow control) based on the info keys provided to the "open_conduit" call. For ease during the transition period, the "legacy" RML interfaces remain as wrappers over the new conduit-based APIs using a default conduit opened during orte_init - this default conduit is tied to the OOB framework so that current behaviors are preserved. Once the transition has been completed, a one-time cleanup will be done to update all RML calls to the new APIs and the "legacy" interfaces will be deleted.

While we are at it: Remove oob/usock component to eliminate the TMPDIR length problem - get all working, including oob_stress
2016-10-11 16:01:02 -07:00

1245 строки
32 KiB
Bash
Исполняемый файл

#!/bin/bash
#
# Copyright (c) 2015 Mellanox Technologies, Inc.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This script is used to measure PMIx performance.
#
# --exec: scenario to run as sync or "test1 test2"
# --mpidir: path to mpi installation (/usr default)
# --parse: path to collected results
# HOWTO:
# 1. Set test matrix using variables $node_list, $ppn_list, $test_list
# 2. Allocate nodes:
# $salloc --nodelist=node[1-4]
# or
# $salloc -N4
# 3. Launch script:
# $./ompi-time.sh --exec="test1 test2" --mpidir=<open>
# $./ompi-time.sh --mpidir=<open>
#
# Output location is test name folder
# Output file formats
# ()_base.log
# timestamp (usec) hostnode label
# 1441715028369350 mir14 start
# 1441715030540656 mir14 end
#
# ()_out.log
# timestamp (usec) rank node
# 1441715030460727 0 mir9
# 1441715030460628 1 mir10
#
# ()_result.log
# time rank node
# 2.089 3 mir12
# 2.093 2 mir11
#
# report.log
# nodes ppn mintime maxtime
# 4 1 2.089 2.093
# Settings
###############################################################################
# Test matrix: every scenario in test_list is run for each combination of
# a node count from node_list and a processes-per-node value from ppn_list.
node_list=( 2 4 )
ppn_list=( 1 2 )
test_list="test1 test2 test3 test4 test5 test6 test7"
test_list+=" test8 test9 test10 test11 test12 test13"
# Declarations
###############################################################################
# prefix names the working directory and the log files; module is the path
# of this script as reported by bash and is used in failure messages.
prefix="pmix"
module="${BASH_SOURCE[0]}"
# Command line parsing
###############################################################################
# Recognised options: --parse=<dir>, --exec=<scenarios>, --mpidir=<dir>.
# An option given without "=value" or an unknown option aborts with status 1;
# arguments that do not start with "-" are skipped.
opt=""
until (( $# == 0 )); do
  case "$1" in
    --parse=*)
      parse=${1#--parse=}
      shift
      ;;
    --exec=*)
      exec=${1#--exec=}
      shift
      ;;
    --mpidir=*)
      mpidir=${1#--mpidir=}
      shift
      ;;
    --parse | --exec | --mpidir)
      echo "$1 requires an argument" >&2
      exit 1
      ;;
    -*)
      echo "unknown option: $1" >&2
      exit 1
      ;;
    *)
      shift
      ;;
  esac
done
# The scenario of measurement
if [[ -n "$exec" ]]; then
  test_list=$exec
fi
# The mpi path
: "${mpidir:=/usr}"
# Functions
###############################################################################
# format text
#   $1  - text to print
#   $2+ - attribute names: bold, underline, reverse, red, green, yellow
#         (any other word is ignored)
# Prints the text wrapped in the matching ANSI escape sequences, followed by
# a reset sequence, on stdout.
function do_format() {
  local is_format=true
  # The accumulator and the loop index stay local: do_exec drives its own
  # loops with a global "i" that must not be clobbered from here.
  local res=""
  local i

  if [[ $is_format == true ]]; then
    for ((i = 2; i <= $#; i++)); do
      # ${!i} is the i-th positional parameter
      case "${!i}" in
        "bold")      res="$res\e[1m" ;;
        "underline") res="$res\e[4m" ;;
        "reverse")   res="$res\e[7m" ;;
        "red")       res="$res\e[91m" ;;
        "green")     res="$res\e[92m" ;;
        "yellow")    res="$res\e[93m" ;;
      esac
    done
    echo -e "$res$1\e[0m"
  else
    echo "$1"
  fi
}
# print message
# Joins all arguments with a space, interprets backslash escapes (echo -e),
# writes the result to stdout and appends it to $logfile.
function do_msg() {
  # ${logfile:+"..."} passes the path as one word even when it contains
  # blanks, and passes nothing at all while logfile is still unset/empty.
  echo -e "$*" 2>&1 | tee -a ${logfile:+"$logfile"}
}
# print error message and exit script
# The message "<module> failed. aborting. <args>" is formatted red/bold by
# do_format, written to stdout and appended to $logfile; the script then
# exits with status 1.
function do_err() {
  # The substitution is quoted so the message is not word-split or globbed.
  echo -e "$(do_format "$module failed. aborting. $*" "red" "bold")" 2>&1 | tee -a ${logfile:+"$logfile"}
  exit 1
}
# Emit the current time in microseconds since the epoch, a tab escape and
# the short host name, followed by the caller's label, through do_msg.
#   $1 - label (the test scenarios pass "start" and "end")
function do_timestamp() {
  local usec
  local host
  usec=$(( $(date +%s%N) / 1000 ))
  host=$(hostname -s)
  do_msg "${usec}\t${host}" "$1"
}
# swap two files
# Exchanges the locations of two files: $1 is moved into the directory of $2
# and $2 into the directory of $1 (the file names themselves are kept).
#   $1, $2 - paths of the files to exchange
# Returns 1 (after printing a usage line) unless exactly two arguments are
# given; otherwise the status of the first failing step, or 0.
function do_fswap() {
  if (( $# != 2 )); then
    echo "Usage: swap <file1> <file2>"
    return 1
  fi

  local first=$1
  local second=$2
  local first_dir second_dir stage rc

  first_dir=$(dirname -- "$first")
  second_dir=$(dirname -- "$second")
  # A private staging directory avoids clashing with an unrelated
  # /tmp/<basename> left behind by someone else.
  stage=$(mktemp -d) || return 1

  mv -- "$first" "$stage/" &&
    mv -- "$second" "$first_dir/" &&
    mv -- "$stage/$(basename -- "$first")" "$second_dir/"
  rc=$?

  if (( rc != 0 )); then
    echo "do_fswap: swap failed; $first may have been left in $stage" >&2
  fi
  # rmdir only succeeds on an empty directory, so a stranded file survives.
  rmdir -- "$stage" 2>/dev/null
  return $rc
}
# Run a command line and log it.
#   $* - the command line; it is evaluated by the shell, so it may contain
#        quoting, pipes and redirections
# The command text is announced through do_msg and its stdout/stderr are
# appended to $logfile. If the command fails, the log is printed (last 20
# lines, then the whole file) and the script exits with the command's status.
function do_cmd() {
  local cmd="$*"
  local status

  do_msg "Doing:"
  do_msg "=================================================="
  do_msg "$cmd"
  # Quoted so that eval sees the command exactly as given; an unquoted
  # expansion would be re-split and whitespace inside quotes collapsed.
  eval "$cmd" >> "$logfile" 2>&1
  status=$?
  if (( status != 0 )); then
    echo "$module failed. Log:"
    tail -20 "$logfile"
    cat "$logfile"
    exit $status
  fi
  do_msg "DONE"
  do_msg ""
}
# Put an installation tree in front of the search paths.
#   $1 - installation root; its bin, lib and share/man sub-directories are
#        prepended to PATH, LD_LIBRARY_PATH and MANPATH respectively
function do_export() {
  local root=$1

  do_msg "Exporting PATHs:"
  do_msg "=================================================="
  do_msg "$root"

  PATH="${root}/bin:${PATH}"
  LD_LIBRARY_PATH="${root}/lib:${LD_LIBRARY_PATH}"
  MANPATH="${root}/share/man:${MANPATH}"
  export PATH LD_LIBRARY_PATH MANPATH

  do_msg "DONE"
  do_msg ""
}
# Log a summary of the local node through do_msg: host name, /etc/issue
# lines containing "We", CPU model, number of /proc/cpuinfo lines matching
# "proce", two fields of `uname -a`, /proc/meminfo lines matching [M,m]em,
# and the output of ibstat, ibstatus and ofed_info filtered as shown below.
function do_nodeinfo() {
  do_msg "Node information:"
  do_msg "=================================================="
  # The command substitutions are left unquoted on purpose: multi-line tool
  # output is collapsed onto one log line per item.
  do_msg $(hostname)
  do_msg $(grep We /etc/issue)
  do_msg $(grep 'model name' /proc/cpuinfo | sort -u | awk '{print $4, $5, $6, $7, $9}')
  do_msg $(grep -c proce /proc/cpuinfo)
  do_msg $(uname -a | awk '{print $12}')
  # The pattern is quoted so the shell cannot expand it against file names
  # in the current directory before grep sees it.
  do_msg $(grep '[M,m]em' /proc/meminfo)
  do_msg $(uname -a | awk '{print $3}')
  do_msg $(ibstat | grep -e "CA type" -e "Firmware version")
  do_msg $(ibstatus | grep -e rate -e state | grep -v 'phys state')
  do_msg $(ofed_info | head -6 | grep OFED)
  do_msg "DONE"
  do_msg ""
}
# Make sure the launchers this script relies on can be found; do_err is
# invoked with "<tool> is not found." for a missing one.
function do_validate() {
  local tool
  for tool in mpiexec srun salloc; do
    command -v "$tool" >/dev/null 2>&1 || do_err "$tool is not found."
  done
}
# Succeeds (and prints the matching lines) when the output of
# `srun --mpi=list` mentions pmix.
function do_check_pmix() {
  srun --mpi=list 2>&1 | grep pmix
}
# Measure the clock drift between nodes with the mpisync tool.
#   $1  - verbose mode: "on" aborts through do_err when the drift reaches
#         0.001, any other value only logs a warning
#   $2+ - extra options handed to mpisync
# Globals read: tempdir, logfile, nodes, mpioptions, syncfile.
# On first use the tool sources are downloaded into ${tempdir}/mpisync and
# built there with mpicc; the working directory is left in that directory.
function do_checksync_mpisync() {
  local status=""
  local tooldir=${tempdir}/mpisync
  local verbose=$1
  # Only the arguments after the verbose flag belong to mpisync; the flag
  # itself must not end up on its command line.
  local option=${*:2}
  local baseurl=https://github.com/open-mpi/ompi/raw/master/ompi/tools/mpisync
  local src
  local drift

  do_msg "Checking synchronization using mpisync:"
  if [ ! -e "${tooldir}" ]; then
    mkdir -p "${tooldir}"
    cd "${tooldir}" || do_err "can not enter $tooldir"
    for src in mpigclock.c mpigclock.h hpctimer.c hpctimer.h sync.c; do
      wget --no-check-certificate "${baseurl}/${src}" >> "$logfile" 2>&1
    done
    mpicc hpctimer.c mpigclock.c sync.c -o mpisync >> "$logfile" 2>&1
  fi
  if [ ! -e "$tooldir" ] || [ ! -f "$tooldir/mpisync" ]; then
    do_err "can not find $tooldir/mpisync"
  fi
  # $mpioptions and $option each hold several words and are split on purpose.
  mpiexec -n $((nodes)) -npernode 1 $mpioptions "$tooldir/mpisync" -o "${syncfile}" ${option} 2>&1
  do_msg "Analysing ${syncfile}"
  cat "${syncfile}" >> "$logfile" 2>&1
  # Third column of every non-comment line -> "min max max-min"
  drift=$(grep -v '^#' "${syncfile}" 2>> "$logfile" | cut -f3 -d' ' | sort -n | awk 'BEGIN {min=1000000; max=0;}; { if($1<min && $1 != "") min = $1; if($1>max && $1 != "") max = $1; } END { printf("%0.06f %0.06f %0.06f", min, max, max-min) }')
  do_msg "sync drift is equal: $drift"
  # keep only the last field (max-min)
  drift=${drift##* }
  if awk -v d="$drift" 'BEGIN { exit !(d + 0 >= 0.001) }'; then
    status="value $drift >= 0.001"
  fi
  # Report only when the threshold was actually reached.
  if [ -n "$status" ]; then
    if [ "$verbose" == "on" ]; then
      do_err "mpisync reports issue with synchronization as $status"
    else
      do_msg "Warning: mpisync reports issue with synchronization as $status"
    fi
  fi
  do_msg "DONE"
  do_msg ""
}
# Check clock synchronization between nodes with the mpiperf benchmark.
#   $1 - verbose mode: "on" aborts through do_err when a value in the sixth
#        column of the mpiperf output reaches 1, any other value only logs
#        a warning
# Globals read: tempdir, logfile, nodes, mpioptions, syncfile.
# On first use mpiperf-0.0.3 is downloaded into $tempdir and built; the
# working directory is left inside the unpacked tree.
function do_checksync_mpiperf() {
  local status=""
  local tooldir=${tempdir}/mpiperf-0.0.3
  local tarball=${tempdir}/mpiperf-0.0.3.tar.gz
  local verbose=$1

  do_msg "Checking synchronization using mpiperf:"
  if [ ! -f "$tarball" ]; then
    # Download and unpack inside $tempdir so that the existence check above
    # and $tooldir agree whatever the current directory happens to be.
    wget -P "$tempdir" http://mpiperf.cpct.sibsutis.ru/uploads/Main/mpiperf-0.0.3.tar.gz >> "$logfile" 2>&1
    tar -C "$tempdir" -zxvf "$tarball" >> "$logfile" 2>&1
    cd "$tooldir" || do_err "can not enter $tooldir"
    make >> "$logfile" 2>&1
  fi
  if [ ! -e "$tooldir" ] || [ ! -f "$tooldir/src/mpiperf" ]; then
    do_err "can not find $tooldir/src/mpiperf"
  fi
  # $mpioptions holds several words and is split on purpose.
  mpiexec -n 1 $mpioptions "$tooldir/src/mpiperf" -T >> "$logfile" 2>&1
  if [ -z "$(mpiexec -n 1 $mpioptions "$tooldir/src/mpiperf" -j -t gettimeofday 2>&1 | tee -a "$logfile" | sed -n '/PASSED/p')" ]; then
    do_err "mpiperf does not support gettimeofday"
  fi
  mpiexec -n $((nodes)) -npernode 1 $mpioptions "$tooldir/src/mpiperf" -t gettimeofday WaitPatternNull >> "${syncfile}" 2>&1
  do_msg "Analysing ${syncfile}"
  cat "${syncfile}" >> "$logfile" 2>&1
  # First non-comment line whose sixth column is >= 1
  status=$(grep -v '^#' "${syncfile}" | awk '$6 + 0 >= 1 { print "value " $6 " >= 1.00"; exit }')
  # Report only when the threshold was actually reached.
  if [ -n "$status" ]; then
    if [ "$verbose" == "on" ]; then
      do_err "mpiperf reports issue with synchronization as $status"
    else
      do_msg "Warning: mpiperf reports issue with synchronization as $status"
    fi
  fi
  do_msg "DONE"
  do_msg ""
}
# $1 - sync filename (default: ${tempdir}/mpisync.log)
# $2 - verbose mode: on - exit in case synchronization values exceed a threshold and off - silent mode (default: off)
# $3+ - application additional options (not forwarded by the current code)
function do_checksync() {
  # syncfile is deliberately global: do_checksync_mpisync and do_analysis
  # read it.
  syncfile=${1:-${tempdir}/mpisync.log}
  # The verbose flag is always passed as its own word; an empty, unquoted
  # $2 would otherwise let "-a 0" slide into the verbose position.
  do_checksync_mpisync "${2:-off}" "-a 0"
  # do_checksync_mpisync "${2:-off}" "-a 1"
  # do_checksync_mpiperf
  do_msg "syncfile: $syncfile"
}
# Turn the raw per-rank timestamps of one run into a result file.
#   $1 - test directory
#   $2 - base file; the first tab-separated field of its first line is the
#        start time in microseconds
#   $3 - out file with lines "<timestamp usec> <rank> <node>"
# Globals read: nodes, ppn, syncfile.
# Writes "$3.1" (timestamps corrected by the per-node offset found in
# $syncfile) and ${testdir}/${nodes}x${ppn}_result.log (elapsed seconds,
# rank and node of the earliest and the latest rank), then calls do_report.
function do_analysis() {
  local testdir=$1
  local basefile=$2
  local outfile=$3
  local outfile1="${3}.1"
  local resultfile=${testdir}/${nodes}x${ppn}_result.log
  local start_t min_line max_line min_t max_t
  local line stamp node offset nlines

  if [[ ! -e "$testdir" ]]; then
    do_err "can not find testdir: $testdir"
  fi
  if [[ -z "$basefile" || ! -f "$basefile" ]]; then
    do_err "can not find basefile: $basefile"
  fi
  if [[ -z "$outfile" || ! -f "$outfile" ]]; then
    do_err "can not find outfile: $outfile"
  fi
  nlines=$(( $(wc -l < "$outfile") ))
  if (( nlines != nodes * ppn )); then
    do_msg "Warning: number of lines in $outfile ($nlines) is not equal ($nodes * $ppn)."
  fi
  start_t=$(awk -F '\t' 'NR == 1 { print $1; exit }' "$basefile")

  # Add sync value in output file
  while read -r line; do
    if [[ ! $line =~ ^[0-9] ]]; then
      do_msg "Warning: ignoring line: $line."
      continue
    fi
    # fields: timestamp, rank, node
    read -r stamp _ node _ <<< "$line"
    offset=0
    if [[ -n "$syncfile" && -f "$syncfile" && -n "$node" ]]; then
      # -w keeps "mir1" from also matching "mir10"; -m1 takes a single
      # line so that bc always receives exactly one number.
      offset=$(grep -w -m1 -- "$node" "$syncfile" | cut -f3 -d' ')
      offset=$(echo "scale=2; (${offset:-0} * 1000000)" | bc -l)
      # Round float value to int
      offset=${offset%%.*}
      # anything that is not a plain integer (empty, "-") counts as 0
      [[ $offset =~ ^-?[0-9]+$ ]] || offset=0
    fi
    echo -e "$((stamp + offset))\t${offset}\t${line}" >> "$outfile1"
  done < "$outfile"

  # Find maximum and minimum lines
  min_line=$(sort -n "$outfile1" | head -n1)
  max_line=$(sort -n "$outfile1" | tail -n1)
  if [[ -z "$min_line" || -z "$max_line" ]]; then
    do_err "can not find max/min lines in : $outfile1"
  fi
  # outfile1 columns: corrected time, offset, raw time, rank, node
  min_t=$(cut -f1 <<< "$min_line")
  max_t=$(cut -f1 <<< "$max_line")
  echo -e "$(bc -l <<< "scale=3; (($min_t - $start_t) / 1000000)")\t$(cut -f4 <<< "$min_line")\t$(cut -f5 <<< "$min_line")" >> "$resultfile" 2>&1
  echo -e "$(bc -l <<< "scale=3; (($max_t - $start_t) / 1000000)")\t$(cut -f4 <<< "$max_line")\t$(cut -f5 <<< "$max_line")" >> "$resultfile" 2>&1
  echo -e "\n# Used synchronization file: $syncfile" >> "$outfile1"
  do_report "$testdir" "$resultfile"
}
# Append one summary line "<nodes>\t<ppn>\t<min>\t<max>" to
# ${testdir}/report.log.
#   $1 - test directory
#   $2 - result file; the first field of line 1 is taken as the minimum and
#        the first field of line 2 as the maximum
# Globals read: nodes, ppn.
function do_report() {
  local testdir=$1
  local resultfile=$2
  local reportfile=${testdir}/report.log
  local min_t max_t

  # [[ ]] keeps the test well-formed for an empty argument; without a file
  # name awk would otherwise sit waiting on stdin.
  if [[ -z "$resultfile" || ! -f "$resultfile" ]]; then
    do_err "can not find resultfile: $resultfile"
  fi
  min_t=$(awk -F '\t' 'NR == 1 { print $1 }' "$resultfile")
  max_t=$(awk -F '\t' 'NR == 2 { print $1 }' "$resultfile")
  echo -e "${nodes}\t${ppn}\t${min_t}\t${max_t}" >> "$reportfile" 2>&1
}
# Pack the whole working directory ($tempdir) into pmix.<pid>.tar.gz placed
# in its parent directory. The current directory is changed to that parent.
# Returns 1 if the parent directory cannot be entered, else tar's status.
function do_postresult() {
  # Bail out instead of archiving into whatever directory we happen to be in.
  cd "$tempdir/.." || return 1
  tar -zcvf "$PWD/pmix.$$.tar.gz" "$tempdir" > /dev/null 2>&1
}
# C source fragment that the test scenarios paste at the top of the programs
# they generate: a set of #include lines plus a timestamp(FILE *) helper.
# timestamp() prints one line "<usec since epoch>\t<SLURM_PROCID or -1>\t<host
# name up to the first dot>" to the given stream and flushes it.
# The heredoc delimiter is unquoted, so the shell would expand any "$" in the
# fragment; it currently contains none.
include_timestamp_func=$(cat <<END_MSG
#include <dlfcn.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdint.h>
#include <sys/stat.h>
#include <time.h>
#include <sys/time.h>
static inline void timestamp(FILE *file)
{
struct timeval tv;
char name[256];
char *host_name = NULL;
char *domain = NULL;
int procid = -1;
char *str = NULL;
gettimeofday(&tv, NULL);
if (gethostname(name, sizeof(name)) != 0)
strcpy(name, "localhost");
host_name = strdup(name);
domain = strchr(host_name, '.');
if (domain)
*domain = '\0';
str = getenv("SLURM_PROCID");
procid = ( str ? strtol(str, NULL, 10) : -1);
fprintf(file, "%lld\t%d\t%s\n", tv.tv_sec * 1000000LL + tv.tv_usec, procid, host_name);
fflush(file);
}
END_MSG
)
# Run the measurement matrix: every scenario of $test_list for every
# combination of $node_list and $ppn_list, with a clock-synchronization check
# before and after each scenario, then archive the results.
# Globals read: tempdir, logfile, mpidir, exec, node_list, ppn_list, test_list.
# Globals written: nodes, ppn, mpioptions, slurmoptions, syncfile.
function do_exec() {
  # Loop variables are local so helpers cannot disturb the iteration.
  local n p t

  # The number of nodes (see SLURM_NNODES)
  nodes=${SLURM_NNODES:-2}
  # The number of tasks per node (see SLURM_NTASKS_PER_NODE or SLURM_TASKS_PER_NODE)
  ppn=${SLURM_NTASKS_PER_NODE:-1}
  mpioptions=' -novm -mca btl_openib_warn_default_gid_prefix 0 -mca mpi_add_procs_cutoff 100000 '
  slurmoptions=' OMPI_MCA_btl_openib_warn_default_gid_prefix=0 OMPI_MCA_mpi_add_procs_cutoff=100000 '
  if ! env | grep -q SLURM; then
    do_err "Do not see allocated nodes by SLURM. Probably salloc -N option is not set"
  fi
  #if [ "${SLURM_NPROCS}" != "$(($nodes * $ppn))" ]; then
  # do_err "SLURM_NPROCS=${SLURM_NPROCS} is not equal ($nodes * $ppn). Probably salloc -N option is not set"
  #fi
  do_msg ""
  do_msg "Configuration:"
  do_msg "=================================================="
  do_msg "tempdir: $tempdir"
  do_msg "logfile: $logfile"
  do_msg "mpi: $mpidir"
  do_msg "exec: $exec"
  do_msg "nodes: $nodes"
  do_msg "ppn: $ppn"
  do_msg "mpioptions: $mpioptions"
  do_msg "slurmoptions: $slurmoptions"
  # [*] prints the whole array; a bare $node_list is only its first element
  do_msg "node list: ${node_list[*]}"
  do_msg "ppn list: ${ppn_list[*]}"
  do_msg "test list: $test_list"
  do_msg ""
  do_export "$mpidir"
  do_nodeinfo
  do_validate
  if [ -f "${tempdir}/mpisync.log" ]; then
    syncfile=${tempdir}/mpisync.log
    do_msg "found sync data at ${syncfile}"
  elif [ -f "${tempdir}/mpiperf.log" ]; then
    syncfile=${tempdir}/mpiperf.log
    do_msg "found sync data at ${syncfile}"
  else
    do_msg "sync data is not found"
  fi

  # Launch scenario
  for n in "${node_list[@]}"; do
    for p in "${ppn_list[@]}"; do
      # test_list is a blank-separated word list and is split on purpose
      for t in $test_list; do
        nodes=$n
        ppn=$p
        if [ "$t" = "sync" ]; then
          do_checksync "${tempdir}/${nodes}x${ppn}_mpisync00.log" "off"
        else
          do_checksync "${tempdir}/${nodes}x${ppn}_mpisync_before.log" "off"
          # scenario "testN" is implemented by the function do_testN
          "do_${t}"
          do_checksync "${tempdir}/${nodes}x${ppn}_mpisyn_after.log" "off"
        fi
      done
    done
  done
  do_postresult
}
# $1 - result location
function do_parse() {
local parsedir=$1
local result_list
local test_list
local parsefile
for result in `ls -1 $workdir`; do
if [ ! -d "${parsedir}/${result}" ]; then
continue
fi
for test in `ls -1 "${parsedir}/${result}" | grep -v mpisync`; do
if [ ! -d "${parsedir}/${result}/${test}" ]; then
continue
fi
result_list="${result_list} ${result}"
test_list="${test_list} ${test}"
done
done
result_list=`echo $result_list | tr " " "\n" | sort | uniq | tr "\n" " "`
test_list=`echo $test_list | tr " " "\n" | sort | uniq | tr "\n" " "`
do_msg "results: $result_list"
do_msg "tests: $test_list"
for test in $test_list; do
parsefile="${parsedir}/parse_${test}.log"
for result in $result_list; do
echo -e "\n${result}:" >> $parsefile 2>&1
echo -e "nodes\tppn\tmin\tmax" >> $parsefile 2>&1
cat "${parsedir}/${result}/${test}/report.log" >> $parsefile 2>&1
done
done
}
# Pure application srun launch
#####################################################
# Builds a program that only prints a timestamp, starts nodes*ppn copies of
# it with srun and hands the collected output to do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, include_timestamp_func.
function do_test1() {
  local status
  local scenario=test1
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
$include_timestamp_func
int main()
{
timestamp(stdout);
return 0;
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than time a binary left over from an earlier iteration.
  gcc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  if srun -n$((nodes * ppn)) -N$nodes --ntasks-per-node=$ppn ./$scenario.out >> "$outfile" 2>&1; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  echo -e "srun pure overhead" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# Pure application mpiexec launch
#####################################################
# Builds a program that only prints a timestamp, starts nodes*ppn copies of
# it with mpiexec and hands the collected output to do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, mpioptions, include_timestamp_func.
function do_test2() {
  local status
  local scenario=test2
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
$include_timestamp_func
int main()
{
timestamp(stdout);
return 0;
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than time a binary left over from an earlier iteration.
  gcc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $mpioptions holds several words and is split on purpose
  if mpiexec -n $((nodes * ppn)) -npernode $ppn $mpioptions ./$scenario.out >> "$outfile" 2>&1; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  echo -e "mpiexec pure overhead" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# Pure application oshrun launch
#####################################################
# Builds a program that only prints a timestamp, starts nodes*ppn copies of
# it with oshrun and hands the collected output to do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, mpioptions, include_timestamp_func.
function do_test3() {
  local status
  local scenario=test3
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
$include_timestamp_func
int main()
{
timestamp(stdout);
return 0;
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than time a binary left over from an earlier iteration.
  gcc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $mpioptions holds several words and is split on purpose
  if oshrun -n $((nodes * ppn)) -npernode $ppn $mpioptions ./$scenario.out >> "$outfile" 2>&1; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  echo -e "osrun pure overhead" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# MPI_init application srun/pmi2 launch
#####################################################
# Builds an MPI program that prints a timestamp right after MPI_Init, starts
# nodes*ppn copies with "srun --mpi=pmi2" and hands the output to do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, slurmoptions, include_timestamp_func.
function do_test4() {
  local status
  local scenario=test4
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
$include_timestamp_func
#include "mpi.h"
int main(int argc, char* argv[])
{
MPI_Init(&argc, &argv);
timestamp(stdout);
MPI_Finalize();
return 0;
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than time a binary left over from an earlier iteration.
  mpicc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $slurmoptions is a list of VAR=value words for env and is split on purpose
  if env $slurmoptions srun -n$((nodes * ppn)) -N$nodes --ntasks-per-node=$ppn --mpi=pmi2 ./$scenario.out >> "$outfile" 2>&1; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  echo -e "srun --mpi=pmi2:MPI_Init" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# start_pes application srun/pmi2 launch
#####################################################
# Builds an OpenSHMEM program that prints a timestamp right after start_pes,
# starts nodes*ppn copies with "srun --mpi=pmi2" and hands the output to
# do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, slurmoptions, include_timestamp_func.
function do_test5() {
  local status
  local scenario=test5
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
$include_timestamp_func
#include "shmem.h"
int main(int argc, char* argv[])
{
start_pes(0);
timestamp(stdout);
return 0;
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than time a binary left over from an earlier iteration.
  oshcc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $slurmoptions is a list of VAR=value words for env and is split on purpose
  if env $slurmoptions srun -n$((nodes * ppn)) -N$nodes --ntasks-per-node=$ppn --mpi=pmi2 ./$scenario.out >> "$outfile" 2>&1; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  echo -e "srun --mpi=pmi2:start_pes" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# MPI_Init application mpiexec launch
#####################################################
# Builds an MPI program that prints a timestamp right after MPI_Init, starts
# nodes*ppn copies with mpiexec and hands the output to do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, mpioptions, include_timestamp_func.
function do_test6() {
  local status
  local scenario=test6
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
$include_timestamp_func
#include "mpi.h"
int main(int argc, char* argv[])
{
MPI_Init(&argc, &argv);
timestamp(stdout);
MPI_Finalize();
return 0;
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than time a binary left over from an earlier iteration.
  mpicc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $mpioptions holds several words and is split on purpose
  if mpiexec -n $((nodes * ppn)) -npernode $ppn $mpioptions ./$scenario.out >> "$outfile" 2>&1; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  echo -e "mpiexec:MPI_Init" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# start_pes application oshrun launch
#####################################################
# Builds an OpenSHMEM program that prints a timestamp right after start_pes,
# starts nodes*ppn copies with oshrun and hands the output to do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, mpioptions, include_timestamp_func.
function do_test7() {
  local status
  local scenario=test7
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
$include_timestamp_func
#include "shmem.h"
int main(int argc, char* argv[])
{
start_pes(0);
timestamp(stdout);
return 0;
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than time a binary left over from an earlier iteration.
  oshcc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $mpioptions holds several words and is split on purpose
  if oshrun -n $((nodes * ppn)) -npernode $ppn $mpioptions ./$scenario.out >> "$outfile" 2>&1; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  echo -e "osrun:start_pes" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# pure application mpiexec:orte_daemon
#####################################################
# Builds an empty program plus a preload library that wraps orte_daemon():
# on its first call the wrapper appends a timestamp to "<outfile>.<SLURM_PROCID>"
# and then calls the real function. After the mpiexec run the per-process
# files are merged into the out file and passed to do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, mpioptions, include_timestamp_func.
function do_test8() {
  local status
  local scenario=test8
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
#include "mpi.h"
int main(int argc, char* argv[])
{
return 0;
}
END_MSG
  # $outfile below is expanded by the shell while the source is written
  cat > "lib_$scenario.c" <<END_MSG
#define _GNU_SOURCE
$include_timestamp_func
int orte_daemon(int argc, char *argv[])
{
static int (*_orte_daemon)(int argc, char *argv[]) = NULL;
if (!_orte_daemon) {
_orte_daemon=dlsym(RTLD_NEXT,"orte_daemon");
if (!_orte_daemon) {
fprintf(stderr, "Error in 'dlsym': %s\n", dlerror());
exit(1);
} else {
FILE *fd = NULL;
char filename[1024];
char *str = getenv("SLURM_PROCID");
if (str) {
sprintf(filename, "%s.%s", "$outfile", str);
fd = fopen(filename, "a");
if (fd) {
timestamp(fd);
fclose(fd);
}
}
}
}
return _orte_daemon(argc, argv);
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than run artefacts left over from an earlier iteration.
  mpicc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"
  gcc "lib_$scenario.c" -o "$scenario.so" -shared -fPIC -ldl >> "$logfile" 2>&1 || do_err "can not build lib_$scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $mpioptions holds several words and is split on purpose
  if LD_PRELOAD="$PWD/$scenario.so" mpiexec -n $((nodes * ppn)) -npernode $ppn $mpioptions ./$scenario.out; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  # merge the per-process files written by the preload library
  cat "$outfile".* >> "$outfile"
  rm -- "$outfile".*
  echo -e "mpiexec:orte_daemon" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# pure application oshrun:orte_daemon
#####################################################
# Builds an empty program plus a preload library that wraps orte_daemon():
# on its first call the wrapper appends a timestamp to "<outfile>.<SLURM_PROCID>"
# and then calls the real function. After the oshrun run the per-process
# files are merged into the out file and passed to do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, mpioptions, include_timestamp_func.
function do_test9() {
  local status
  local scenario=test9
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
#include "mpi.h"
int main(int argc, char* argv[])
{
return 0;
}
END_MSG
  # $outfile below is expanded by the shell while the source is written
  cat > "lib_$scenario.c" <<END_MSG
#define _GNU_SOURCE
$include_timestamp_func
int orte_daemon(int argc, char *argv[])
{
static int (*_orte_daemon)(int argc, char *argv[]) = NULL;
if (!_orte_daemon) {
_orte_daemon=dlsym(RTLD_NEXT,"orte_daemon");
if (!_orte_daemon) {
fprintf(stderr, "Error in 'dlsym': %s\n", dlerror());
exit(1);
} else {
FILE *fd = NULL;
char filename[1024];
char *str = getenv("SLURM_PROCID");
if (str) {
sprintf(filename, "%s.%s", "$outfile", str);
fd = fopen(filename, "a");
if (fd) {
timestamp(fd);
fclose(fd);
}
}
}
}
return _orte_daemon(argc, argv);
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than run artefacts left over from an earlier iteration.
  mpicc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"
  gcc "lib_$scenario.c" -o "$scenario.so" -shared -fPIC -ldl >> "$logfile" 2>&1 || do_err "can not build lib_$scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $mpioptions holds several words and is split on purpose
  if LD_PRELOAD="$PWD/$scenario.so" oshrun -n $((nodes * ppn)) -npernode $ppn $mpioptions ./$scenario.out; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  # merge the per-process files written by the preload library
  cat "$outfile".* >> "$outfile"
  rm -- "$outfile".*
  echo -e "oshrun:orte_daemon" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# pure application mpiexec:orte_rml_base_update_contact_info
#####################################################
# Builds an empty program plus a preload library that wraps
# orte_rml_base_update_contact_info(): on its first call the wrapper appends
# a timestamp to "<outfile>.<SLURM_PROCID>" and then calls the real function.
# After the mpiexec run the per-process files are merged into the out file
# and passed to do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, mpioptions, include_timestamp_func.
function do_test10() {
  local status
  local scenario=test10
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
#include "mpi.h"
int main(int argc, char* argv[])
{
return 0;
}
END_MSG
  # $outfile below is expanded by the shell while the source is written
  cat > "lib_$scenario.c" <<END_MSG
#define _GNU_SOURCE
$include_timestamp_func
int orte_rml_base_update_contact_info(void * data)
{
static int (*_real_func)(void* data) = NULL;
if (!_real_func) {
_real_func=dlsym(RTLD_NEXT,"orte_rml_base_update_contact_info");
if (!_real_func) {
fprintf(stderr, "Error in 'dlsym': %s\n", dlerror());
exit(1);
} else {
FILE *fd = NULL;
char filename[1024];
char *str = getenv("SLURM_PROCID");
if (str) {
sprintf(filename, "%s.%s", "$outfile", str);
fd = fopen(filename, "a");
if (fd) {
timestamp(fd);
fclose(fd);
}
}
}
}
return _real_func(data);
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than run artefacts left over from an earlier iteration.
  mpicc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"
  mpicc "lib_$scenario.c" -o "$scenario.so" -shared -fPIC -ldl >> "$logfile" 2>&1 || do_err "can not build lib_$scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $mpioptions holds several words and is split on purpose
  if LD_PRELOAD="$PWD/$scenario.so" mpiexec -n $((nodes * ppn)) -npernode $ppn $mpioptions ./$scenario.out; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  # merge the per-process files written by the preload library; complaints
  # from cat/rm (e.g. no such files) go to the log
  cat "$outfile".* >> "$outfile" 2>> "$logfile"
  rm -- "$outfile".* >> "$logfile" 2>&1
  echo -e "mpiexec:orte_rml_base_update_contact_info" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# pure application oshrun:orte_rml_base_update_contact_info
#####################################################
# Builds an empty program plus a preload library that wraps
# orte_rml_base_update_contact_info(): on its first call the wrapper appends
# a timestamp to "<outfile>.<SLURM_PROCID>" and then calls the real function.
# After the oshrun run the per-process files are merged into the out file
# and passed to do_analysis.
# Globals read: tempdir, logfile, nodes, ppn, mpioptions, include_timestamp_func.
function do_test11() {
  local status
  local scenario=test11
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
#include "shmem.h"
int main(int argc, char* argv[])
{
return 0;
}
END_MSG
  # $outfile below is expanded by the shell while the source is written
  cat > "lib_$scenario.c" <<END_MSG
#define _GNU_SOURCE
$include_timestamp_func
int orte_rml_base_update_contact_info(void * data)
{
static int (*_real_func)(void* data) = NULL;
if (!_real_func) {
_real_func=dlsym(RTLD_NEXT,"orte_rml_base_update_contact_info");
if (!_real_func) {
fprintf(stderr, "Error in 'dlsym': %s\n", dlerror());
exit(1);
} else {
FILE *fd = NULL;
char filename[1024];
char *str = getenv("SLURM_PROCID");
if (str) {
sprintf(filename, "%s.%s", "$outfile", str);
fd = fopen(filename, "a");
if (fd) {
timestamp(fd);
fclose(fd);
}
}
}
}
return _real_func(data);
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than run artefacts left over from an earlier iteration.
  oshcc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"
  oshcc "lib_$scenario.c" -o "$scenario.so" -shared -fPIC -ldl >> "$logfile" 2>&1 || do_err "can not build lib_$scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $mpioptions holds several words and is split on purpose
  if LD_PRELOAD="$PWD/$scenario.so" oshrun -n $((nodes * ppn)) -npernode $ppn $mpioptions ./$scenario.out; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  # merge the per-process files written by the preload library; complaints
  # from cat/rm (e.g. no such files) go to the log
  cat "$outfile".* >> "$outfile" 2>> "$logfile"
  rm -- "$outfile".* >> "$logfile" 2>&1
  echo -e "oshrun:orte_rml_base_update_contact_info" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# MPI_Init application mpiexec:srun/pmix
#####################################################
# Builds an MPI program that prints a timestamp right after MPI_Init, starts
# nodes*ppn copies with "srun --mpi=pmix" and hands the output to do_analysis.
# Returns 1 without running anything when do_check_pmix finds no pmix plugin.
# Globals read: tempdir, logfile, nodes, ppn, slurmoptions, include_timestamp_func.
function do_test12() {
  local status
  local scenario=test12
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  if do_check_pmix; then
    do_msg "slurm has pmix plugin"
  else
    do_msg "skipping this test : slurm does not have pmix plugin"
    return 1
  fi
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
$include_timestamp_func
#include "mpi.h"
int main(int argc, char* argv[])
{
MPI_Init(&argc, &argv);
timestamp(stdout);
MPI_Finalize();
return 0;
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than time a binary left over from an earlier iteration.
  mpicc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $slurmoptions is a list of VAR=value words for env and is split on purpose
  if env $slurmoptions srun -n$((nodes * ppn)) -N$nodes --ntasks-per-node=$ppn --mpi=pmix ./$scenario.out >> "$outfile" 2>&1; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  echo -e "srun --mpi=pmix:MPI_Init" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# start_pes application oshrun:srun/pmix
#####################################################
# Builds an OpenSHMEM program that prints a timestamp right after start_pes,
# starts nodes*ppn copies with "srun --mpi=pmix" and hands the output to
# do_analysis.
# Returns 1 without running anything when do_check_pmix finds no pmix plugin.
# Globals read: tempdir, logfile, nodes, ppn, slurmoptions, include_timestamp_func.
function do_test13() {
  local status
  local scenario=test13
  local testdir=${tempdir}/$scenario
  local outfile=${testdir}/${nodes}x${ppn}_out.log
  local basefile=${testdir}/${nodes}x${ppn}_base.log

  do_msg "Running $scenario ${nodes}x${ppn} :"
  if do_check_pmix; then
    do_msg "slurm has pmix plugin"
  else
    do_msg "skipping this test : slurm does not have pmix plugin"
    return 1
  fi
  mkdir -p "$testdir"
  # A failed cd would otherwise drop sources and binaries into the caller's cwd.
  cd "$testdir" || do_err "can not enter $testdir"
  cat > "$scenario.c" <<END_MSG
$include_timestamp_func
#include "shmem.h"
int main(int argc, char* argv[])
{
start_pes(0);
timestamp(stdout);
return 0;
}
END_MSG
  # testdir is shared by all nodes x ppn iterations: stop on a failed build
  # rather than time a binary left over from an earlier iteration.
  oshcc "$scenario.c" -o "$scenario.out" >> "$logfile" 2>&1 || do_err "can not build $scenario.c"

  # Do test
  do_timestamp "start" 2>&1 | tee -a "$basefile"
  # $slurmoptions is a list of VAR=value words for env and is split on purpose
  if env $slurmoptions srun -n$((nodes * ppn)) -N$nodes --ntasks-per-node=$ppn --mpi=pmix ./$scenario.out >> "$outfile" 2>&1; then
    status=OK
  else
    status=FAIL
  fi
  do_timestamp "end" 2>&1 | tee -a "$basefile"
  if [ "$status" == "FAIL" ]; then
    do_err "can not launch a test"
  fi
  echo -e "srun --mpi=pmix:start_pes" > "${testdir}/info.log" 2>&1
  do_analysis "$testdir" "$basefile" "$outfile"
  do_msg "DONE"
}
# Main
###############################################################################
# Each mode works inside its own directory; failing to create or enter that
# directory is fatal, because everything that follows writes relative to it.
# Check if --exec option is passed ($exec is defined)
if [[ -n "${exec+defined}" ]]; then
  tempdir=$PWD/tmp/${prefix}.$$
  logfile=${tempdir}/${prefix}-time.log
  mkdir -p "$tempdir" || { echo "can not create $tempdir" >&2; exit 1; }
  rm -f "$logfile"
  cd "$tempdir" || { echo "can not enter $tempdir" >&2; exit 1; }
  do_exec
fi
# Check if --parse option is passed ($parse is defined)
if [[ -n "${parse+defined}" ]]; then
  # an empty --parse= falls back to ./tmp
  tempdir=${parse:-$PWD/tmp}
  logfile=${tempdir}/${prefix}-parse.log
  mkdir -p "$tempdir" || { echo "can not create $tempdir" >&2; exit 1; }
  rm -f "$logfile"
  cd "$tempdir" || { echo "can not enter $tempdir" >&2; exit 1; }
  do_parse "$tempdir"
fi
exit 0