Fix timing framework:
1. Fixes according to (http://www.open-mpi.org/community/lists/devel/2014/09/15869.php) 2. Force mpisync:rank0 to gather results. Now sync info is written by rank0 to the output file. 3. Improve mpirun_prof: 1) adopt to the environment (SLURM/TORQUE); 2) recognize some noteset-related mpirun options. This commit was SVN r32772.
Этот коммит содержится в:
родитель
9c20940190
Коммит
f2e586980b
@ -185,6 +185,7 @@ fi
|
||||
AC_DEFINE_UNQUOTED(OPAL_ENABLE_TIMING, $WANT_TIMING,
|
||||
[Whether we want developer-level timing framework or not])
|
||||
|
||||
AM_CONDITIONAL([OPAL_COMPILE_TIMING], [test "$WANT_TIMING" = "1"])
|
||||
AM_CONDITIONAL([OPAL_INSTALL_TIMING_BINARIES], [test "$WANT_TIMING" = "1" -a "$enable_binaries" != "no"])
|
||||
|
||||
|
||||
|
@ -21,10 +21,16 @@
|
||||
|
||||
SUBDIRS += \
|
||||
tools/ompi_info \
|
||||
tools/wrappers \
|
||||
tools/mpisync
|
||||
tools/wrappers
|
||||
|
||||
if OPAL_COMPILE_TIMING
|
||||
SUBDIRS += tools/mpisync
|
||||
endif
|
||||
|
||||
DIST_SUBDIRS += \
|
||||
tools/ompi_info \
|
||||
tools/wrappers \
|
||||
tools/mpisync
|
||||
tools/wrappers
|
||||
|
||||
if OPAL_COMPILE_TIMING
|
||||
DIST_SUBDIRS += tools/mpisync
|
||||
endif
|
||||
|
@ -42,13 +42,24 @@ AM_CFLAGS = \
|
||||
-DOPAL_CC_ABSOLUTE="\"@OPAL_CC_ABSOLUTE@\"" \
|
||||
-DOMPI_CXX_ABSOLUTE="\"@OMPI_CXX_ABSOLUTE@\""
|
||||
|
||||
#if (OPAL_INSTALL_BINARIES && OPAL_ENABLE_TIMING)
|
||||
include $(top_srcdir)/Makefile.ompi-rules
|
||||
|
||||
if OPAL_INSTALL_TIMING_BINARIES
|
||||
|
||||
bin_PROGRAMS = mpisync
|
||||
bin_SCRIPTS = mpirun_prof ompi_timing_post
|
||||
|
||||
man_pages = mpisync.1
|
||||
EXTRA_DIST = $(man_pages:.1=.1in)
|
||||
|
||||
nodist_man_MANS = $(man_pages)
|
||||
|
||||
$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
|
||||
|
||||
install-data-hook:
|
||||
(cd $(DESTDIR)$(mandir)/man1; rm -f mpirun_prof.1; $(LN_S) mpisync.1 mpirun_prof.1)
|
||||
(cd $(DESTDIR)$(mandir)/man1; rm -f ompi_timing_post.1; $(LN_S) mpisync.1 ompi_timing_post.1)
|
||||
|
||||
endif
|
||||
|
||||
mpisync_SOURCES = \
|
||||
|
@ -1,23 +1,78 @@
|
||||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2014 Artem Polyakov <artpol84@gmail.com>
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
|
||||
function get_jobid()
|
||||
{
|
||||
if [ -n "$SLURM_JOBID" ]; then
|
||||
echo "$SLURM_JOBID"
|
||||
return
|
||||
elif [ -n "$PBS_JOBID" ]; then
|
||||
echo "$PBS_JOBID"
|
||||
return
|
||||
else
|
||||
echo "$$"
|
||||
return
|
||||
fi
|
||||
}
|
||||
|
||||
function extract_nodeset_opts()
|
||||
{
|
||||
short_opts="H:h:"
|
||||
long_opts="host:"
|
||||
opts=""
|
||||
|
||||
while [ -n "$1" ]; do
|
||||
case "$1" in
|
||||
-H | -host | --host )
|
||||
opts=$opts" $1 $2";
|
||||
shift 2;;
|
||||
-hostfile | --hostfile )
|
||||
opts=$opts" $1 $2";
|
||||
shift 2;;
|
||||
-machinefile | --machinefile )
|
||||
opts=$opts" $1 $2";
|
||||
shift 2;;
|
||||
*)
|
||||
shift 1;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "$opts"
|
||||
}
|
||||
|
||||
|
||||
ompi_instdir=`dirname $0`
|
||||
syncfile="ompi_clock_sync_data.$$"
|
||||
tmp_timings=mpirun_prof_timings.out
|
||||
tmp_out=mpirun_prof.out
|
||||
jobid=`get_jobid`
|
||||
syncfile=`pwd`"/ompi_clock_sync_data.$jobid"
|
||||
tmp_timings=`pwd`"/ompi_timing_temp_file.$jobid"
|
||||
tmp_out=`pwd`"/ompi_mpirun_prof.$jobid"
|
||||
|
||||
timing_bkp=$OMPI_MCA_opal_timing_file
|
||||
export OMPI_MCA_opal_timing_file=$tmp_timings
|
||||
${ompi_instdir}/mpirun --npernode 1 ${ompi_instdir}/mpisync -o $syncfile >$tmp_out 2>&1
|
||||
|
||||
export OMPI_MCA_opal_timing_file=$timing_bkp
|
||||
export OMPI_MCA_opal_clksync_file=$syncfile
|
||||
opts=`extract_nodeset_opts $@`
|
||||
${ompi_instdir}/mpirun --npernode 1 $opts ${ompi_instdir}/mpisync -o $syncfile >$tmp_out 2>&1
|
||||
|
||||
export OMPI_MCA_opal_timing_output=$timing_bkp
|
||||
export OMPI_MCA_opal_timing_sync_file=$syncfile
|
||||
|
||||
# Remove old output
|
||||
rm -f $OMPI_MCA_opal_timing_file
|
||||
rm -f $OMPI_MCA_opal_timing_output
|
||||
|
||||
# Run a program of interest
|
||||
${ompi_instdir}/mpirun $@
|
||||
|
||||
if [ -n "$timing_bkp" ]; then
|
||||
${ompi_instdir}/ompi_timing_post $timing_bkp $timing_bkp.post
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
rm -f $syncfile
|
||||
rm -f $tmp_timings
|
||||
|
48
ompi/tools/mpisync/mpisync.1in
Обычный файл
48
ompi/tools/mpisync/mpisync.1in
Обычный файл
@ -0,0 +1,48 @@
|
||||
.\" Copyright (c) 2014 Artem Polyakov <artpol84@gmail.com>. All rights reserved.
|
||||
.TH MPISYNC 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
|
||||
.SH NAME
|
||||
Open MPI timing tools
|
||||
.
|
||||
.SH SYNTAX
|
||||
.B mpisync
|
||||
[\fIoptions\fR]
|
||||
.br
|
||||
.B mpirun_prof
|
||||
[\fIoptions\fR]
|
||||
.br
|
||||
.B ompi_timing_post
|
||||
[\fI<timing-file>\fR] [\fI<processed-file>\fR]
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.PP
|
||||
.BR mpisync
|
||||
determines clock offsets relative to Head Node Process (HNP). It accepts the following options:
|
||||
.TP
|
||||
\fB\-o\fR, \fB\-\-output\fR
|
||||
The name of output file where offsets related to HNP will be written
|
||||
.TP
|
||||
\fB\-h\fR, \fB\-\-help\fR
|
||||
Print help information
|
||||
.PP
|
||||
.BR ompi_timing_post
|
||||
takes the timing output file as input parameter. The events are sorted by the timestamps. Next, the timestamps are replaced with time offsets relative to the
|
||||
.BR first
|
||||
:
|
||||
.BR previous
|
||||
event.
|
||||
.PP
|
||||
.BR mpirun_prof
|
||||
is a wrapper around
|
||||
.BR mpirun
|
||||
that performs clock synchronisation and post-processing of the timing output file.
|
||||
|
||||
.SH NOTES
|
||||
.PP
|
||||
The mpisync code was derived from MPIPerf project:
|
||||
http://mpiperf.cpct.sibsutis.ru/index.php/Main/Documentation
|
||||
.
|
||||
.SH FILES
|
||||
.PP
|
||||
The output file has following format:
|
||||
.PP
|
||||
<hostname> <round-trip-time> <offset-from-hnp>
|
@ -1,4 +1,11 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2014 Artem Polyakov <artpol84@gmail.com>
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
|
||||
#####################################################################
|
||||
# Evaluate a floating point number expression.
|
||||
@ -18,16 +25,17 @@ function float_eval(){
|
||||
return $stat
|
||||
}
|
||||
|
||||
if [ -z "$1" ]; then
|
||||
echo "Need the name of a timing file"
|
||||
if [ -z "$1" ] || [ -z "$2" ]; then
|
||||
echo "Need the name of a timing file and the output file"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
thefile=$1
|
||||
postfile=$2
|
||||
sed '/^$/d' $thefile > ${thefile}_tmp
|
||||
sort ${thefile}_tmp > ${thefile}
|
||||
sort ${thefile}_tmp > ${postfile}
|
||||
|
||||
read line < ${thefile}
|
||||
read line < ${postfile}
|
||||
first_ts=`echo $line | awk '{ print $1 }' | sed -e 's/s//'`
|
||||
prev_ts=$first_ts
|
||||
echo $first_ts
|
||||
@ -39,8 +47,8 @@ while read line ; do
|
||||
newline=`echo $line | sed -e "s/$cur_ts/$dif1:$dif2/"`
|
||||
prev_ts=$cur_ts
|
||||
echo $newline
|
||||
done < ${thefile} > ${thefile}_tmp
|
||||
done < ${postfile} > ${thefile}_tmp
|
||||
|
||||
cat ${thefile}_tmp > ${thefile}
|
||||
cat ${thefile}_tmp > ${postfile}
|
||||
|
||||
rm -f ${thefile}_tmp
|
@ -23,8 +23,6 @@ typedef enum { Gen, Chk } prog_mode_t;
|
||||
|
||||
char *filename = NULL;
|
||||
prog_mode_t mode = Gen;
|
||||
double orig_rtt = 0.0, orig_offs = 0.0;
|
||||
|
||||
void print_help(char *progname);
|
||||
int parse_opts(int rank, int argc, char **argv);
|
||||
|
||||
@ -70,14 +68,13 @@ int main(int argc, char **argv)
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm comm = MPI_COMM_WORLD;
|
||||
int rank, commsize;
|
||||
double offs, rtt;
|
||||
double offs = 0, rtt = 0;
|
||||
char hname[1024];
|
||||
|
||||
MPI_Comm_rank(comm, &rank);
|
||||
MPI_Comm_size(comm, &commsize);
|
||||
|
||||
int ret = parse_opts(rank, argc, argv);
|
||||
|
||||
if( ret < 0 ){
|
||||
// Error exit
|
||||
MPI_Finalize();
|
||||
@ -90,49 +87,58 @@ int main(int argc, char **argv)
|
||||
|
||||
if( filename == NULL ){
|
||||
if( rank == 0 ){
|
||||
fprintf(stderr, "The name of output file wasn't specified. Abort\n");
|
||||
print_help(argv[0]);
|
||||
}
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
if( gethostname(hname, 1024) ){
|
||||
perror("Cannot get hostname");
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Clear output file if it exists
|
||||
if( rank == 0 ){
|
||||
FILE *fp = fopen(filename, "w");
|
||||
if( fp == NULL ){
|
||||
fprintf(stderr,"Cannot open output file %s for writing. Abort: %s\n",
|
||||
filename, strerror(errno));
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
}
|
||||
fclose(fp);
|
||||
perror("Cannot get hostname. Abort");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
int rc = hpctimer_initialize("gettimeofday");
|
||||
|
||||
if( rc == HPCTIMER_FAILURE ){
|
||||
fprintf(stderr, "Fail to initialize hpc timer. Abort\n");
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
offs = mpigclock_sync_linear(comm, 0, &rtt);
|
||||
|
||||
FILE *fp = fopen(filename,"a");
|
||||
if( fp == NULL ){
|
||||
fprintf(stderr, "Cannot open %s for appending. Abort\n", filename);
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
double send[2] = { rtt, offs };
|
||||
if( rank == 0 ){
|
||||
double *measure = malloc(commsize*2*sizeof(double));
|
||||
if( measure == NULL ){
|
||||
fprintf(stderr, "Fail to allocate memory. Abort\n");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
char *hnames = malloc(1024*commsize);
|
||||
if( hnames == NULL ){
|
||||
fprintf(stderr, "Fail to allocate memory. Abort\n");
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
|
||||
MPI_Gather(hname,1024,MPI_CHAR,hnames,1024,MPI_CHAR, 0, MPI_COMM_WORLD);
|
||||
MPI_Gather(send,2,MPI_DOUBLE,measure,2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
||||
char tmpname[128];
|
||||
FILE *fp = fopen(filename,"w");
|
||||
if( fp == NULL ){
|
||||
fprintf(stderr, "Fail to open the file %s. Abort\n", filename);
|
||||
MPI_Abort(MPI_COMM_WORLD, 1);
|
||||
}
|
||||
double (*m)[2] = (void*)measure;
|
||||
char (*h)[1024] = (void*)hnames;
|
||||
int i;
|
||||
for(i=0; i<commsize;i++){
|
||||
fprintf(fp, "%s %lf %lf\n", h[i], m[i][0], m[i][1]);
|
||||
}
|
||||
fclose(fp);
|
||||
} else {
|
||||
MPI_Gather(hname,1024, MPI_CHAR, NULL, 1024, MPI_CHAR, 0, MPI_COMM_WORLD);
|
||||
MPI_Gather(send,2, MPI_DOUBLE, NULL, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
|
||||
}
|
||||
fprintf(fp, "%s %lf %lf\n", hname, rtt, offs);
|
||||
fclose(fp);
|
||||
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
|
@ -49,9 +49,9 @@ char *opal_net_private_ipv4 = NULL;
|
||||
char *opal_set_max_sys_limits = NULL;
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
char *opal_clksync_file = NULL;
|
||||
char *opal_timing_file = NULL;
|
||||
bool opal_timing_account_overhead = true;
|
||||
char *opal_timing_sync_file = NULL;
|
||||
char *opal_timing_output = NULL;
|
||||
bool opal_timing_overhead = true;
|
||||
#endif
|
||||
|
||||
bool opal_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT);
|
||||
@ -231,26 +231,28 @@ int opal_register_params(void)
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
(void) mca_base_var_register ("opal", "opal", NULL, "clksync_file",
|
||||
"Mapping of clock offsets from HNP node",
|
||||
(void) mca_base_var_register ("opal", "opal", NULL, "timing_sync_file",
|
||||
"Clock synchronisation information generated by mpisync tool. You don't need to touch this if you use mpirun_prof tool.",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&opal_clksync_file);
|
||||
if( opal_timing_clksync_read(opal_clksync_file) ){
|
||||
opal_output(0, "Cannot read file %s containing clock synchronisation information\n", opal_clksync_file);
|
||||
&opal_timing_sync_file);
|
||||
if( opal_timing_clocksync_read(opal_timing_sync_file) ){
|
||||
opal_output(0, "Cannot read file %s containing clock synchronisation information\n", opal_timing_sync_file);
|
||||
}
|
||||
|
||||
(void) mca_base_var_register ("opal", "opal", NULL, "timing_file",
|
||||
"OPAL Timing framework output file",
|
||||
(void) mca_base_var_register ("opal", "opal", NULL, "timing_output",
|
||||
"The name of output file for timing information. If this parameter is not set then output will be directed into OPAL debug channel.",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&opal_timing_file);
|
||||
&opal_timing_output);
|
||||
|
||||
(void) mca_base_var_register ("opal", "opal", NULL, "timing_overhead",
|
||||
"Whether account measured timing overhead or not (default: true)",
|
||||
"Timing framework introduce additional overhead (malloc's mostly)."
|
||||
" The time spend in such costly routines is measured and may be accounted"
|
||||
" (subtracted from timestamps). 'true' means consider overhead, 'false' - ignore (default: true).",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&opal_timing_account_overhead);
|
||||
&opal_timing_overhead);
|
||||
#endif
|
||||
|
||||
opal_warn_on_fork = true;
|
||||
|
@ -31,9 +31,9 @@ extern char *opal_net_private_ipv4;
|
||||
extern char *opal_set_max_sys_limits;
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
extern char *opal_clksync_file;
|
||||
extern char *opal_timing_file;
|
||||
extern bool opal_timing_account_overhead;
|
||||
extern char *opal_timing_sync_file;
|
||||
extern char *opal_timing_output;
|
||||
extern bool opal_timing_overhead;
|
||||
#endif
|
||||
|
||||
OPAL_DECLSPEC extern int opal_initialized;
|
||||
|
@ -103,9 +103,12 @@ libopalutil_la_SOURCES = \
|
||||
stacktrace.c \
|
||||
strncpy.c \
|
||||
sys_limits.c \
|
||||
timings.c \
|
||||
uri.c
|
||||
|
||||
if OPAL_COMPILE_TIMING
|
||||
libopalutil_la_SOURCES += timings.c
|
||||
endif
|
||||
|
||||
libopalutil_la_LIBADD = \
|
||||
keyval/libopalutilkeyval.la
|
||||
libopalutil_la_DEPENDENCIES = \
|
||||
|
@ -8,18 +8,16 @@
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include "opal_config.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
// TODO : restore ifdefs
|
||||
//#ifdef HAVE_STRING_H
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
//#endif
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
@ -41,9 +39,6 @@
|
||||
#include "opal/util/timings.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
|
||||
|
||||
static void debug_hang(int i)
|
||||
{
|
||||
while( i ){
|
||||
@ -65,10 +60,9 @@ opal_mutex_t tm_lock;
|
||||
static char *nodename = NULL;
|
||||
static char *jobid = "";
|
||||
static double hnp_offs = 0;
|
||||
// TODO use RTT to estimate precise of measurement
|
||||
static double hnp_rtt = 0;
|
||||
|
||||
int opal_timing_clksync_read(char *fname)
|
||||
int opal_timing_clocksync_read(char *fname)
|
||||
{
|
||||
int rc = 0;
|
||||
FILE *fp = NULL;
|
||||
@ -79,9 +73,8 @@ int opal_timing_clksync_read(char *fname)
|
||||
|
||||
char hname[1024];
|
||||
if( gethostname(hname, 1024) ){
|
||||
rc = -1;
|
||||
opal_output(0, "opal_timing_clksync_read(%s): Cannot gethostname\n",fname);
|
||||
return -1;
|
||||
opal_output(0, "opal_timing_clocksync_read(%s): Cannot gethostname\n",fname);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
nodename = strdup(hname);
|
||||
ptr = strchr(nodename,'.');
|
||||
@ -95,8 +88,8 @@ int opal_timing_clksync_read(char *fname)
|
||||
|
||||
fp = fopen(fname,"r");
|
||||
if( fp == NULL ){
|
||||
opal_output(0, "opal_timing_clksync_read(%s): Cannot open the file\n",fname);
|
||||
return -1;
|
||||
opal_output(0, "opal_timing_clocksync_read(%s): Cannot open the file\n",fname);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
while( getline(&line,&n,fp) > 0 ){
|
||||
@ -118,8 +111,8 @@ int opal_timing_clksync_read(char *fname)
|
||||
}
|
||||
|
||||
if( !found ){
|
||||
opal_output(0,"opal_timing_clksync_read: Can't find my host %s in %s\n", hname, fname);
|
||||
rc = -1;
|
||||
opal_output(0,"opal_timing_clocksync_read: Can't find my host %s in %s\n", hname, fname);
|
||||
rc = OPAL_ERROR;
|
||||
}
|
||||
|
||||
err_exit:
|
||||
@ -138,7 +131,7 @@ int opal_timing_set_jobid(char *jid)
|
||||
{
|
||||
jobid = strdup(jid);
|
||||
if( jobid == NULL ){
|
||||
return -1;
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -159,7 +152,8 @@ opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t)
|
||||
|
||||
t->buffer = malloc(sizeof(opal_timing_event_t)*t->buffer_size);
|
||||
if( t->buffer == NULL ){
|
||||
// TODO: out of memory error process
|
||||
opal_output(0, "opal_timing_event_alloc: Out of memory!\n");
|
||||
return NULL;
|
||||
}
|
||||
memset(t->buffer, 0, sizeof(opal_timing_event_t)*t->buffer_size);
|
||||
|
||||
@ -193,6 +187,10 @@ void opal_timing_init(opal_timing_t *t)
|
||||
opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...)
|
||||
{
|
||||
opal_timing_event_t *ev = opal_timing_event_alloc(t);
|
||||
if( ev == NULL ){
|
||||
opal_timing_prep_t p = { t, NULL, OPAL_ERR_OUT_OF_RESOURCE };
|
||||
return p;
|
||||
}
|
||||
OBJ_CONSTRUCT(ev, opal_timing_event_t);
|
||||
ev->ts = opal_timing_get_ts();
|
||||
va_list args;
|
||||
@ -200,18 +198,20 @@ opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...)
|
||||
vsnprintf(ev->descr, OPAL_TIMING_DESCR_MAX - 1, fmt, args);
|
||||
ev->descr[OPAL_TIMING_DESCR_MAX-1] = '\0';
|
||||
va_end( args );
|
||||
opal_timing_prep_t p = { t, ev };
|
||||
opal_timing_prep_t p = { t, ev, 0 };
|
||||
return p;
|
||||
}
|
||||
|
||||
void opal_timing_add_step(opal_timing_prep_t p,
|
||||
const char *func, const char *file, int line)
|
||||
{
|
||||
p.ev->func = func;
|
||||
p.ev->file = file;
|
||||
p.ev->line = line;
|
||||
p.ev->type = TEVENT;
|
||||
opal_list_append(p.t->events, (opal_list_item_t*)p.ev);
|
||||
if( !p.errcode ) {
|
||||
p.ev->func = func;
|
||||
p.ev->file = file;
|
||||
p.ev->line = line;
|
||||
p.ev->type = TEVENT;
|
||||
opal_list_append(p.t->events, (opal_list_item_t*)p.ev);
|
||||
}
|
||||
}
|
||||
|
||||
int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *prefix, char *fname)
|
||||
@ -221,14 +221,14 @@ int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *pref
|
||||
FILE *fp = NULL;
|
||||
char *buf = NULL;
|
||||
int buf_size = 0;
|
||||
int rc = 0;
|
||||
int rc = OPAL_SUCCESS;
|
||||
|
||||
debug_hang(0);
|
||||
|
||||
if( fname != NULL ){
|
||||
fp = fopen(fname,"a");
|
||||
if( fp == NULL ){
|
||||
// TODO: log error
|
||||
opal_output(0, "opal_timing_report: Cannot open %s file for writing timing information!\n",fname);
|
||||
rc = OPAL_ERROR;
|
||||
goto err_exit;
|
||||
}
|
||||
@ -237,8 +237,8 @@ int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *pref
|
||||
|
||||
buf = malloc(OPAL_TIMING_OUTBUF_SIZE+1);
|
||||
if( buf == NULL ){
|
||||
// TODO: log error
|
||||
rc = OPAL_ERROR;
|
||||
opal_output(0, "opal_timing_report: Out of memory!\n");
|
||||
rc = OPAL_ERR_OUT_OF_RESOURCE;
|
||||
goto err_exit;
|
||||
}
|
||||
buf[0] = '\0';
|
||||
@ -269,14 +269,16 @@ int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *pref
|
||||
ev->descr, nodename, jobid, ev->func, file_name, ev->line);
|
||||
}
|
||||
if( rc < 0 ){
|
||||
// TODO: log mem allocation problems
|
||||
opal_output(0, "opal_timing_report: Cannot asprintf!\n");
|
||||
rc = OPAL_ERR_OUT_OF_RESOURCE;
|
||||
goto err_exit;
|
||||
}
|
||||
rc = 0;
|
||||
|
||||
if( strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){
|
||||
// TODO: log buffer overflow
|
||||
opal_output(0, "opal_timing_report: timing output buffer overflow!\n");
|
||||
free(line);
|
||||
rc = OPAL_ERR_OUT_OF_RESOURCE;
|
||||
goto err_exit;
|
||||
}
|
||||
if( buf_size + strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){
|
||||
@ -313,6 +315,7 @@ err_exit:
|
||||
free(buf);
|
||||
}
|
||||
if( fp != NULL ){
|
||||
fflush(fp);
|
||||
fclose(fp);
|
||||
}
|
||||
return rc;
|
||||
@ -346,5 +349,3 @@ void opal_timing_release(opal_timing_t *t)
|
||||
OBJ_RELEASE(t->events);
|
||||
t->events = NULL;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -8,9 +8,8 @@
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#ifndef OPAL_SYS_TIMING_H
|
||||
#define OPAL_SYS_TIMING_H
|
||||
#ifndef OPAL_UTIL_TIMING_H
|
||||
#define OPAL_UTIL_TIMING_H
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/runtime/opal_params.h"
|
||||
@ -47,40 +46,149 @@ typedef struct opal_timing_t
|
||||
typedef struct {
|
||||
opal_timing_t *t;
|
||||
opal_timing_event_t *ev;
|
||||
int errcode;
|
||||
} opal_timing_prep_t;
|
||||
|
||||
int opal_timing_clksync_read(char *opal_clksync_file);
|
||||
/**
|
||||
* Read synchronisation information from the file
|
||||
* provided through the MCA parameter.
|
||||
* Should not be directly used, for service purposes.
|
||||
*
|
||||
* @param sync_file Name of the file to read
|
||||
*
|
||||
* @retval OPAL_SUCCESS On success
|
||||
* @retval OPAL_ERROR On failure
|
||||
*/
|
||||
int opal_timing_clocksync_read(char *sync_file);
|
||||
|
||||
/**
|
||||
* Pass string representation of ORTE job ID down to the OPAL.
|
||||
* Should not be directly used, for service purposes.
|
||||
*
|
||||
* @param jid job id
|
||||
*
|
||||
* @retval OPAL_SUCCESS On success
|
||||
* @retval OPAL_ERROR On failure
|
||||
*/
|
||||
int opal_timing_set_jobid(char *jid);
|
||||
|
||||
/**
|
||||
* Initialize timing structure.
|
||||
*
|
||||
* @param t pointer to the timing handler structure
|
||||
*
|
||||
* @retval OPAL_SUCCESS On success
|
||||
* @retval OPAL_ERROR On failure
|
||||
*/
|
||||
void opal_timing_init(opal_timing_t *t);
|
||||
|
||||
/**
|
||||
* Prepare timing event, do all printf-like processing.
|
||||
* Should not be directly used, for service purposes.
|
||||
*
|
||||
* @param t pointer to the timing handler structure
|
||||
* @param fmt printf-like format
|
||||
* @param ... other parameters that should be converted to string representation
|
||||
*
|
||||
* @retval partly filled opal_timing_prep_t structure
|
||||
*/
|
||||
opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...);
|
||||
|
||||
/**
|
||||
* Enqueue timing event into the list of events in handler 't'.
|
||||
*
|
||||
* @param p result of opal_timing_prep_ev
|
||||
* @param func function name where event occurs
|
||||
* @param file file name where event occurs
|
||||
* @param line line number in the file
|
||||
*
|
||||
* @retval
|
||||
*/
|
||||
void opal_timing_add_step(opal_timing_prep_t p,
|
||||
const char *func, const char *file, int line);
|
||||
|
||||
/*
|
||||
opal_timing_prep_t opal_timing_prep_end(opal_timing_t *t, int id, const char *fmt, ...);
|
||||
int opal_timing_begin(opal_timing_t *t, char *file, int line);
|
||||
void opal_timing_end(opal_timing_prep_t p, char *file, int line);
|
||||
*/
|
||||
/**
|
||||
* Report all events that were enqueued in the timing handler 't'.
|
||||
* - if fname == NULL the output will be done using opal_output and
|
||||
* each line will be prefixed with "prefix" to ease grep'ing.
|
||||
* - otherwise the corresponding file will be used for output in "append" mode
|
||||
* WARRNING: not all filesystems provide enough support for that feature, some records may
|
||||
* disappear.
|
||||
*
|
||||
* @param t timing handler
|
||||
* @param accovh consider malloc overhead introduced by timing code
|
||||
* @param prefix prefix to use when no fname was specifyed to ease grep'ing
|
||||
* @param fname name of the output file (may be NULL)
|
||||
*
|
||||
* @retval OPAL_SUCCESS On success
|
||||
* @retval OPAL_ERROR or OPAL_ERR_OUT_OF_RESOURCE On failure
|
||||
*/
|
||||
int opal_timing_report(opal_timing_t *t, bool accovh, const char *prefix, char *fname);
|
||||
|
||||
int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *prefix, char *fname);
|
||||
/**
|
||||
* Release all memory allocated for the timing handler 't'.
|
||||
*
|
||||
* @param t timing handler
|
||||
*
|
||||
* @retval
|
||||
*/
|
||||
void opal_timing_release(opal_timing_t *t);
|
||||
|
||||
/**
|
||||
* Main macro for use in declaring opal timing handler;
|
||||
* will be "compiled out" when OPAL is configured without
|
||||
* --enable-timing.
|
||||
*
|
||||
*/
|
||||
#define OPAL_TIMING_DECLARE(t) opal_timing_t t; // must have the semicolon here to avoid warnings when not enabled
|
||||
|
||||
/**
|
||||
* Main macro for use in declaring external opal timing handler;
|
||||
* will be "compiled out" when OPAL is configured without
|
||||
* --enable-timing.
|
||||
*
|
||||
*/
|
||||
#define OPAL_TIMING_DECLARE_EXT(x, t) x extern opal_timing_t t; // must have the semicolon here to avoid warnings when not enabled
|
||||
|
||||
/**
|
||||
* Main macro for use in initializing opal timing handler;
|
||||
* will be "compiled out" when OPAL is configured without
|
||||
* --enable-timing.
|
||||
*
|
||||
* @see opal_timing_init()
|
||||
*/
|
||||
#define OPAL_TIMING_INIT(t) opal_timing_init(t)
|
||||
|
||||
/**
|
||||
* Main macro for use in adding new timing event for the specifyed timing handler;
|
||||
* will be "compiled out" when OPAL is configured without
|
||||
* --enable-timing.
|
||||
*
|
||||
* @see opal_timing_add_step()
|
||||
*/
|
||||
#define OPAL_TIMING_EVENT(x) opal_timing_add_step( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__)
|
||||
|
||||
/**
|
||||
* Main macro for use in reporting collected events;
|
||||
* will be "compiled out" when OPAL is configured without
|
||||
* --enable-timing.
|
||||
*
|
||||
* @param enable flag that enables/disables reporting. Used for fine-grained timing.
|
||||
* @see opal_timing_report()
|
||||
*/
|
||||
#define OPAL_TIMING_REPORT(enable, t, prefix) { \
|
||||
if( enable ) { \
|
||||
opal_timing_report(t, opal_timing_account_overhead, prefix, opal_timing_file); \
|
||||
opal_timing_report(t, opal_timing_overhead, prefix, opal_timing_output); \
|
||||
} \
|
||||
}
|
||||
|
||||
/**
|
||||
* Main macro for use in releasing allocated resources;
|
||||
* will be "compiled out" when OPAL is configured without
|
||||
* --enable-timing.
|
||||
*
|
||||
* @see opal_timing_release()
|
||||
*/
|
||||
#define OPAL_TIMING_RELEASE(t) opal_timing_release(t)
|
||||
|
||||
#else
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user