1
1
1. Fixes according to (http://www.open-mpi.org/community/lists/devel/2014/09/15869.php)
2. Force mpisync:rank0 to gather results. Now sync info is written by rank0 to the output file.
3. Improve mpirun_prof: 1) adapt to the environment (SLURM/TORQUE); 2) recognize some nodeset-related mpirun options.

This commit was SVN r32772.
Этот коммит содержится в:
Artem Polyakov 2014-09-23 12:59:54 +00:00
родитель 9c20940190
Коммит f2e586980b
12 изменённых файлов: 360 добавлений и 111 удалений

Просмотреть файл

@ -185,6 +185,7 @@ fi
AC_DEFINE_UNQUOTED(OPAL_ENABLE_TIMING, $WANT_TIMING, AC_DEFINE_UNQUOTED(OPAL_ENABLE_TIMING, $WANT_TIMING,
[Whether we want developer-level timing framework or not]) [Whether we want developer-level timing framework or not])
AM_CONDITIONAL([OPAL_COMPILE_TIMING], [test "$WANT_TIMING" = "1"])
AM_CONDITIONAL([OPAL_INSTALL_TIMING_BINARIES], [test "$WANT_TIMING" = "1" -a "$enable_binaries" != "no"]) AM_CONDITIONAL([OPAL_INSTALL_TIMING_BINARIES], [test "$WANT_TIMING" = "1" -a "$enable_binaries" != "no"])

Просмотреть файл

@ -21,10 +21,16 @@
SUBDIRS += \ SUBDIRS += \
tools/ompi_info \ tools/ompi_info \
tools/wrappers \ tools/wrappers
tools/mpisync
if OPAL_COMPILE_TIMING
SUBDIRS += tools/mpisync
endif
DIST_SUBDIRS += \ DIST_SUBDIRS += \
tools/ompi_info \ tools/ompi_info \
tools/wrappers \ tools/wrappers
tools/mpisync
if OPAL_COMPILE_TIMING
DIST_SUBDIRS += tools/mpisync
endif

Просмотреть файл

@ -42,13 +42,24 @@ AM_CFLAGS = \
-DOPAL_CC_ABSOLUTE="\"@OPAL_CC_ABSOLUTE@\"" \ -DOPAL_CC_ABSOLUTE="\"@OPAL_CC_ABSOLUTE@\"" \
-DOMPI_CXX_ABSOLUTE="\"@OMPI_CXX_ABSOLUTE@\"" -DOMPI_CXX_ABSOLUTE="\"@OMPI_CXX_ABSOLUTE@\""
#if (OPAL_INSTALL_BINARIES && OPAL_ENABLE_TIMING) include $(top_srcdir)/Makefile.ompi-rules
if OPAL_INSTALL_TIMING_BINARIES if OPAL_INSTALL_TIMING_BINARIES
bin_PROGRAMS = mpisync bin_PROGRAMS = mpisync
bin_SCRIPTS = mpirun_prof ompi_timing_post bin_SCRIPTS = mpirun_prof ompi_timing_post
man_pages = mpisync.1
EXTRA_DIST = $(man_pages:.1=.1in)
nodist_man_MANS = $(man_pages)
$(nodist_man_MANS): $(top_builddir)/opal/include/opal_config.h
install-data-hook:
(cd $(DESTDIR)$(mandir)/man1; rm -f mpirun_prof.1; $(LN_S) mpisync.1 mpirun_prof.1)
(cd $(DESTDIR)$(mandir)/man1; rm -f ompi_timing_post.1; $(LN_S) mpisync.1 ompi_timing_post.1)
endif endif
mpisync_SOURCES = \ mpisync_SOURCES = \

Просмотреть файл

@ -1,23 +1,78 @@
#!/bin/sh #!/bin/sh
#!/bin/bash
#
# Copyright (c) 2014 Artem Polyakov <artpol84@gmail.com>
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
# Print an identifier for the current job on stdout.
#
# Preference order:
#   1. $SLURM_JOBID when it is set and non-empty,
#   2. $PBS_JOBID   when it is set and non-empty,
#   3. the PID of this shell ($$) otherwise.
#
# Written as a POSIX function definition (no "function" keyword) because the
# script's interpreter line is /bin/sh, which is not bash on every system.
get_jobid()
{
    # ${var:-default} falls back when var is unset OR empty, which matches
    # the "-n" tests this replaces.
    printf '%s\n' "${SLURM_JOBID:-${PBS_JOBID:-$$}}"
}
# Pick the node-set selection options out of an mpirun argument list.
#
# Arguments: the mpirun command line ("$@" of the caller).
# Output:    one line on stdout holding every recognised option together
#            with its value, each item preceded by a single space
#            (e.g. " -H a,b --hostfile hf").  All other arguments are dropped.
#
# Recognised options: -H, -host, --host, -hostfile, --hostfile,
#                     -machinefile, --machinefile.
#
# Written as a POSIX function definition (no "function" keyword) because the
# script's interpreter line is /bin/sh, which is not bash on every system.
extract_nodeset_opts()
{
    opts=""
    # Loop on the argument count rather than on "$1" being non-empty, so an
    # empty argument in the middle of the list does not end the scan early.
    while [ $# -gt 0 ]; do
        case "$1" in
            -H | -host | --host | \
            -hostfile | --hostfile | \
            -machinefile | --machinefile )
                if [ $# -ge 2 ]; then
                    opts="$opts $1 $2"
                    shift 2
                else
                    # The option is the last argument and has no value.
                    # "shift 2" would fail here: bash leaves the arguments
                    # untouched (endless loop), dash aborts the script.
                    # Forward the bare option and let mpirun report it.
                    opts="$opts $1"
                    shift
                fi
                ;;
            * )
                shift
                ;;
        esac
    done
    echo "$opts"
}
ompi_instdir=`dirname $0` ompi_instdir=`dirname $0`
syncfile="ompi_clock_sync_data.$$" jobid=`get_jobid`
tmp_timings=mpirun_prof_timings.out syncfile=`pwd`"/ompi_clock_sync_data.$jobid"
tmp_out=mpirun_prof.out tmp_timings=`pwd`"/ompi_timing_temp_file.$jobid"
tmp_out=`pwd`"/ompi_mpirun_prof.$jobid"
timing_bkp=$OMPI_MCA_opal_timing_file timing_bkp=$OMPI_MCA_opal_timing_file
export OMPI_MCA_opal_timing_file=$tmp_timings export OMPI_MCA_opal_timing_file=$tmp_timings
${ompi_instdir}/mpirun --npernode 1 ${ompi_instdir}/mpisync -o $syncfile >$tmp_out 2>&1
export OMPI_MCA_opal_timing_file=$timing_bkp opts=`extract_nodeset_opts $@`
export OMPI_MCA_opal_clksync_file=$syncfile ${ompi_instdir}/mpirun --npernode 1 $opts ${ompi_instdir}/mpisync -o $syncfile >$tmp_out 2>&1
export OMPI_MCA_opal_timing_output=$timing_bkp
export OMPI_MCA_opal_timing_sync_file=$syncfile
# Remove old output # Remove old output
rm -f $OMPI_MCA_opal_timing_file rm -f $OMPI_MCA_opal_timing_output
# Run a program of interest # Run a program of interest
${ompi_instdir}/mpirun $@ ${ompi_instdir}/mpirun $@
if [ -n "$timing_bkp" ]; then
${ompi_instdir}/ompi_timing_post $timing_bkp $timing_bkp.post
fi
# Cleanup # Cleanup
rm -f $syncfile rm -f $syncfile
rm -f $tmp_timings rm -f $tmp_timings

48
ompi/tools/mpisync/mpisync.1in Обычный файл
Просмотреть файл

@ -0,0 +1,48 @@
.\" Copyright (c) 2014 Artem Polyakov <artpol84@gmail.com>. All rights reserved.
.TH MPISYNC 1 "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
.SH NAME
mpisync, mpirun_prof, ompi_timing_post \- Open MPI timing tools
.
.SH SYNTAX
.B mpisync
[\fIoptions\fR]
.br
.B mpirun_prof
[\fIoptions\fR]
.br
.B ompi_timing_post
\fI<timing-file>\fR \fI<processed-file>\fR
.
.SH DESCRIPTION
.PP
.BR mpisync
determines clock offsets relative to the Head Node Process (HNP). It accepts the following options:
.TP
\fB\-o\fR, \fB\-\-output\fR
The name of the output file to which the offsets relative to the HNP will be written.
.TP
\fB\-h\fR, \fB\-\-help\fR
Print help information
.PP
.BR ompi_timing_post
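takes the timing output file and the name of the processed output file as parameters. The events are sorted by their timestamps. Next, each timestamp is replaced with a pair of time offsets written as
.BR first : previous ,
that is, the offsets relative to the first event and to the previous event.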
.PP
.BR mpirun_prof
is a wrapper around
.BR mpirun
that performs clock synchronisation and post-processing of the timing output file.
.SH NOTES
.PP
The mpisync code was derived from the MPIPerf project:
http://mpiperf.cpct.sibsutis.ru/index.php/Main/Documentation
.
.SH FILES
.PP
The output file has the following format:
.PP
<hostname> <round-trip-time> <offset-from-hnp>

Просмотреть файл

@ -1,4 +1,11 @@
#!/bin/bash #!/bin/bash
#
# Copyright (c) 2014 Artem Polyakov <artpol84@gmail.com>
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
##################################################################### #####################################################################
# Evaluate a floating point number expression. # Evaluate a floating point number expression.
@ -18,16 +25,17 @@ function float_eval(){
return $stat return $stat
} }
if [ -z "$1" ]; then if [ -z "$1" ] || [ -z "$2" ]; then
echo "Need the name of a timing file" echo "Need the name of a timing file and the output file"
exit 0 exit 0
fi fi
thefile=$1 thefile=$1
postfile=$2
sed '/^$/d' $thefile > ${thefile}_tmp sed '/^$/d' $thefile > ${thefile}_tmp
sort ${thefile}_tmp > ${thefile} sort ${thefile}_tmp > ${postfile}
read line < ${thefile} read line < ${postfile}
first_ts=`echo $line | awk '{ print $1 }' | sed -e 's/s//'` first_ts=`echo $line | awk '{ print $1 }' | sed -e 's/s//'`
prev_ts=$first_ts prev_ts=$first_ts
echo $first_ts echo $first_ts
@ -39,8 +47,8 @@ while read line ; do
newline=`echo $line | sed -e "s/$cur_ts/$dif1:$dif2/"` newline=`echo $line | sed -e "s/$cur_ts/$dif1:$dif2/"`
prev_ts=$cur_ts prev_ts=$cur_ts
echo $newline echo $newline
done < ${thefile} > ${thefile}_tmp done < ${postfile} > ${thefile}_tmp
cat ${thefile}_tmp > ${thefile} cat ${thefile}_tmp > ${postfile}
rm -f ${thefile}_tmp rm -f ${thefile}_tmp

Просмотреть файл

@ -23,8 +23,6 @@ typedef enum { Gen, Chk } prog_mode_t;
char *filename = NULL; char *filename = NULL;
prog_mode_t mode = Gen; prog_mode_t mode = Gen;
double orig_rtt = 0.0, orig_offs = 0.0;
void print_help(char *progname); void print_help(char *progname);
int parse_opts(int rank, int argc, char **argv); int parse_opts(int rank, int argc, char **argv);
@ -70,14 +68,13 @@ int main(int argc, char **argv)
MPI_Init(&argc, &argv); MPI_Init(&argc, &argv);
MPI_Comm comm = MPI_COMM_WORLD; MPI_Comm comm = MPI_COMM_WORLD;
int rank, commsize; int rank, commsize;
double offs, rtt; double offs = 0, rtt = 0;
char hname[1024]; char hname[1024];
MPI_Comm_rank(comm, &rank); MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &commsize); MPI_Comm_size(comm, &commsize);
int ret = parse_opts(rank, argc, argv); int ret = parse_opts(rank, argc, argv);
if( ret < 0 ){ if( ret < 0 ){
// Error exit // Error exit
MPI_Finalize(); MPI_Finalize();
@ -90,49 +87,58 @@ int main(int argc, char **argv)
if( filename == NULL ){ if( filename == NULL ){
if( rank == 0 ){ if( rank == 0 ){
fprintf(stderr, "The name of output file wasn't specified. Abort\n"); print_help(argv[0]);
} }
MPI_Finalize(); MPI_Finalize();
exit(1); exit(1);
} }
if( gethostname(hname, 1024) ){ if( gethostname(hname, 1024) ){
perror("Cannot get hostname"); perror("Cannot get hostname. Abort");
MPI_Finalize(); MPI_Abort(MPI_COMM_WORLD, 1);
exit(1);
}
// Clear output file if it exists
if( rank == 0 ){
FILE *fp = fopen(filename, "w");
if( fp == NULL ){
fprintf(stderr,"Cannot open output file %s for writing. Abort: %s\n",
filename, strerror(errno));
MPI_Finalize();
exit(1);
}
fclose(fp);
} }
int rc = hpctimer_initialize("gettimeofday"); int rc = hpctimer_initialize("gettimeofday");
if( rc == HPCTIMER_FAILURE ){ if( rc == HPCTIMER_FAILURE ){
fprintf(stderr, "Fail to initialize hpc timer. Abort\n"); fprintf(stderr, "Fail to initialize hpc timer. Abort\n");
MPI_Finalize(); MPI_Abort(MPI_COMM_WORLD, 1);
exit(1);
} }
offs = mpigclock_sync_linear(comm, 0, &rtt); offs = mpigclock_sync_linear(comm, 0, &rtt);
FILE *fp = fopen(filename,"a"); double send[2] = { rtt, offs };
if( fp == NULL ){ if( rank == 0 ){
fprintf(stderr, "Cannot open %s for appending. Abort\n", filename); double *measure = malloc(commsize*2*sizeof(double));
MPI_Finalize(); if( measure == NULL ){
exit(1); fprintf(stderr, "Fail to allocate memory. Abort\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
char *hnames = malloc(1024*commsize);
if( hnames == NULL ){
fprintf(stderr, "Fail to allocate memory. Abort\n");
MPI_Abort(MPI_COMM_WORLD, 1);
}
MPI_Gather(hname,1024,MPI_CHAR,hnames,1024,MPI_CHAR, 0, MPI_COMM_WORLD);
MPI_Gather(send,2,MPI_DOUBLE,measure,2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
char tmpname[128];
FILE *fp = fopen(filename,"w");
if( fp == NULL ){
fprintf(stderr, "Fail to open the file %s. Abort\n", filename);
MPI_Abort(MPI_COMM_WORLD, 1);
}
double (*m)[2] = (void*)measure;
char (*h)[1024] = (void*)hnames;
int i;
for(i=0; i<commsize;i++){
fprintf(fp, "%s %lf %lf\n", h[i], m[i][0], m[i][1]);
}
fclose(fp);
} else {
MPI_Gather(hname,1024, MPI_CHAR, NULL, 1024, MPI_CHAR, 0, MPI_COMM_WORLD);
MPI_Gather(send,2, MPI_DOUBLE, NULL, 2, MPI_DOUBLE, 0, MPI_COMM_WORLD);
} }
fprintf(fp, "%s %lf %lf\n", hname, rtt, offs);
fclose(fp);
MPI_Finalize(); MPI_Finalize();
return 0; return 0;

Просмотреть файл

@ -49,9 +49,9 @@ char *opal_net_private_ipv4 = NULL;
char *opal_set_max_sys_limits = NULL; char *opal_set_max_sys_limits = NULL;
#if OPAL_ENABLE_TIMING #if OPAL_ENABLE_TIMING
char *opal_clksync_file = NULL; char *opal_timing_sync_file = NULL;
char *opal_timing_file = NULL; char *opal_timing_output = NULL;
bool opal_timing_account_overhead = true; bool opal_timing_overhead = true;
#endif #endif
bool opal_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT); bool opal_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT);
@ -231,26 +231,28 @@ int opal_register_params(void)
} }
#if OPAL_ENABLE_TIMING #if OPAL_ENABLE_TIMING
(void) mca_base_var_register ("opal", "opal", NULL, "clksync_file", (void) mca_base_var_register ("opal", "opal", NULL, "timing_sync_file",
"Mapping of clock offsets from HNP node", "Clock synchronisation information generated by mpisync tool. You don't need to touch this if you use mpirun_prof tool.",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
&opal_clksync_file); &opal_timing_sync_file);
if( opal_timing_clksync_read(opal_clksync_file) ){ if( opal_timing_clocksync_read(opal_timing_sync_file) ){
opal_output(0, "Cannot read file %s containing clock synchronisation information\n", opal_clksync_file); opal_output(0, "Cannot read file %s containing clock synchronisation information\n", opal_timing_sync_file);
} }
(void) mca_base_var_register ("opal", "opal", NULL, "timing_file", (void) mca_base_var_register ("opal", "opal", NULL, "timing_output",
"OPAL Timing framework output file", "The name of output file for timing information. If this parameter is not set then output will be directed into OPAL debug channel.",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
&opal_timing_file); &opal_timing_output);
(void) mca_base_var_register ("opal", "opal", NULL, "timing_overhead", (void) mca_base_var_register ("opal", "opal", NULL, "timing_overhead",
"Whether account measured timing overhead or not (default: true)", "Timing framework introduce additional overhead (malloc's mostly)."
" The time spend in such costly routines is measured and may be accounted"
" (subtracted from timestamps). 'true' means consider overhead, 'false' - ignore (default: true).",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
&opal_timing_account_overhead); &opal_timing_overhead);
#endif #endif
opal_warn_on_fork = true; opal_warn_on_fork = true;

Просмотреть файл

@ -31,9 +31,9 @@ extern char *opal_net_private_ipv4;
extern char *opal_set_max_sys_limits; extern char *opal_set_max_sys_limits;
#if OPAL_ENABLE_TIMING #if OPAL_ENABLE_TIMING
extern char *opal_clksync_file; extern char *opal_timing_sync_file;
extern char *opal_timing_file; extern char *opal_timing_output;
extern bool opal_timing_account_overhead; extern bool opal_timing_overhead;
#endif #endif
OPAL_DECLSPEC extern int opal_initialized; OPAL_DECLSPEC extern int opal_initialized;

Просмотреть файл

@ -103,9 +103,12 @@ libopalutil_la_SOURCES = \
stacktrace.c \ stacktrace.c \
strncpy.c \ strncpy.c \
sys_limits.c \ sys_limits.c \
timings.c \
uri.c uri.c
if OPAL_COMPILE_TIMING
libopalutil_la_SOURCES += timings.c
endif
libopalutil_la_LIBADD = \ libopalutil_la_LIBADD = \
keyval/libopalutilkeyval.la keyval/libopalutilkeyval.la
libopalutil_la_DEPENDENCIES = \ libopalutil_la_DEPENDENCIES = \

Просмотреть файл

@ -8,18 +8,16 @@
* $HEADER$ * $HEADER$
*/ */
#define _GNU_SOURCE #include "opal_config.h"
#include <stdlib.h> #include <stdlib.h>
#include <stdarg.h> #include <stdarg.h>
#include <stdio.h> #include <stdio.h>
#include <unistd.h> #include <unistd.h>
#include "opal_config.h" #ifdef HAVE_STRING_H
// TODO : restore ifdefs
//#ifdef HAVE_STRING_H
#include <string.h> #include <string.h>
//#endif #endif
#include <errno.h> #include <errno.h>
#ifdef HAVE_SYS_TYPES_H #ifdef HAVE_SYS_TYPES_H
@ -41,9 +39,6 @@
#include "opal/util/timings.h" #include "opal/util/timings.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#if OPAL_ENABLE_TIMING
static void debug_hang(int i) static void debug_hang(int i)
{ {
while( i ){ while( i ){
@ -65,10 +60,9 @@ opal_mutex_t tm_lock;
static char *nodename = NULL; static char *nodename = NULL;
static char *jobid = ""; static char *jobid = "";
static double hnp_offs = 0; static double hnp_offs = 0;
// TODO use RTT to estimate precise of measurement
static double hnp_rtt = 0; static double hnp_rtt = 0;
int opal_timing_clksync_read(char *fname) int opal_timing_clocksync_read(char *fname)
{ {
int rc = 0; int rc = 0;
FILE *fp = NULL; FILE *fp = NULL;
@ -79,9 +73,8 @@ int opal_timing_clksync_read(char *fname)
char hname[1024]; char hname[1024];
if( gethostname(hname, 1024) ){ if( gethostname(hname, 1024) ){
rc = -1; opal_output(0, "opal_timing_clocksync_read(%s): Cannot gethostname\n",fname);
opal_output(0, "opal_timing_clksync_read(%s): Cannot gethostname\n",fname); return OPAL_ERROR;
return -1;
} }
nodename = strdup(hname); nodename = strdup(hname);
ptr = strchr(nodename,'.'); ptr = strchr(nodename,'.');
@ -95,8 +88,8 @@ int opal_timing_clksync_read(char *fname)
fp = fopen(fname,"r"); fp = fopen(fname,"r");
if( fp == NULL ){ if( fp == NULL ){
opal_output(0, "opal_timing_clksync_read(%s): Cannot open the file\n",fname); opal_output(0, "opal_timing_clocksync_read(%s): Cannot open the file\n",fname);
return -1; return OPAL_ERROR;
} }
while( getline(&line,&n,fp) > 0 ){ while( getline(&line,&n,fp) > 0 ){
@ -118,8 +111,8 @@ int opal_timing_clksync_read(char *fname)
} }
if( !found ){ if( !found ){
opal_output(0,"opal_timing_clksync_read: Can't find my host %s in %s\n", hname, fname); opal_output(0,"opal_timing_clocksync_read: Can't find my host %s in %s\n", hname, fname);
rc = -1; rc = OPAL_ERROR;
} }
err_exit: err_exit:
@ -138,7 +131,7 @@ int opal_timing_set_jobid(char *jid)
{ {
jobid = strdup(jid); jobid = strdup(jid);
if( jobid == NULL ){ if( jobid == NULL ){
return -1; return OPAL_ERROR;
} }
return 0; return 0;
} }
@ -159,7 +152,8 @@ opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t)
t->buffer = malloc(sizeof(opal_timing_event_t)*t->buffer_size); t->buffer = malloc(sizeof(opal_timing_event_t)*t->buffer_size);
if( t->buffer == NULL ){ if( t->buffer == NULL ){
// TODO: out of memory error process opal_output(0, "opal_timing_event_alloc: Out of memory!\n");
return NULL;
} }
memset(t->buffer, 0, sizeof(opal_timing_event_t)*t->buffer_size); memset(t->buffer, 0, sizeof(opal_timing_event_t)*t->buffer_size);
@ -193,6 +187,10 @@ void opal_timing_init(opal_timing_t *t)
opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...) opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...)
{ {
opal_timing_event_t *ev = opal_timing_event_alloc(t); opal_timing_event_t *ev = opal_timing_event_alloc(t);
if( ev == NULL ){
opal_timing_prep_t p = { t, NULL, OPAL_ERR_OUT_OF_RESOURCE };
return p;
}
OBJ_CONSTRUCT(ev, opal_timing_event_t); OBJ_CONSTRUCT(ev, opal_timing_event_t);
ev->ts = opal_timing_get_ts(); ev->ts = opal_timing_get_ts();
va_list args; va_list args;
@ -200,18 +198,20 @@ opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...)
vsnprintf(ev->descr, OPAL_TIMING_DESCR_MAX - 1, fmt, args); vsnprintf(ev->descr, OPAL_TIMING_DESCR_MAX - 1, fmt, args);
ev->descr[OPAL_TIMING_DESCR_MAX-1] = '\0'; ev->descr[OPAL_TIMING_DESCR_MAX-1] = '\0';
va_end( args ); va_end( args );
opal_timing_prep_t p = { t, ev }; opal_timing_prep_t p = { t, ev, 0 };
return p; return p;
} }
void opal_timing_add_step(opal_timing_prep_t p, void opal_timing_add_step(opal_timing_prep_t p,
const char *func, const char *file, int line) const char *func, const char *file, int line)
{ {
p.ev->func = func; if( !p.errcode ) {
p.ev->file = file; p.ev->func = func;
p.ev->line = line; p.ev->file = file;
p.ev->type = TEVENT; p.ev->line = line;
opal_list_append(p.t->events, (opal_list_item_t*)p.ev); p.ev->type = TEVENT;
opal_list_append(p.t->events, (opal_list_item_t*)p.ev);
}
} }
int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *prefix, char *fname) int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *prefix, char *fname)
@ -221,14 +221,14 @@ int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *pref
FILE *fp = NULL; FILE *fp = NULL;
char *buf = NULL; char *buf = NULL;
int buf_size = 0; int buf_size = 0;
int rc = 0; int rc = OPAL_SUCCESS;
debug_hang(0); debug_hang(0);
if( fname != NULL ){ if( fname != NULL ){
fp = fopen(fname,"a"); fp = fopen(fname,"a");
if( fp == NULL ){ if( fp == NULL ){
// TODO: log error opal_output(0, "opal_timing_report: Cannot open %s file for writing timing information!\n",fname);
rc = OPAL_ERROR; rc = OPAL_ERROR;
goto err_exit; goto err_exit;
} }
@ -237,8 +237,8 @@ int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *pref
buf = malloc(OPAL_TIMING_OUTBUF_SIZE+1); buf = malloc(OPAL_TIMING_OUTBUF_SIZE+1);
if( buf == NULL ){ if( buf == NULL ){
// TODO: log error opal_output(0, "opal_timing_report: Out of memory!\n");
rc = OPAL_ERROR; rc = OPAL_ERR_OUT_OF_RESOURCE;
goto err_exit; goto err_exit;
} }
buf[0] = '\0'; buf[0] = '\0';
@ -269,14 +269,16 @@ int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *pref
ev->descr, nodename, jobid, ev->func, file_name, ev->line); ev->descr, nodename, jobid, ev->func, file_name, ev->line);
} }
if( rc < 0 ){ if( rc < 0 ){
// TODO: log mem allocation problems opal_output(0, "opal_timing_report: Cannot asprintf!\n");
rc = OPAL_ERR_OUT_OF_RESOURCE;
goto err_exit; goto err_exit;
} }
rc = 0; rc = 0;
if( strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){ if( strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){
// TODO: log buffer overflow opal_output(0, "opal_timing_report: timing output buffer overflow!\n");
free(line); free(line);
rc = OPAL_ERR_OUT_OF_RESOURCE;
goto err_exit; goto err_exit;
} }
if( buf_size + strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){ if( buf_size + strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){
@ -313,6 +315,7 @@ err_exit:
free(buf); free(buf);
} }
if( fp != NULL ){ if( fp != NULL ){
fflush(fp);
fclose(fp); fclose(fp);
} }
return rc; return rc;
@ -346,5 +349,3 @@ void opal_timing_release(opal_timing_t *t)
OBJ_RELEASE(t->events); OBJ_RELEASE(t->events);
t->events = NULL; t->events = NULL;
} }
#endif

Просмотреть файл

@ -8,9 +8,8 @@
* $HEADER$ * $HEADER$
*/ */
#ifndef OPAL_UTIL_TIMING_H
#ifndef OPAL_SYS_TIMING_H #define OPAL_UTIL_TIMING_H
#define OPAL_SYS_TIMING_H
#include "opal/class/opal_list.h" #include "opal/class/opal_list.h"
#include "opal/runtime/opal_params.h" #include "opal/runtime/opal_params.h"
@ -47,40 +46,149 @@ typedef struct opal_timing_t
typedef struct { typedef struct {
opal_timing_t *t; opal_timing_t *t;
opal_timing_event_t *ev; opal_timing_event_t *ev;
int errcode;
} opal_timing_prep_t; } opal_timing_prep_t;
int opal_timing_clksync_read(char *opal_clksync_file); /**
* Read synchronisation information from the file
* provided through the MCA parameter.
* Should not be directly used, for service purposes.
*
* @param sync_file Name of the file to read
*
* @retval OPAL_SUCCESS On success
* @retval OPAL_ERROR On failure
*/
int opal_timing_clocksync_read(char *sync_file);
/**
* Pass string representation of ORTE job ID down to the OPAL.
* Should not be directly used, for service purposes.
*
* @param jid job id
*
* @retval OPAL_SUCCESS On success
* @retval OPAL_ERROR On failure
*/
int opal_timing_set_jobid(char *jid); int opal_timing_set_jobid(char *jid);
/**
* Initialize timing structure.
*
* @param t pointer to the timing handler structure
*
* @retval (none; the function returns void)
*/
void opal_timing_init(opal_timing_t *t); void opal_timing_init(opal_timing_t *t);
/**
* Prepare timing event, do all printf-like processing.
* Should not be directly used, for service purposes.
*
* @param t pointer to the timing handler structure
* @param fmt printf-like format
* @param ... other parameters that should be converted to string representation
*
* @retval partly filled opal_timing_prep_t structure
*/
opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...); opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...);
/**
* Enqueue timing event into the list of events in handler 't'.
*
* @param p result of opal_timing_prep_ev
* @param func function name where event occurs
* @param file file name where event occurs
* @param line line number in the file
*
* @retval
*/
void opal_timing_add_step(opal_timing_prep_t p, void opal_timing_add_step(opal_timing_prep_t p,
const char *func, const char *file, int line); const char *func, const char *file, int line);
/* /**
opal_timing_prep_t opal_timing_prep_end(opal_timing_t *t, int id, const char *fmt, ...); * Report all events that were enqueued in the timing handler 't'.
int opal_timing_begin(opal_timing_t *t, char *file, int line); * - if fname == NULL the output will be done using opal_output and
void opal_timing_end(opal_timing_prep_t p, char *file, int line); * each line will be prefixed with "prefix" to ease grep'ing.
*/ * - otherwise the corresponding file will be used for output in "append" mode
*/ * WARNING: not all filesystems provide enough support for that feature, some records may
* disappear.
*
* @param t timing handler
* @param accovh consider malloc overhead introduced by timing code
* @param prefix prefix to use when no fname was specified to ease grep'ing
* @param fname name of the output file (may be NULL)
*
* @retval OPAL_SUCCESS On success
* @retval OPAL_ERROR or OPAL_ERR_OUT_OF_RESOURCE On failure
*/
int opal_timing_report(opal_timing_t *t, bool accovh, const char *prefix, char *fname);
int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *prefix, char *fname); /**
* Release all memory allocated for the timing handler 't'.
*
* @param t timing handler
*
* @retval
*/
void opal_timing_release(opal_timing_t *t); void opal_timing_release(opal_timing_t *t);
/**
* Main macro for use in declaring opal timing handler;
* will be "compiled out" when OPAL is configured without
* --enable-timing.
*
*/
#define OPAL_TIMING_DECLARE(t) opal_timing_t t; // must have the semicolon here to avoid warnings when not enabled #define OPAL_TIMING_DECLARE(t) opal_timing_t t; // must have the semicolon here to avoid warnings when not enabled
/**
* Main macro for use in declaring external opal timing handler;
* will be "compiled out" when OPAL is configured without
* --enable-timing.
*
*/
#define OPAL_TIMING_DECLARE_EXT(x, t) x extern opal_timing_t t; // must have the semicolon here to avoid warnings when not enabled #define OPAL_TIMING_DECLARE_EXT(x, t) x extern opal_timing_t t; // must have the semicolon here to avoid warnings when not enabled
/**
* Main macro for use in initializing opal timing handler;
* will be "compiled out" when OPAL is configured without
* --enable-timing.
*
* @see opal_timing_init()
*/
#define OPAL_TIMING_INIT(t) opal_timing_init(t) #define OPAL_TIMING_INIT(t) opal_timing_init(t)
/**
* Main macro for use in adding a new timing event for the specified timing handler;
* will be "compiled out" when OPAL is configured without
* --enable-timing.
*
* @see opal_timing_add_step()
*/
#define OPAL_TIMING_EVENT(x) opal_timing_add_step( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__) #define OPAL_TIMING_EVENT(x) opal_timing_add_step( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__)
/**
* Main macro for use in reporting collected events;
* will be "compiled out" when OPAL is configured without
* --enable-timing.
*
* @param enable flag that enables/disables reporting. Used for fine-grained timing.
* @see opal_timing_report()
*/
#define OPAL_TIMING_REPORT(enable, t, prefix) { \ #define OPAL_TIMING_REPORT(enable, t, prefix) { \
if( enable ) { \ if( enable ) { \
opal_timing_report(t, opal_timing_account_overhead, prefix, opal_timing_file); \ opal_timing_report(t, opal_timing_overhead, prefix, opal_timing_output); \
} \ } \
} }
/**
* Main macro for use in releasing allocated resources;
* will be "compiled out" when OPAL is configured without
* --enable-timing.
*
* @see opal_timing_release()
*/
#define OPAL_TIMING_RELEASE(t) opal_timing_release(t) #define OPAL_TIMING_RELEASE(t) opal_timing_release(t)
#else #else