From 8ffad75a0a75ff37674df97416c7673c4fd54034 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Mon, 8 Dec 2014 14:52:37 +0600 Subject: [PATCH] Introduce timing interval measurement facility in timing framework --- ompi/runtime/ompi_mpi_finalize.c | 8 +- ompi/runtime/ompi_mpi_init.c | 34 +- ompi/tools/mpisync/mpirun_prof | 5 +- ompi/tools/mpisync/ompi_timing_post | 68 ++-- opal/util/timings.c | 550 ++++++++++++++++++++++++---- opal/util/timings.h | 253 ++++++++++++- orte/mca/oob/base/oob_base_frame.c | 2 +- orte/mca/rml/base/rml_base_frame.c | 2 +- 8 files changed, 775 insertions(+), 147 deletions(-) diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index 1ae880dc7e..e9f10a9640 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -89,6 +89,7 @@ #include "ompi/runtime/ompi_cr.h" extern bool ompi_enable_timing; +extern bool ompi_enable_timing_ext; int ompi_mpi_finalize(void) { @@ -151,7 +152,7 @@ int ompi_mpi_finalize(void) opal_progress_event_users_increment(); /* check to see if we want timing information */ - OPAL_TIMING_EVENT((&tm,"Start barrier")); + OPAL_TIMING_MSTART((&tm,"time to execute finalize barrier")); /* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across *all* connected processes. This only means that all processes @@ -232,8 +233,9 @@ int ompi_mpi_finalize(void) /* check for timing request - get stop time and report elapsed time if so */ - OPAL_TIMING_EVENT((&tm,"Finish barrier")); - OPAL_TIMING_REPORT(ompi_enable_timing, &tm, "MPI_Finish"); + OPAL_TIMING_MSTOP(&tm); + OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); + OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm); OPAL_TIMING_RELEASE(&tm); /* diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 35509cfb01..a351b7fbe3 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -297,7 +297,7 @@ opal_hash_table_t ompi_mpi_f90_complex_hashtable; */ opal_list_t ompi_registered_datareps; -bool ompi_enable_timing; +bool ompi_enable_timing, ompi_enable_timing_ext; extern bool ompi_mpi_yield_when_idle; extern int ompi_mpi_event_tick_rate; @@ -366,6 +366,13 @@ static int ompi_register_mca_variables(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_enable_timing); + ompi_enable_timing_ext = false; + (void) mca_base_var_register("ompi", "ompi", NULL, "timing_ext", + "Request that critical timing loops be measured", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_9, + MCA_BASE_VAR_SCOPE_READONLY, + &ompi_enable_timing_ext); return OMPI_SUCCESS; } @@ -436,7 +443,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) mca_base_var_set_value(ret, allvalue, 4, MCA_BASE_VAR_SOURCE_DEFAULT, NULL); } - OPAL_TIMING_EVENT((&tm,"Start")); + OPAL_TIMING_MSTART((&tm,"time from start to completion of rte_init")); /* if we were not externally started, then we need to setup * some envars so the MPI_INFO_ENV can get the cmd name @@ -470,7 +477,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_rte_initialized = true; /* check for timing request - get stop time and report elapsed time if so */ - OPAL_TIMING_EVENT((&tm,"rte_init complete")); + OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex")); #if OPAL_HAVE_HWLOC /* if hwloc is available but didn't get setup for some @@ -608,7 +615,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } /* check for timing request - get stop time and report elapsed time if so */ - OPAL_TIMING_EVENT((&tm,"Start modex")); + OPAL_TIMING_MNEXT((&tm,"time to execute modex")); /* exchange connection info - this function may also act as a barrier * if data exchange is required. The modex occurs solely across procs @@ -617,7 +624,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) */ OPAL_FENCE(NULL, 0, NULL, NULL); - OPAL_TIMING_EVENT((&tm,"End modex")); + OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); /* select buffered send allocator component to be used */ if( OMPI_SUCCESS != @@ -773,9 +780,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Do we need to wait for a debugger? */ ompi_rte_wait_for_debugger(); - /* check for timing request - get stop time and report elapsed - time if so, then start the clock again */ - OPAL_TIMING_EVENT((&tm,"Start barrier")); + /* Next timing measurement */ + OPAL_TIMING_MNEXT((&tm,"time to execute barrier")); /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax @@ -784,7 +790,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* check for timing request - get stop time and report elapsed time if so, then start the clock again */ - OPAL_TIMING_EVENT((&tm,"End barrier")); + OPAL_TIMING_MNEXT((&tm,"time from barrier to complete mpi_init")); #if OPAL_ENABLE_PROGRESS_THREADS == 0 /* Start setting up the event engine for MPI operations. Don't @@ -853,8 +859,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } - - /* Check whether we have been spawned or not. We introduce that at the very end, since we need collectives, datatypes, ptls etc. up and running here.... */ @@ -930,9 +934,11 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_mpi_initialized = true; - /* check for timing request - get stop time and report elapsed time if so */ - OPAL_TIMING_EVENT((&tm,"Finish")); - OPAL_TIMING_REPORT(ompi_enable_timing, &tm,"MPI Init"); + /* Finish last measurement, output results + * and clear timing structure */ + OPAL_TIMING_MSTOP(&tm); + OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); + OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm); OPAL_TIMING_RELEASE(&tm); return MPI_SUCCESS; diff --git a/ompi/tools/mpisync/mpirun_prof b/ompi/tools/mpisync/mpirun_prof index 42f8b14dd4..9acb8b65b6 100755 --- a/ompi/tools/mpisync/mpirun_prof +++ b/ompi/tools/mpisync/mpirun_prof @@ -1,4 +1,3 @@ -#!/bin/sh #!/bin/bash # # Copyright (c) 2014 Artem Polyakov @@ -54,8 +53,8 @@ syncfile=`pwd`"/ompi_clock_sync_data.$jobid" tmp_timings=`pwd`"/ompi_timing_temp_file.$jobid" tmp_out=`pwd`"/ompi_mpirun_prof.$jobid" -timing_bkp=$OMPI_MCA_opal_timing_file -export OMPI_MCA_opal_timing_file=$tmp_timings +timing_bkp=$OMPI_MCA_opal_timing_output +export OMPI_MCA_opal_timing_output=$tmp_timings opts=`extract_nodeset_opts $@` ${ompi_instdir}/mpirun --npernode 1 $opts ${ompi_instdir}/mpisync -o $syncfile >$tmp_out 2>&1 diff --git a/ompi/tools/mpisync/ompi_timing_post b/ompi/tools/mpisync/ompi_timing_post index 86d06249e5..0df0b9109c 100755 --- a/ompi/tools/mpisync/ompi_timing_post +++ b/ompi/tools/mpisync/ompi_timing_post @@ -10,45 +10,49 @@ ##################################################################### # Evaluate a floating point number expression. -function float_eval(){ - float_scale=9 - local stat=0 - local result=0.0 - if [[ $# -gt 0 ]]; then - result=$(echo "scale=$float_scale; $*" | bc -q 2>/dev/null) - stat=$? - if [[ $stat -eq 0 && -z "$result" ]]; then - stat=1; - fi - fi - echo $result - return $stat -} - if [ -z "$1" ] || [ -z "$2" ]; then echo "Need the name of a timing file and the output file" exit 0 fi -thefile=$1 +initfile=$1 postfile=$2 -sed '/^$/d' $thefile > ${thefile}_tmp -sort ${thefile}_tmp > ${postfile} -read line < ${postfile} -first_ts=`echo $line | awk '{ print $1 }' | sed -e 's/s//'` -prev_ts=$first_ts -echo $first_ts +# 1. Filter OPAL_TRACE entrieas only +# and put the timestamp to the first place +#.2. Sort considering that we dealing with +# floating point numbers +# 3. Return to initial field order and count relative fields +cat $initfile | \ +awk 'BEGIN { FPAT = "([^ ]+)|(\"[^\"]+\")" } +{ + if( $0 ~ /\[OPAL_TRACE\]/ ){ + x = $NF + $NF = $1 + $1 = x + print $0 + } +}' | sort --general-numeric-sort | \ +awk 'BEGIN { + FPAT = "([^ ]+)|(\"[^\"]+\")" + first = 0 + prev = 0 +} +{ + if( first == 0 ){ + first = $1 + prev = $1 + } + x = $1 + $1 = $NF + $NF = x + rel_to_first = x - first + rel_to_prev = x - prev + prev = x + print $0, " ", rel_to_prev, " ", rel_to_first +}' > $postfile + + -while read line ; do - cur_ts=`echo $line | awk '{ print $1 }' | sed -e 's/s//'` - dif1=`float_eval "$cur_ts - $first_ts"` - dif2=`float_eval "$cur_ts - $prev_ts"` - newline=`echo $line | sed -e "s/$cur_ts/$dif1:$dif2/"` - prev_ts=$cur_ts - echo $newline -done < ${postfile} > ${thefile}_tmp -cat ${thefile}_tmp > ${postfile} -rm -f ${thefile}_tmp diff --git a/opal/util/timings.c b/opal/util/timings.c index 2573a592ed..b0b927c118 100644 --- a/opal/util/timings.c +++ b/opal/util/timings.c @@ -38,21 +38,31 @@ #include "opal/class/opal_list.h" #include "opal/util/timings.h" #include "opal/util/output.h" +#include "opal/util/basename.h" +#include "opal/mca/timer/timer.h" +#include MCA_timer_IMPLEMENTATION_HEADER + +#define DELTAS_SANE_LIMIT (10*1024*1024) + +/* static void debug_hang(int i) { while( i ){ sleep(1); } } +*/ + +struct interval_descr{ + opal_timing_event_t *descr_ev, *begin_ev; + double interval, overhead; +}; -double opal_timing_get_ts(void); opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t); void opal_timing_init(opal_timing_t *t); opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...); -void opal_timing_release(opal_timing_t *t); - static OBJ_CLASS_INSTANCE(opal_timing_event_t, opal_list_item_t, NULL, NULL); @@ -71,12 +81,13 @@ int opal_timing_clocksync_read(char *fname) bool found = false; char *ptr = NULL; - char hname[1024]; + char hname[1024] = "NA"; if( gethostname(hname, 1024) ){ - opal_output(0, "opal_timing_clocksync_read(%s): Cannot gethostname\n",fname); - return OPAL_ERROR; + opal_output(0, "opal_timing_clocksync_read(%s): Cannot gethostname", + fname); } nodename = strdup(hname); + // Strip domain name ptr = strchr(nodename,'.'); if( ptr != NULL ){ *ptr = '\0'; @@ -88,7 +99,7 @@ int opal_timing_clocksync_read(char *fname) fp = fopen(fname,"r"); if( fp == NULL ){ - opal_output(0, "opal_timing_clocksync_read(%s): Cannot open the file\n",fname); + opal_output(0, "opal_timing_clocksync_read(%s): Cannot open the file",fname); return OPAL_ERROR; } @@ -111,7 +122,7 @@ int opal_timing_clocksync_read(char *fname) } if( !found ){ - opal_output(0,"opal_timing_clocksync_read: Can't find my host %s in %s\n", hname, fname); + opal_output(0,"opal_timing_clocksync_read: Can't find my host %s in %s", hname, fname); rc = OPAL_ERROR; } @@ -136,28 +147,58 @@ int opal_timing_set_jobid(char *jid) return 0; } -/* Get current timestamp */ -double opal_timing_get_ts(void){ +/* Get current timestamp. Derived from MPI_Wtime */ + +static double get_ts_gettimeofday(void) +{ + double ret; + /* Use gettimeofday() if we opal wasn't initialized */ struct timeval tv; - gettimeofday(&tv,NULL); - double ret = tv.tv_sec + tv.tv_usec*1E-6; + gettimeofday(&tv, NULL); + ret = tv.tv_sec; + ret += (double)tv.tv_usec / 1000000.0; return ret; } +#if OPAL_TIMER_CYCLE_NATIVE +static double get_ts_cycle(void) +{ + return ((double) opal_timer_base_get_cycles()) / opal_timer_base_get_freq(); +} +#elif OPAL_TIMER_USEC_NATIVE +static double get_ts_usec(void) +{ + return ((double) opal_timer_base_get_usec()) / 1000000.0; +} +#endif + +static get_ts_t _init_timestamping(void) +{ + if( !opal_initialized ){ + return get_ts_gettimeofday; + } +#if OPAL_TIMER_CYCLE_NATIVE + return get_ts_cycle; +#elif OPAL_TIMER_USEC_NATIVE + return get_ts_usec; +#endif + return get_ts_gettimeofday; +} + + opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t) { if( t->buffer_offset >= t->buffer_size ){ // notch timings overhead - double alloc_begin = opal_timing_get_ts(); + double alloc_begin = t->get_ts(); t->buffer = malloc(sizeof(opal_timing_event_t)*t->buffer_size); if( t->buffer == NULL ){ - opal_output(0, "opal_timing_event_alloc: Out of memory!\n"); return NULL; } memset(t->buffer, 0, sizeof(opal_timing_event_t)*t->buffer_size); - double alloc_end = opal_timing_get_ts(); + double alloc_end = t->get_ts(); t->buffer_offset = 0; t->buffer[0].fib = 1; @@ -172,16 +213,18 @@ void opal_timing_init(opal_timing_t *t) { memset(t,0,sizeof(*t)); - t->cur_id = 0; - // initialize events list + t->next_id_cntr = 0; + t->current_id = -1; + /* initialize events list */ t->events = OBJ_NEW(opal_list_t); - // Set buffer size + /* Set buffer size */ t->buffer_size = OPAL_TIMING_BUFSIZE; - // Set buffer_offset = buffer_size so new buffer - // will be allocated at first event report + /* Set buffer_offset = buffer_size so new buffer + * will be allocated at first event report */ t->buffer_offset = t->buffer_size; + /* initialize gettime function */ + t->get_ts = _init_timestamping(); - OPAL_TIMING_EVENT((t,"%p: Created, events = %p, buffer: ptr = %p, offs = %d", t, t->events, t->buffer, t->buffer_size)); } opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...) @@ -192,7 +235,7 @@ opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...) return p; } OBJ_CONSTRUCT(ev, opal_timing_event_t); - ev->ts = opal_timing_get_ts(); + ev->ts = t->get_ts(); va_list args; va_start( args, fmt ); vsnprintf(ev->descr, OPAL_TIMING_DESCR_MAX - 1, fmt, args); @@ -202,6 +245,23 @@ opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...) return p; } +opal_timing_prep_t opal_timing_prep_ev_end(opal_timing_t *t, const char *fmt, ...) +{ + opal_timing_prep_t p = { t, NULL, 0 }; + + if( t->current_id >= 0 ){ + opal_timing_event_t *ev = opal_timing_event_alloc(t); + if( ev == NULL ){ + opal_timing_prep_t p = { t, NULL, OPAL_ERR_OUT_OF_RESOURCE }; + return p; + } + OBJ_CONSTRUCT(ev, opal_timing_event_t); + ev->ts = t->get_ts(); + p.ev = ev; + } + return p; +} + void opal_timing_add_step(opal_timing_prep_t p, const char *func, const char *file, int line) { @@ -209,35 +269,184 @@ void opal_timing_add_step(opal_timing_prep_t p, p.ev->func = func; p.ev->file = file; p.ev->line = line; - p.ev->type = TEVENT; + p.ev->type = OPAL_TIMING_TRACE; opal_list_append(p.t->events, (opal_list_item_t*)p.ev); } } -int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *prefix, char *fname) +/* Add description of the interval */ +int opal_timing_descr(opal_timing_prep_t p, + const char *func, const char *file, int line) +{ + if( !p.errcode ){ + p.ev->func = func; + p.ev->file = file; + p.ev->line = line; + p.ev->type = OPAL_TIMING_INTDESCR; + p.ev->id = p.t->next_id_cntr; + (p.t->next_id_cntr)++; + opal_list_append(p.t->events, (opal_list_item_t*)p.ev); + return p.ev->id; + } + return -1; +} + +void opal_timing_start_id(opal_timing_t *t, int id, const char *func, const char *file, int line) +{ + /* No description is needed. If everything is OK + * it'll be included in opal_timing_start_init */ + opal_timing_event_t *ev = opal_timing_event_alloc(t); + if( ev == NULL ){ + return; + } + OBJ_CONSTRUCT(ev, opal_timing_event_t); + + t->current_id = id; + ev->ts = t->get_ts(); + ev->func = func; + ev->file = file; + ev->line = line; + ev->type = OPAL_TIMING_INTBEGIN; + ev->id = id; + opal_list_append(t->events, (opal_list_item_t*)ev); +} + +void opal_timing_end(opal_timing_t *t, int id, const char *func, const char *file, int line ) +{ + /* No description is needed. If everything is OK + * it'll be included in opal_timing_start_init */ + opal_timing_event_t *ev = opal_timing_event_alloc(t); + if( ev == NULL ){ + return; + } + OBJ_CONSTRUCT(ev, opal_timing_event_t); + + if( id < 0 ){ + ev->id = t->current_id; + t->current_id = -1; + } else { + if( t->current_id == id ){ + t->current_id = -1; + } + ev->id = id; + } + ev->ts = t->get_ts(); + ev->func = func; + ev->file = file; + ev->line = line; + ev->type = OPAL_TIMING_INTEND; + opal_list_append(t->events, (opal_list_item_t*)ev); +} + +void opal_timing_end_prep(opal_timing_prep_t p, + const char *func, const char *file, int line) +{ + opal_timing_event_t *ev = p.ev; + + if( !p.errcode && ( NULL != ev ) ){ + assert( p.t->current_id >=0 ); + ev->id = p.t->current_id; + p.t->current_id = -1; + ev->func = func; + ev->file = file; + ev->line = line; + ev->type = OPAL_TIMING_INTEND; + opal_list_append(p.t->events, (opal_list_item_t*)ev); + } +} + +static int _prepare_descriptions(opal_timing_t *t, struct interval_descr **__descr) +{ + struct interval_descr *descr; + opal_timing_event_t *ev, *next; + + if( t->next_id_cntr == 0 ){ + return 0; + } + + *__descr = malloc(sizeof(struct interval_descr) * t->next_id_cntr); + descr = *__descr; + memset(descr, 0, sizeof(struct interval_descr) * t->next_id_cntr); + + OPAL_LIST_FOREACH_SAFE(ev, next, t->events, opal_timing_event_t){ + + /* opal_output(0,"EVENT: type = %d, id=%d, ts = %.12le, ovh = %.12le %s", + ev->type, ev->id, ev->ts, ev->ts_ovh, + ev->descr ); + */ + switch(ev->type){ + case OPAL_TIMING_INTDESCR:{ + if( ev->id >= t->next_id_cntr){ + char *file = opal_basename(ev->file); + opal_output(0,"opal_timing: bad event id at %s:%d:%s, ignore and remove", + file, ev->line, ev->func); + free(file); + opal_list_remove_item(t->events, (opal_list_item_t *)ev); + continue; + } + if( NULL != descr[ev->id].descr_ev ){ + opal_timing_event_t *prev = descr[ev->id].descr_ev; + char *file = opal_basename(ev->file); + char *file_prev = opal_basename(prev->file); + opal_output(0,"opal_timing: duplicated description at %s:%d:%s, " + "previous: %s:%d:%s, ignore and remove", file, ev->line, ev->func, + file_prev, prev->line, prev->func); + free(file); + free(file_prev); + opal_list_remove_item(t->events, (opal_list_item_t *)ev); + continue; + } + + descr[ev->id].descr_ev = ev; + descr[ev->id].begin_ev = NULL; + descr[ev->id].interval = 0; + descr[ev->id].overhead = 0; + break; + } + case OPAL_TIMING_INTBEGIN: + case OPAL_TIMING_INTEND:{ + if( ev->id >= t->next_id_cntr || (NULL == descr[ev->id].descr_ev ) ){ + char *file = opal_basename(ev->file); + opal_output(0,"opal_timing: bad event id at %s:%d:%s, ignore and remove", + file, ev->line, ev->func); + free(file); + opal_list_remove_item(t->events, (opal_list_item_t *)ev); + continue; + } + break; + } + case OPAL_TIMING_TRACE: + break; + } + } + return t->next_id_cntr; +} + +/* Output lines in portions that doesn't + * exceed OPAL_TIMING_OUTBUF_SIZE for later automatic processing */ +int opal_timing_report(opal_timing_t *t, char *fname) { opal_timing_event_t *ev; - int count = 0; FILE *fp = NULL; char *buf = NULL; int buf_size = 0; + struct interval_descr *descr = NULL; int rc = OPAL_SUCCESS; - debug_hang(0); - if( fname != NULL ){ fp = fopen(fname,"a"); if( fp == NULL ){ - opal_output(0, "opal_timing_report: Cannot open %s file for writing timing information!\n",fname); + opal_output(0, "opal_timing_report: Cannot open %s file" + " for writing timing information!",fname); rc = OPAL_ERROR; goto err_exit; } - prefix=NULL; } + + _prepare_descriptions(t, &descr); buf = malloc(OPAL_TIMING_OUTBUF_SIZE+1); if( buf == NULL ){ - opal_output(0, "opal_timing_report: Out of memory!\n"); rc = OPAL_ERR_OUT_OF_RESOURCE; goto err_exit; } @@ -245,57 +454,59 @@ int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *pref double overhead = 0; OPAL_LIST_FOREACH(ev, t->events, opal_timing_event_t){ - count++; - if( ev->fib && account_overhead ){ + char *line, *file; + if( ev->fib && opal_timing_overhead ){ overhead += ev->ts_ovh; } - - if( count > 1){ - char *line; - const char *file_name = ev->file; - const char *ptr = file_name; - for( ; *ptr != '\0' ; ptr++ ){ - if( *ptr == '/'){ - file_name = ptr+1; - } - } - if( prefix != NULL ){ - rc = asprintf(&line,"%s:\t%lfs\t\"%s\"\t|\t%s\t%s\t%s\t%s:%d\n", - prefix,ev->ts + hnp_offs + overhead, - ev->descr, nodename, jobid, ev->func, file_name, ev->line); - } else { - rc = asprintf(&line,"%lfs\t\"%s\"\t|\t%s\t%s\t%s\t%s:%d\n", - ev->ts + hnp_offs + overhead, - ev->descr, nodename, jobid, ev->func, file_name, ev->line); - } - if( rc < 0 ){ - opal_output(0, "opal_timing_report: Cannot asprintf!\n"); - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - rc = 0; - - if( strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){ - opal_output(0, "opal_timing_report: timing output buffer overflow!\n"); - free(line); - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - if( buf_size + strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){ - // flush buffer to the file - if( fp != NULL ){ - fprintf(fp,"%s", buf); - fprintf(fp,"\n"); - } else { - opal_output(0,"\n%s", buf); - } - buf[0] = '\0'; - buf_size = 0; - } - sprintf(buf,"%s%s", buf, line); - buf_size += strlen(line); - free(line); + file = opal_basename(ev->file); + switch( ev->type ){ + case OPAL_TIMING_INTDESCR: + // Service event, skip it. + continue; + case OPAL_TIMING_TRACE: + rc = asprintf(&line,"[%s:%d] %s \"%s\" [OPAL_TRACE] %s:%d %.10lf\n", + nodename, getpid(), jobid, ev->descr, file, ev->line, + ev->ts + hnp_offs + overhead); + break; + case OPAL_TIMING_INTBEGIN: + rc = asprintf(&line,"[%s:%d] %s \"%s [start]\" [OPAL_TRACE] %s:%d %.10lf\n", + nodename, getpid(), jobid, descr[ev->id].descr_ev->descr, + file, ev->line, ev->ts + hnp_offs + overhead); + break; + case OPAL_TIMING_INTEND: + rc = asprintf(&line,"[%s:%d] %s \"%s [stop]\" [OPAL_TRACE] %s:%d %.10lf\n", + nodename, getpid(), jobid, descr[ev->id].descr_ev->descr, + file, ev->line, ev->ts + hnp_offs + overhead); + break; } + free(file); + + if( rc < 0 ){ + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto err_exit; + } + rc = 0; + + /* Sanity check: this shouldn't happen since description + * is event only 1KB long and other fields should never + * exceed 9KB */ + assert( strlen(line) <= OPAL_TIMING_OUTBUF_SIZE ); + + + if( buf_size + strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){ + // flush buffer to the file + if( fp != NULL ){ + fprintf(fp,"%s", buf); + fprintf(fp,"\n"); + } else { + opal_output(0,"\n%s", buf); + } + buf[0] = '\0'; + buf_size = 0; + } + sprintf(buf,"%s%s", buf, line); + buf_size += strlen(line); + free(line); } if( buf_size > 0 ){ @@ -311,6 +522,9 @@ int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *pref } err_exit: + if( NULL != descr ){ + free(descr); + } if( buf != NULL ){ free(buf); } @@ -321,6 +535,186 @@ err_exit: return rc; } +/* Output events as one buffer so the data won't be mixed + * with other output. This function is supposed to be human readable. + * The output goes only to stdout. */ +int opal_timing_deltas(opal_timing_t *t, char *fname) +{ + opal_timing_event_t *ev; + FILE *fp = NULL; + char *buf = NULL; + struct interval_descr *descr = NULL; + int i, rc = OPAL_SUCCESS; + size_t buf_size = 0, buf_used = 0; + + if( fname != NULL ){ + fp = fopen(fname,"a"); + if( fp == NULL ){ + opal_output(0, "opal_timing_report: Cannot open %s file" + " for writing timing information!",fname); + rc = OPAL_ERROR; + goto err_exit; + } + } + + _prepare_descriptions(t, &descr); + + OPAL_LIST_FOREACH(ev, t->events, opal_timing_event_t){ + int id; + if( ev->fib ){ + /* this event caused buffered memory allocation + * for events. Account the overhead for all active + * intervals. */ + int i; + for( i = 0; i < t->next_id_cntr; i++){ + if( (NULL != descr[i].descr_ev) && (NULL != descr[i].begin_ev) ){ + if( opal_timing_overhead ){ + descr[i].overhead += ev->ts_ovh; + } + } + } + } + + /* we already process all OPAL_TIMING_DESCR events + * and we ignore OPAL_TIMING_EVENT */ + if( ev->type == OPAL_TIMING_INTDESCR || + ev->type == OPAL_TIMING_TRACE){ + /* skip */ + continue; + } + + id = ev->id; + if( id < 0 || id >= t->next_id_cntr ){ + char *file = opal_basename(ev->file); + opal_output(0,"opal_timing_deltas: bad interval event id: %d at %s:%d:%s (maxid=%d)", + id, file, ev->line, ev->func, t->next_id_cntr - 1 ); + free(file); + /* skip */ + continue; + } + + /* id's assigned auomatically. Ther shouldn't be any gaps in descr[] */ + assert( NULL != descr[id].descr_ev); + + if( ev->type == OPAL_TIMING_INTBEGIN ){ + if( NULL != descr[id].begin_ev ){ + /* the measurement on this interval was already + * started! */ + opal_timing_event_t *prev = descr[ev->id].begin_ev; + char *file = opal_basename(ev->file); + char *file_prev = opal_basename(prev->file); + opal_output(0,"opal_timing_deltas: duplicated start statement at %s:%d:%s, " + "previous: %s:%d:%s", file, ev->line, ev->func, + file_prev, prev->line, prev->func); + free(file); + free(file_prev); + } else { + /* save pointer to the start of measurement event */ + descr[id].begin_ev = ev; + } + /* done, go to the next event */ + continue; + } + + if( ev->type == OPAL_TIMING_INTEND ){ + if( NULL == descr[id].begin_ev ){ + /* the measurement on this interval wasn't started! */ + char *file = opal_basename(ev->file); + opal_output(0,"opal_timing_deltas: inteval end without start at %s:%d:%s", + file, ev->line, ev->func ); + free(file); + } else { + descr[id].interval += ev->ts - descr[id].begin_ev->ts; + descr[id].begin_ev = NULL; + if( ev->fib ){ + descr[id].overhead += ev->ts_ovh; + } + } + continue; + } + + /* shouldn't ever get here: bad ev->type */ + opal_output(0, "opal_timing_deltas: bad event type %d", ev->type); + assert(0); + } + + buf = malloc(OPAL_TIMING_OUTBUF_SIZE + 1); + if( buf == NULL ){ + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto err_exit; + } + buf[0] = '\0'; + buf_size = OPAL_TIMING_OUTBUF_SIZE + 1; + buf_used = 0; + for(i = 0; i < t->next_id_cntr; i++){ + char *line = NULL; + size_t line_size; + rc = asprintf(&line,"[%s:%d] %s \"%s\" [OPAL_OVHD] %le\n", + nodename, getpid(), jobid, descr[i].descr_ev->descr, + descr[i].interval - descr[i].overhead); + if( rc < 0 ){ + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto err_exit; + } + rc = 0; + line_size = strlen(line); + + /* Sanity check: this shouldn't happen since description + * is event only 1KB long and other fields should never + * exceed 9KB */ + assert( line_size <= OPAL_TIMING_OUTBUF_SIZE ); + + if( buf_used + strlen(line) > buf_size ){ + // Increase output buffer + while( buf_used + line_size > buf_size && buf_size < DELTAS_SANE_LIMIT){ + buf_size += OPAL_TIMING_OUTBUF_SIZE + 1; + } + if( buf_size > DELTAS_SANE_LIMIT ){ + opal_output(0, "opal_timing_report: delta sane limit overflow (%u > %u)!\n", + (unsigned int)buf_size, DELTAS_SANE_LIMIT); + free(line); + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto err_exit; + } + buf = realloc(buf, buf_size); + if( buf == NULL ){ + opal_output(0, "opal_timing_deltas: Out of memory!\n"); + rc = OPAL_ERR_OUT_OF_RESOURCE; + goto err_exit; + } + } + sprintf(buf,"%s%s", buf, line); + buf_used += line_size; + free(line); + } + + + if( buf_used > 0 ){ + // flush buffer to the file + if( fp != NULL ){ + fprintf(fp,"%s", buf); + fprintf(fp,"\n"); + } else { + opal_output(0,"\n%s", buf); + } + buf[0] = '\0'; + buf_size = 0; + } + +err_exit: + if( NULL != descr ){ + free(descr); + } + if( NULL != buf ){ + free(buf); + } + if( fp != NULL ){ + fflush(fp); + fclose(fp); + } + return rc; +} + void opal_timing_release(opal_timing_t *t) { int cnt = opal_list_get_size(t->events); diff --git a/opal/util/timings.h b/opal/util/timings.h index 3cdc07faef..33e0d9ce21 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -11,6 +11,8 @@ #ifndef OPAL_UTIL_TIMING_H #define OPAL_UTIL_TIMING_H +#include "opal_config.h" + #include "opal/class/opal_list.h" #include "opal/runtime/opal_params.h" @@ -20,8 +22,12 @@ #define OPAL_TIMING_BUFSIZE 32 #define OPAL_TIMING_OUTBUF_SIZE (10*1024) -typedef enum { TEVENT, TBEGIN, TEND } opal_event_type_t; - +typedef enum { + OPAL_TIMING_TRACE, + OPAL_TIMING_INTDESCR, + OPAL_TIMING_INTBEGIN, + OPAL_TIMING_INTEND +} opal_event_type_t; typedef struct { opal_list_item_t super; @@ -35,12 +41,20 @@ typedef struct { int id; } opal_timing_event_t; +typedef double (*get_ts_t)(void); + typedef struct opal_timing_t { - int cur_id; + int next_id_cntr; + // not thread safe! + // The whole implementation is not thread safe now + // since it is supposed to be used in orte service + // thread only. Fix in the future or now? + int current_id; opal_list_t *events; opal_timing_event_t *buffer; size_t buffer_offset, buffer_size; + get_ts_t get_ts; } opal_timing_t; typedef struct { @@ -84,7 +98,7 @@ void opal_timing_init(opal_timing_t *t); /** * Prepare timing event, do all printf-like processing. - * Should not be directly used, for service purposes. + * Should not be directly used - for service purposes only. * * @param t pointer to the timing handler structure * @param fmt printf-like format @@ -94,6 +108,18 @@ void opal_timing_init(opal_timing_t *t); */ opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...); +/** + * Prepare timing event, ignore printf-like processing. + * Should not be directly used - for service purposes only. + * + * @param t pointer to the timing handler structure + * @param fmt printf-like format + * @param ... other parameters that should be converted to string representation + * + * @retval partly filled opal_timing_prep_t structure + */ +opal_timing_prep_t opal_timing_prep_ev_end(opal_timing_t *t, const char *fmt, ...); + /** * Enqueue timing event into the list of events in handler 't'. * @@ -104,8 +130,86 @@ opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...); * * @retval */ -void opal_timing_add_step(opal_timing_prep_t p, - const char *func, const char *file, int line); +void opal_timing_add_step(opal_timing_prep_t p, const char *func, + const char *file, int line); + +/** + * Enqueue the description of the interval into a list of events + * in handler 't'. + * + * @param p result of opal_timing_prep_ev + * @param func function name where event occurs + * @param file file name where event occurs + * @param line line number in the file + * + * @retval id of event interval + */ +int opal_timing_descr(opal_timing_prep_t p, const char *func, + const char *file, int line); + +/** + * Enqueue the beginning of timing interval that already has the + * description and assigned id into the list of events + * in handler 't'. + * + * @param p result of opal_timing_prep_ev + * @param func function name where event occurs + * @param file file name where event occurs + * @param line line number in the file + * + * @retval + */ +void opal_timing_start_id(opal_timing_t *t, int id, const char *func, + const char *file, int line); + +/** + * Enqueue the end of timing interval that already has + * description and assigned id into the list of events + * in handler 't'. + * + * @param p result of opal_timing_prep_ev + * @param func function name where event occurs + * @param file file name where event occurs + * @param line line number in the file + * + * @retval + */ +void opal_timing_end(opal_timing_t *t, int id, const char *func, + const char *file, int line ); + +/** + * Enqueue both description and start of timing interval + * into the list of events and assign its id. + * + * @param p result of opal_timing_prep_ev + * @param func function name where event occurs + * @param file file name where event occurs + * @param line line number in the file + * + * @retval interval id + */ +static inline int opal_timing_start_init(opal_timing_prep_t p, + const char *func, const char *file, int line) +{ + int id = opal_timing_descr(p, func, file, line); + if( id < 0 ) + return id; + opal_timing_start_id(p.t, id, func, file, line); + return id; +} + +/** + * The wrapper that is used to stop last measurement in OPAL_TIMING_MNEXT. + * + * @param p result of opal_timing_prep_ev + * @param func function name where event occurs + * @param file file name where event occurs + * @param line line number in the file + * + * @retval interval id + */ +void opal_timing_end_prep(opal_timing_prep_t p, + const char *func, const char *file, int line); /** * Report all events that were enqueued in the timing handler 't'. @@ -116,14 +220,31 @@ void opal_timing_add_step(opal_timing_prep_t p, * disappear. * * @param t timing handler - * @param accovh consider malloc overhead introduced by timing code + * @param account_overhead consider malloc overhead introduced by timing code * @param prefix prefix to use when no fname was specifyed to ease grep'ing * @param fname name of the output file (may be NULL) * * @retval OPAL_SUCCESS On success * @retval OPAL_ERROR or OPAL_ERR_OUT_OF_RESOURCE On failure */ -int opal_timing_report(opal_timing_t *t, bool accovh, const char *prefix, char *fname); +int opal_timing_report(opal_timing_t *t, char *fname); + +/** + * Report all intervals that were enqueued in the timing handler 't'. + * - if fname == NULL the output will be done using opal_output and + * each line will be prefixed with "prefix" to ease grep'ing. + * - otherwise the corresponding file will be used for output in "append" mode + * WARRNING: not all filesystems provide enough support for that feature, some records may + * disappear. + * + * @param t timing handler + * @param account_overhead consider malloc overhead introduced by timing code + * @param fname name of the output file (may be NULL) + * + * @retval OPAL_SUCCESS On success + * @retval OPAL_ERROR or OPAL_ERR_OUT_OF_RESOURCE On failure + */ +int opal_timing_deltas(opal_timing_t *t, char *fname); /** * Release all memory allocated for the timing handler 't'. @@ -140,7 +261,7 @@ void opal_timing_release(opal_timing_t *t); * --enable-timing. * */ -#define OPAL_TIMING_DECLARE(t) opal_timing_t t; // must have the semicolon here to avoid warnings when not enabled +#define OPAL_TIMING_DECLARE(t) opal_timing_t t; /* need semicolon here to avoid warnings when not enabled */ /** * Main macro for use in declaring external opal timing handler; @@ -148,7 +269,7 @@ void opal_timing_release(opal_timing_t *t); * --enable-timing. * */ -#define OPAL_TIMING_DECLARE_EXT(x, t) x extern opal_timing_t t; // must have the semicolon here to avoid warnings when not enabled +#define OPAL_TIMING_DECLARE_EXT(x, t) x extern opal_timing_t t; /* need semicolon here to avoid warnings when not enabled */ /** * Main macro for use in initializing opal timing handler; @@ -160,7 +281,8 @@ void opal_timing_release(opal_timing_t *t); #define OPAL_TIMING_INIT(t) opal_timing_init(t) /** - * Main macro for use in adding new timing event for the specifyed timing handler; + * Macro that enqueues event with its description to the specified + * timing handler; * will be "compiled out" when OPAL is configured without * --enable-timing. * @@ -169,16 +291,103 @@ void opal_timing_release(opal_timing_t *t); #define OPAL_TIMING_EVENT(x) opal_timing_add_step( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__) /** - * Main macro for use in reporting collected events; + * MDESCR: Measurement DESCRiption + * Introduce new timing measurement with string description for the specified + * timing handler; + * will be "compiled out" when OPAL is configured without + * --enable-timing. + * + * @see opal_timing_descr() + */ +#define OPAL_TIMING_MDESCR(x) opal_timing_descr( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__) + +/** + * MSTART_ID: Measurement START by ID. + * Marks the beginning of the measurement with ID=id on the + * specified timing handler; + * will be "compiled out" when OPAL is configured without + * --enable-timing. + * + * @see opal_timing_start_id() + */ +#define OPAL_TIMING_MSTART_ID(t, id) opal_timing_start_id(t, id, __FUNCTION__, __FILE__, __LINE__) + +/** + * MSTART: Measurement START + * Introduce new timing measurement conjuncted with its start + * on the specifyed timing handler; + * will be "compiled out" when OPAL is configured without + * --enable-timing. + * + * @see opal_timing_start_init() + */ +#define OPAL_TIMING_MSTART(x) opal_timing_start_init( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__) + +/** + * MSTOP: STOP Measurement + * Finishes the most recent measurement on the specifyed timing handler; + * will be "compiled out" when OPAL is configured without + * --enable-timing. + * + * @see opal_timing_end() + */ +#define OPAL_TIMING_MSTOP(t) opal_timing_end(t, -1, __FUNCTION__, __FILE__, __LINE__) + +/** + * MSTOP_ID: STOP Measurement with ID=id. + * Finishes the measurement with give ID on the specifyed timing handler; + * will be "compiled out" when OPAL is configured without + * --enable-timing. + * + * @see opal_timing_end() + */ +#define OPAL_TIMING_MSTOP_ID(t, id) opal_timing_end(t, id, __FUNCTION__, __FILE__, __LINE__) + +/** + * MNEXT: start NEXT Measurement + * Convinient macro, may be implemented with the sequence of three previously + * defined macroses: + * - finish current measurement (OPAL_TIMING_MSTOP); + * - introduce new timing measurement (OPAL_TIMING_MDESCR); + * - starts next measurement (OPAL_TIMING_MSTART_ID) + * on the specifyed timing handler; + * will be "compiled out" when OPAL is configured without + * --enable-timing. + * + * @see opal_timing_start_init() + */ +#define OPAL_TIMING_MNEXT(x) ( \ + opal_timing_end_prep(opal_timing_prep_ev_end x, \ + __FUNCTION__, __FILE__, __LINE__ ), \ + opal_timing_start_init( opal_timing_prep_ev x, \ + __FUNCTION__, __FILE__, __LINE__) \ +) + +/** + * The macro for use in reporting collected events with absolute values; * will be "compiled out" when OPAL is configured without * --enable-timing. * * @param enable flag that enables/disables reporting. Used for fine-grained timing. * @see opal_timing_report() */ -#define OPAL_TIMING_REPORT(enable, t, prefix) { \ +#define OPAL_TIMING_REPORT(enable, t) { \ if( enable ) { \ - opal_timing_report(t, opal_timing_overhead, prefix, opal_timing_output); \ + opal_timing_report(t, opal_timing_output); \ + } \ +} + +/** + * The macro for use in reporting collected events with relative times; + * will be "compiled out" when OPAL is configured without + * --enable-timing. + * + * @param enable flag that enables/disables reporting. Used for fine-grained timing. + * @see opal_timing_deltas() + */ +#define OPAL_TIMING_DELTAS(enable, t) { \ + if( enable ) { \ + opal_timing_deltas(t, opal_timing_output); \ } \ } @@ -201,7 +410,21 @@ void opal_timing_release(opal_timing_t *t); #define OPAL_TIMING_EVENT(x) -#define OPAL_TIMING_REPORT(enable, t, prefix) +#define OPAL_TIMING_MDESCR(x) + +#define OPAL_TIMING_MSTART_ID(t, id) + +#define OPAL_TIMING_MSTART(x) + +#define OPAL_TIMING_MSTOP(t) + +#define OPAL_TIMING_MSTOP_ID(t, id) + +#define OPAL_TIMING_MNEXT(x) + +#define OPAL_TIMING_REPORT(enable, t) + +#define OPAL_TIMING_DELTAS(enable, t) #define OPAL_TIMING_RELEASE(t) diff --git a/orte/mca/oob/base/oob_base_frame.c b/orte/mca/oob/base/oob_base_frame.c index cab8e6e460..757e1d5340 100644 --- a/orte/mca/oob/base/oob_base_frame.c +++ b/orte/mca/oob/base/oob_base_frame.c @@ -107,7 +107,7 @@ static int orte_oob_base_close(void) OPAL_TIMING_EVENT((&tm_oob, "Finish")); - OPAL_TIMING_REPORT(orte_oob_base.timing, &tm_oob, "COMM"); + OPAL_TIMING_REPORT(orte_oob_base.timing, &tm_oob); return mca_base_framework_components_close(&orte_oob_base_framework, NULL); } diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 3e558a5b27..c973af8198 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -83,7 +83,7 @@ static int orte_rml_base_close(void) } OBJ_DESTRUCT(&orte_rml_base.posted_recvs); - OPAL_TIMING_REPORT(orte_rml_base.timing, &tm_rml, "RML"); + OPAL_TIMING_REPORT(orte_rml_base.timing, &tm_rml); return mca_base_framework_components_close(&orte_rml_base_framework, NULL); }