From 79100de0142302336a632bfc7e840db861f70c5b Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Tue, 28 Mar 2017 23:05:27 +0700 Subject: [PATCH 1/9] opal/timing: Remove oob tracing Signed-off-by: Artem Polyakov --- orte/mca/oob/base/base.h | 2 -- orte/mca/oob/base/oob_base_frame.c | 7 ------- orte/mca/oob/tcp/oob_tcp_sendrecv.c | 13 ------------- 3 files changed, 22 deletions(-) diff --git a/orte/mca/oob/base/base.h b/orte/mca/oob/base/base.h index 21595f26ad..322ba0be1b 100644 --- a/orte/mca/oob/base/base.h +++ b/orte/mca/oob/base/base.h @@ -49,8 +49,6 @@ BEGIN_C_DECLS -OPAL_TIMING_DECLARE_EXT(ORTE_DECLSPEC, tm_oob) - /* * Convenience Typedef */ diff --git a/orte/mca/oob/base/oob_base_frame.c b/orte/mca/oob/base/oob_base_frame.c index a20d20010e..56ec2ad8fc 100644 --- a/orte/mca/oob/base/oob_base_frame.c +++ b/orte/mca/oob/base/oob_base_frame.c @@ -52,8 +52,6 @@ * Global variables */ orte_oob_base_t orte_oob_base = {0}; -OPAL_TIMING_DECLARE(tm_oob) - static int orte_oob_base_register(mca_base_register_flag_t flags) { @@ -109,9 +107,6 @@ static int orte_oob_base_close(void) OBJ_DESTRUCT(&orte_oob_base.peers); - OPAL_TIMING_EVENT((&tm_oob, "Finish")); - OPAL_TIMING_REPORT(orte_oob_base.timing, &tm_oob); - return mca_base_framework_components_close(&orte_oob_base_framework, NULL); } @@ -137,8 +132,6 @@ static int orte_oob_base_open(mca_base_open_flag_t flags) orte_state.add_job_state(ORTE_JOB_STATE_FT_RESTART, orte_oob_base_ft_event, ORTE_ERROR_PRI); #endif - OPAL_TIMING_INIT(&tm_oob); - /* Open up all available components */ return mca_base_framework_components_open(&orte_oob_base_framework, flags); } diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index 3021f55faa..dcb3c9fafd 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -110,9 +110,6 @@ static int send_msg(mca_oob_tcp_peer_t* peer, mca_oob_tcp_send_t* msg) int iov_count, retries = 0; ssize_t remain = msg->sdbytes, rc; - OPAL_TIMING_EVENT((&tm_oob, "to %s %d bytes", - ORTE_NAME_PRINT(&(peer->name)), msg->sdbytes)); - iov[0].iov_base = msg->sdptr; iov[0].iov_len = msg->sdbytes; if (!msg->hdr_sent) { @@ -420,9 +417,6 @@ static int read_bytes(mca_oob_tcp_peer_t* peer) peer->recv_msg->rdptr += rc; } - OPAL_TIMING_EVENT((&tm_oob, "from %s %d bytes", - ORTE_NAME_PRINT(&(peer->name)), to_read)); - /* we read the full data block */ return ORTE_SUCCESS; } @@ -516,8 +510,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) #if OPAL_ENABLE_TIMING timing_same_as_hdr = true; #endif - OPAL_TIMING_EVENT((&tm_oob, "from %s %d bytes [header]", - ORTE_NAME_PRINT(&(peer->name)), to_recv)); /* completed reading the header */ peer->recv_msg->hdr_recvd = true; /* convert the header */ @@ -570,11 +562,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst), peer->recv_msg->hdr.tag); - OPAL_TIMING_EVENT((&tm_oob, "from %s %d bytes [body:%s]", - ORTE_NAME_PRINT(&(peer->name)), - (int)peer->recv_msg->hdr.nbytes, - (timing_same_as_hdr) ? "same" : "next")); - /* am I the intended recipient (header was already converted back to host order)? */ if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid && peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) { From 482d7c93223e41cc914af11cb9f3b414d3dcc10a Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Tue, 28 Mar 2017 23:38:33 +0700 Subject: [PATCH 2/9] opal/timing: remove RML timings Signed-off-by: Artem Polyakov --- orte/mca/rml/base/base.h | 2 -- orte/mca/rml/base/rml_base_frame.c | 4 ---- orte/mca/rml/base/rml_base_msg_handlers.c | 3 --- orte/mca/rml/oob/rml_oob_send.c | 4 ---- 4 files changed, 13 deletions(-) diff --git a/orte/mca/rml/base/base.h b/orte/mca/rml/base/base.h index ead6f1d10d..f8cc4b1c0b 100644 --- a/orte/mca/rml/base/base.h +++ b/orte/mca/rml/base/base.h @@ -55,8 +55,6 @@ BEGIN_C_DECLS -OPAL_TIMING_DECLARE_EXT(ORTE_DECLSPEC, tm_rml) - /* * MCA Framework */ diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 61e4f4cd9a..803bf2db97 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -56,7 +56,6 @@ orte_rml_base_API_t orte_rml = { }; orte_rml_base_t orte_rml_base = {{{0}}}; -OPAL_TIMING_DECLARE(tm_rml) orte_rml_component_t *orte_rml_component = NULL; @@ -136,8 +135,6 @@ static int orte_rml_base_close(void) cleanup(0, 0, NULL); } - OPAL_TIMING_REPORT(orte_rml_base.timing, &tm_rml); - return mca_base_framework_components_close(&orte_rml_base_framework, NULL); } @@ -151,7 +148,6 @@ static int orte_rml_base_open(mca_base_open_flag_t flags) OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t); opal_pointer_array_init(&orte_rml_base.conduits,1,INT_MAX,1); - OPAL_TIMING_INIT(&tm_rml); /* Open up all available components */ return mca_base_framework_components_open(&orte_rml_base_framework, flags); } diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index 6652f9ad52..0772a5d3a6 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -165,9 +165,6 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) ORTE_NAME_PRINT(&msg->sender), msg->tag)); - OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes", - ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len)); - /* if this message is just to warmup the connection, then drop it */ if (ORTE_RML_TAG_WARMUP_CONNECTION == msg->tag) { OBJ_RELEASE(msg); diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c index be11359486..7b56c60bda 100644 --- a/orte/mca/rml/oob/rml_oob_send.c +++ b/orte/mca/rml/oob/rml_oob_send.c @@ -99,8 +99,6 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, return ORTE_ERR_BAD_PARAM; } - OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer))); - /* if this is a message to myself, then just post the message * for receipt - no need to dive into the oob */ @@ -207,8 +205,6 @@ int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod, return ORTE_ERR_BAD_PARAM; } - OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer))); - /* if this is a message to myself, then just post the message * for receipt - no need to dive into the oob */ From 1063c0d5679eae95d81a4288588cce0d9dfc262b Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 29 Mar 2017 02:33:41 +0700 Subject: [PATCH 3/9] opal/timing: remove timings from MPI_Init and MPI_Finalize Signed-off-by: Artem Polyakov --- ompi/runtime/ompi_mpi_finalize.c | 12 ++++-------- ompi/runtime/ompi_mpi_init.c | 30 ++++++++++-------------------- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index efa3f7fbb2..3425329038 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -91,7 +91,6 @@ #include "ompi/runtime/ompi_cr.h" extern bool ompi_enable_timing; -extern bool ompi_enable_timing_ext; static void fence_cbfunc(int status, void *cbdata) { @@ -108,8 +107,8 @@ int ompi_mpi_finalize(void) volatile bool active; uint32_t key; ompi_datatype_t * datatype; - OPAL_TIMING_DECLARE(tm); - OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); + //OPAL_TIMING_DECLARE(tm); + //OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); ompi_hook_base_mpi_finalize_top(); @@ -176,7 +175,7 @@ int ompi_mpi_finalize(void) opal_progress_event_users_increment(); /* check to see if we want timing information */ - OPAL_TIMING_MSTART((&tm,"time to execute finalize barrier")); + //OPAL_TIMING_MSTART((&tm,"time to execute finalize barrier")); /* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across *all* connected processes. This only means that all processes @@ -279,10 +278,7 @@ int ompi_mpi_finalize(void) /* check for timing request - get stop time and report elapsed time if so */ - OPAL_TIMING_MSTOP(&tm); - OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); - OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm); - OPAL_TIMING_RELEASE(&tm); + //OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); /* * Shutdown the Checkpoint/Restart Mech. diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index a39424ff80..ef8ecfecb3 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -279,7 +279,7 @@ opal_hash_table_t ompi_mpi_f90_complex_hashtable = {{0}}; */ opal_list_t ompi_registered_datareps = {{0}}; -bool ompi_enable_timing = false, ompi_enable_timing_ext = false; +bool ompi_enable_timing = false; extern bool ompi_mpi_yield_when_idle; extern int ompi_mpi_event_tick_rate; @@ -356,13 +356,6 @@ static int ompi_register_mca_variables(void) MCA_BASE_VAR_SCOPE_READONLY, &ompi_enable_timing); - ompi_enable_timing_ext = false; - (void) mca_base_var_register("ompi", "ompi", NULL, "timing_ext", - "Request that critical timing loops be measured", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &ompi_enable_timing_ext); return OMPI_SUCCESS; } @@ -382,8 +375,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) volatile bool active; opal_list_t info; opal_value_t *kv; - OPAL_TIMING_DECLARE(tm); - OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); + //OPAL_TIMING_DECLARE(tm); + //OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); /* bitflag of the thread level support provided. To be used * for the modex in order to work in heterogeneous environments. */ @@ -486,7 +479,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided); - OPAL_TIMING_MSTART((&tm,"time from start to completion of rte_init")); + //OPAL_TIMING_MSTART((&tm,"time from start to completion of rte_init")); /* if we were not externally started, then we need to setup * some envars so the MPI_INFO_ENV can get the cmd name @@ -518,7 +511,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_rte_initialized = true; /* check for timing request - get stop time and report elapsed time if so */ - OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex")); + //OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex")); /* Register the default errhandler callback */ errtrk.status = OPAL_ERROR; @@ -646,7 +639,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } /* check for timing request - get stop time and report elapsed time if so */ - OPAL_TIMING_MNEXT((&tm,"time to execute modex")); + //OPAL_TIMING_MNEXT((&tm,"time to execute modex")); /* exchange connection info - this function may also act as a barrier * if data exchange is required. The modex occurs solely across procs @@ -664,7 +657,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } } - OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); + //OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); /* select buffered send allocator component to be used */ if( OMPI_SUCCESS != @@ -825,7 +818,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_rte_wait_for_debugger(); /* Next timing measurement */ - OPAL_TIMING_MNEXT((&tm,"time to execute barrier")); + //OPAL_TIMING_MNEXT((&tm,"time to execute barrier")); /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax @@ -843,7 +836,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* check for timing request - get stop time and report elapsed time if so, then start the clock again */ - OPAL_TIMING_MNEXT((&tm,"time from barrier to complete mpi_init")); + //OPAL_TIMING_MNEXT((&tm,"time from barrier to complete mpi_init")); #if OPAL_ENABLE_PROGRESS_THREADS == 0 /* Start setting up the event engine for MPI operations. Don't @@ -976,10 +969,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Finish last measurement, output results * and clear timing structure */ - OPAL_TIMING_MSTOP(&tm); - OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); - OPAL_TIMING_REPORT(ompi_enable_timing_ext, &tm); - OPAL_TIMING_RELEASE(&tm); + //OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); From 88ed79ea25d14f03bdeb39d683b5056b7be8734c Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 29 Mar 2017 02:47:06 +0700 Subject: [PATCH 4/9] opal/timing: remove old framework Signed-off-by: Artem Polyakov --- opal/runtime/opal_params.c | 28 -- opal/util/timings.c | 664 +------------------------------------ opal/util/timings.h | 421 +---------------------- orte/runtime/orte_init.c | 4 - 4 files changed, 3 insertions(+), 1114 deletions(-) diff --git a/opal/runtime/opal_params.c b/opal/runtime/opal_params.c index 9be92b8086..8d1cebc21e 100644 --- a/opal/runtime/opal_params.c +++ b/opal/runtime/opal_params.c @@ -272,34 +272,6 @@ int opal_register_params(void) true); } -#if OPAL_ENABLE_TIMING - opal_timing_sync_file = NULL; - (void) mca_base_var_register ("opal", "opal", NULL, "timing_sync_file", - "Clock synchronisation information generated by mpisync tool. You don't need to touch this if you use mpirun_prof tool.", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, - &opal_timing_sync_file); - if( opal_timing_clocksync_read(opal_timing_sync_file) ){ - opal_output(0, "Cannot read file %s containing clock synchronisation information\n", opal_timing_sync_file); - } - - opal_timing_output = NULL; - (void) mca_base_var_register ("opal", "opal", NULL, "timing_output", - "The name of output file for timing information. If this parameter is not set then output will be directed into OPAL debug channel.", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, - &opal_timing_output); - - opal_timing_overhead = true; - (void) mca_base_var_register ("opal", "opal", NULL, "timing_overhead", - "Timing framework introduce additional overhead (malloc's mostly)." - " The time spend in such costly routines is measured and may be accounted" - " (subtracted from timestamps). 'true' means consider overhead, 'false' - ignore (default: true).", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL, - &opal_timing_overhead); -#endif - opal_warn_on_fork = true; (void) mca_base_var_register("ompi", "mpi", NULL, "warn_on_fork", "If nonzero, issue a warning if program forks under conditions that could cause system errors", diff --git a/opal/util/timings.c b/opal/util/timings.c index e2e0e45bba..2e75e3bfcd 100644 --- a/opal/util/timings.c +++ b/opal/util/timings.c @@ -41,107 +41,7 @@ #include MCA_timer_IMPLEMENTATION_HEADER -#define DELTAS_SANE_LIMIT (10*1024*1024) - -/* -static void debug_hang(int i) -{ - while( i ){ - sleep(1); - } -} -*/ - -struct interval_descr{ - opal_timing_event_t *descr_ev, *begin_ev; - double interval, overhead; -}; - -static OBJ_CLASS_INSTANCE(opal_timing_event_t, opal_list_item_t, NULL, NULL); - - -opal_mutex_t tm_lock; -static char *nodename = NULL; -static char *jobid = ""; -static double hnp_offs = 0; -static double hnp_rtt = 0; - -int opal_timing_clocksync_read(char *fname) -{ - int rc = 0; - FILE *fp = NULL; - char *line = NULL; - size_t n; - bool found = false; - char *ptr = NULL; - - char hname[OPAL_MAXHOSTNAMELEN] = "NA"; - if( gethostname(hname, sizeof(hname)) ){ - opal_output(0, "opal_timing_clocksync_read(%s): Cannot gethostname", - fname); - } - nodename = strdup(hname); - // Strip domain name - ptr = strchr(nodename,'.'); - if( ptr != NULL ){ - *ptr = '\0'; - } - - if( fname == NULL ){ - return 0; - } - - fp = fopen(fname,"r"); - if( fp == NULL ){ - opal_output(0, "opal_timing_clocksync_read(%s): Cannot open the file",fname); - return OPAL_ERROR; - } - - while( getline(&line,&n,fp) > 0 ){ - ptr = strchr(line,' '); - if( ptr == NULL ){ - rc = -1; - goto err_exit; - } - *ptr = '\0'; - ptr++; - if( strcmp(line, hname) == 0 ){ - if( sscanf(ptr,"%lf %lf", &hnp_rtt, &hnp_offs) != 2 ){ - rc = -1; - goto err_exit; - } - found = true; - break; - } - } - - if( !found ){ - opal_output(0,"opal_timing_clocksync_read: Can't find my host %s in %s", hname, fname); - rc = OPAL_ERROR; - } - -err_exit: - - if( line != NULL ){ - free(line); - } - - if( fp != NULL ){ - fclose(fp); - } - return rc; -} - -int opal_timing_set_jobid(char *jid) -{ - jobid = strdup(jid); - if( jobid == NULL ){ - return OPAL_ERROR; - } - return 0; -} - -/* Get current timestamp. Derived from MPI_Wtime */ +static opal_mutex_t tm_lock; static double get_ts_gettimeofday(void) { @@ -168,7 +68,7 @@ static double get_ts_usec(void) } #endif -static get_ts_t _init_timestamping(opal_timer_type_t type) +opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type) { switch (type) { case OPAL_TIMING_GET_TIME_OF_DAY: @@ -199,563 +99,3 @@ static get_ts_t _init_timestamping(opal_timer_type_t type) } } -static opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t) -{ - if( t->buffer_offset >= t->buffer_size ){ - // notch timings overhead - double alloc_begin = t->get_ts(); - - t->buffer = malloc(sizeof(opal_timing_event_t)*t->buffer_size); - if( t->buffer == NULL ){ - return NULL; - } - memset(t->buffer, 0, sizeof(opal_timing_event_t)*t->buffer_size); - - double alloc_end = t->get_ts(); - - t->buffer_offset = 0; - t->buffer[0].fib = 1; - t->buffer[0].ts_ovh = alloc_end - alloc_begin; - } - int tmp = t->buffer_offset; - (t->buffer_offset)++; - return t->buffer + tmp; -} - -int opal_timing_init(opal_timing_t *t, opal_timer_type_t type) -{ - memset(t,0,sizeof(*t)); - - t->next_id_cntr = 0; - t->current_id = -1; - /* initialize events list */ - t->events = OBJ_NEW(opal_list_t); - /* Set buffer size */ - t->buffer_size = OPAL_TIMING_BUFSIZE; - /* Set buffer_offset = buffer_size so new buffer - * will be allocated at first event report */ - t->buffer_offset = t->buffer_size; - /* initialize gettime function */ - t->get_ts = _init_timestamping(type); - if (NULL == t->get_ts) { - return OPAL_ERR_BAD_PARAM; - } - return OPAL_SUCCESS; -} - -opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...) -{ - opal_timing_event_t *ev = opal_timing_event_alloc(t); - if( ev == NULL ){ - opal_timing_prep_t p = { t, NULL, OPAL_ERR_OUT_OF_RESOURCE }; - return p; - } - OBJ_CONSTRUCT(ev, opal_timing_event_t); - ev->ts = t->get_ts(); - va_list args; - va_start( args, fmt ); - vsnprintf(ev->descr, OPAL_TIMING_DESCR_MAX - 1, fmt, args); - ev->descr[OPAL_TIMING_DESCR_MAX-1] = '\0'; - va_end( args ); - opal_timing_prep_t p = { t, ev, 0 }; - return p; -} - -opal_timing_prep_t opal_timing_prep_ev_end(opal_timing_t *t, const char *fmt, ...) -{ - opal_timing_prep_t p = { t, NULL, 0 }; - - if( t->current_id >= 0 ){ - opal_timing_event_t *ev = opal_timing_event_alloc(t); - if( ev == NULL ){ - opal_timing_prep_t p = { t, NULL, OPAL_ERR_OUT_OF_RESOURCE }; - return p; - } - OBJ_CONSTRUCT(ev, opal_timing_event_t); - ev->ts = t->get_ts(); - p.ev = ev; - } - return p; -} - -void opal_timing_add_step(opal_timing_prep_t p, - const char *func, const char *file, int line) -{ - if( !p.errcode ) { - p.ev->func = func; - p.ev->file = file; - p.ev->line = line; - p.ev->type = OPAL_TIMING_TRACE; - opal_list_append(p.t->events, (opal_list_item_t*)p.ev); - } -} - -/* Add description of the interval */ -int opal_timing_descr(opal_timing_prep_t p, - const char *func, const char *file, int line) -{ - if( !p.errcode ){ - p.ev->func = func; - p.ev->file = file; - p.ev->line = line; - p.ev->type = OPAL_TIMING_INTDESCR; - p.ev->id = p.t->next_id_cntr; - (p.t->next_id_cntr)++; - opal_list_append(p.t->events, (opal_list_item_t*)p.ev); - return p.ev->id; - } - return -1; -} - -void opal_timing_start_id(opal_timing_t *t, int id, const char *func, const char *file, int line) -{ - /* No description is needed. If everything is OK - * it'll be included in opal_timing_start_init */ - opal_timing_event_t *ev = opal_timing_event_alloc(t); - if( ev == NULL ){ - return; - } - OBJ_CONSTRUCT(ev, opal_timing_event_t); - - t->current_id = id; - ev->ts = t->get_ts(); - ev->func = func; - ev->file = file; - ev->line = line; - ev->type = OPAL_TIMING_INTBEGIN; - ev->id = id; - opal_list_append(t->events, (opal_list_item_t*)ev); -} - -void opal_timing_end(opal_timing_t *t, int id, const char *func, const char *file, int line ) -{ - /* No description is needed. If everything is OK - * it'll be included in opal_timing_start_init */ - opal_timing_event_t *ev = opal_timing_event_alloc(t); - if( ev == NULL ){ - return; - } - OBJ_CONSTRUCT(ev, opal_timing_event_t); - - if( id < 0 ){ - ev->id = t->current_id; - t->current_id = -1; - } else { - if( t->current_id == id ){ - t->current_id = -1; - } - ev->id = id; - } - ev->ts = t->get_ts(); - ev->func = func; - ev->file = file; - ev->line = line; - ev->type = OPAL_TIMING_INTEND; - opal_list_append(t->events, (opal_list_item_t*)ev); -} - -void opal_timing_end_prep(opal_timing_prep_t p, - const char *func, const char *file, int line) -{ - opal_timing_event_t *ev = p.ev; - - if( !p.errcode && ( NULL != ev ) ){ - assert( p.t->current_id >=0 ); - ev->id = p.t->current_id; - p.t->current_id = -1; - ev->func = func; - ev->file = file; - ev->line = line; - ev->type = OPAL_TIMING_INTEND; - opal_list_append(p.t->events, (opal_list_item_t*)ev); - } -} - -static int _prepare_descriptions(opal_timing_t *t, struct interval_descr **__descr) -{ - struct interval_descr *descr; - opal_timing_event_t *ev, *next; - - if( t->next_id_cntr == 0 ){ - return 0; - } - - *__descr = malloc(sizeof(struct interval_descr) * t->next_id_cntr); - descr = *__descr; - memset(descr, 0, sizeof(struct interval_descr) * t->next_id_cntr); - - OPAL_LIST_FOREACH_SAFE(ev, next, t->events, opal_timing_event_t){ - - /* opal_output(0,"EVENT: type = %d, id=%d, ts = %.12le, ovh = %.12le %s", - ev->type, ev->id, ev->ts, ev->ts_ovh, - ev->descr ); - */ - switch(ev->type){ - case OPAL_TIMING_INTDESCR:{ - if( ev->id >= t->next_id_cntr){ - char *file = opal_basename(ev->file); - opal_output(0,"opal_timing: bad event id at %s:%d:%s, ignore and remove", - file, ev->line, ev->func); - free(file); - opal_list_remove_item(t->events, (opal_list_item_t *)ev); - continue; - } - if( NULL != descr[ev->id].descr_ev ){ - opal_timing_event_t *prev = descr[ev->id].descr_ev; - char *file = opal_basename(ev->file); - char *file_prev = opal_basename(prev->file); - opal_output(0,"opal_timing: duplicated description at %s:%d:%s, " - "previous: %s:%d:%s, ignore and remove", file, ev->line, ev->func, - file_prev, prev->line, prev->func); - free(file); - free(file_prev); - opal_list_remove_item(t->events, (opal_list_item_t *)ev); - continue; - } - - descr[ev->id].descr_ev = ev; - descr[ev->id].begin_ev = NULL; - descr[ev->id].interval = 0; - descr[ev->id].overhead = 0; - break; - } - case OPAL_TIMING_INTBEGIN: - case OPAL_TIMING_INTEND:{ - if( ev->id >= t->next_id_cntr || (NULL == descr[ev->id].descr_ev ) ){ - char *file = opal_basename(ev->file); - opal_output(0,"opal_timing: bad event id at %s:%d:%s, ignore and remove", - file, ev->line, ev->func); - free(file); - opal_list_remove_item(t->events, (opal_list_item_t *)ev); - continue; - } - break; - } - case OPAL_TIMING_TRACE: - break; - } - } - return t->next_id_cntr; -} - -/* Output lines in portions that doesn't - * exceed OPAL_TIMING_OUTBUF_SIZE for later automatic processing */ -int opal_timing_report(opal_timing_t *t, char *fname) -{ - opal_timing_event_t *ev; - FILE *fp = NULL; - char *buf = NULL; - int buf_size = 0; - struct interval_descr *descr = NULL; - int rc = OPAL_SUCCESS; - - if( fname != NULL ){ - fp = fopen(fname,"a"); - if( fp == NULL ){ - opal_output(0, "opal_timing_report: Cannot open %s file" - " for writing timing information!",fname); - rc = OPAL_ERROR; - goto err_exit; - } - } - - _prepare_descriptions(t, &descr); - - buf = malloc(OPAL_TIMING_OUTBUF_SIZE+1); - if( buf == NULL ){ - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - buf[0] = '\0'; - - double overhead = 0; - OPAL_LIST_FOREACH(ev, t->events, opal_timing_event_t){ - char *line, *file; - if( ev->fib && opal_timing_overhead ){ - overhead += ev->ts_ovh; - } - file = opal_basename(ev->file); - switch( ev->type ){ - case OPAL_TIMING_INTDESCR: - // Service event, skip it. - continue; - case OPAL_TIMING_TRACE: - rc = asprintf(&line,"[%s:%d] %s \"%s\" [OPAL_TRACE] %s:%d %.10lf\n", - nodename, getpid(), jobid, ev->descr, file, ev->line, - ev->ts + hnp_offs + overhead); - break; - case OPAL_TIMING_INTBEGIN: - rc = asprintf(&line,"[%s:%d] %s \"%s [start]\" [OPAL_TRACE] %s:%d %.10lf\n", - nodename, getpid(), jobid, descr[ev->id].descr_ev->descr, - file, ev->line, ev->ts + hnp_offs + overhead); - break; - case OPAL_TIMING_INTEND: - rc = asprintf(&line,"[%s:%d] %s \"%s [stop]\" [OPAL_TRACE] %s:%d %.10lf\n", - nodename, getpid(), jobid, descr[ev->id].descr_ev->descr, - file, ev->line, ev->ts + hnp_offs + overhead); - break; - } - free(file); - - if( rc < 0 ){ - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - rc = 0; - - /* Sanity check: this shouldn't happen since description - * is event only 1KB long and other fields should never - * exceed 9KB */ - assert( strlen(line) <= OPAL_TIMING_OUTBUF_SIZE ); - - - if( buf_size + strlen(line) > OPAL_TIMING_OUTBUF_SIZE ){ - // flush buffer to the file - if( fp != NULL ){ - fprintf(fp,"%s", buf); - fprintf(fp,"\n"); - } else { - opal_output(0,"\n%s", buf); - } - buf[0] = '\0'; - buf_size = 0; - } - sprintf(buf,"%s%s", buf, line); - buf_size += strlen(line); - free(line); - } - - if( buf_size > 0 ){ - // flush buffer to the file - if( fp != NULL ){ - fprintf(fp,"%s", buf); - fprintf(fp,"\n"); - } else { - opal_output(0,"\n%s", buf); - } - buf[0] = '\0'; - buf_size = 0; - } - -err_exit: - if( NULL != descr ){ - free(descr); - } - if( buf != NULL ){ - free(buf); - } - if( fp != NULL ){ - fflush(fp); - fclose(fp); - } - return rc; -} - -/* Output events as one buffer so the data won't be mixed - * with other output. This function is supposed to be human readable. - * The output goes only to stdout. */ -int opal_timing_deltas(opal_timing_t *t, char *fname) -{ - opal_timing_event_t *ev; - FILE *fp = NULL; - char *buf = NULL; - struct interval_descr *descr = NULL; - int i, rc = OPAL_SUCCESS; - size_t buf_size = 0, buf_used = 0; - - if( fname != NULL ){ - fp = fopen(fname,"a"); - if( fp == NULL ){ - opal_output(0, "opal_timing_report: Cannot open %s file" - " for writing timing information!",fname); - rc = OPAL_ERROR; - goto err_exit; - } - } - - _prepare_descriptions(t, &descr); - - OPAL_LIST_FOREACH(ev, t->events, opal_timing_event_t){ - int id; - if( ev->fib ){ - /* this event caused buffered memory allocation - * for events. Account the overhead for all active - * intervals. */ - int i; - for( i = 0; i < t->next_id_cntr; i++){ - if( (NULL != descr[i].descr_ev) && (NULL != descr[i].begin_ev) ){ - if( opal_timing_overhead ){ - descr[i].overhead += ev->ts_ovh; - } - } - } - } - - /* we already process all OPAL_TIMING_DESCR events - * and we ignore OPAL_TIMING_EVENT */ - if( ev->type == OPAL_TIMING_INTDESCR || - ev->type == OPAL_TIMING_TRACE){ - /* skip */ - continue; - } - - id = ev->id; - if( id < 0 || id >= t->next_id_cntr ){ - char *file = opal_basename(ev->file); - opal_output(0,"opal_timing_deltas: bad interval event id: %d at %s:%d:%s (maxid=%d)", - id, file, ev->line, ev->func, t->next_id_cntr - 1 ); - free(file); - /* skip */ - continue; - } - - /* id's assigned auomatically. Ther shouldn't be any gaps in descr[] */ - assert( NULL != descr[id].descr_ev); - - if( ev->type == OPAL_TIMING_INTBEGIN ){ - if( NULL != descr[id].begin_ev ){ - /* the measurement on this interval was already - * started! */ - opal_timing_event_t *prev = descr[ev->id].begin_ev; - char *file = opal_basename(ev->file); - char *file_prev = opal_basename(prev->file); - opal_output(0,"opal_timing_deltas: duplicated start statement at %s:%d:%s, " - "previous: %s:%d:%s", file, ev->line, ev->func, - file_prev, prev->line, prev->func); - free(file); - free(file_prev); - } else { - /* save pointer to the start of measurement event */ - descr[id].begin_ev = ev; - } - /* done, go to the next event */ - continue; - } - - if( ev->type == OPAL_TIMING_INTEND ){ - if( NULL == descr[id].begin_ev ){ - /* the measurement on this interval wasn't started! */ - char *file = opal_basename(ev->file); - opal_output(0,"opal_timing_deltas: inteval end without start at %s:%d:%s", - file, ev->line, ev->func ); - free(file); - } else { - descr[id].interval += ev->ts - descr[id].begin_ev->ts; - descr[id].begin_ev = NULL; - if( ev->fib ){ - descr[id].overhead += ev->ts_ovh; - } - } - continue; - } - - /* shouldn't ever get here: bad ev->type */ - opal_output(0, "opal_timing_deltas: bad event type %d", ev->type); - assert(0); - } - - buf = malloc(OPAL_TIMING_OUTBUF_SIZE + 1); - if( buf == NULL ){ - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - buf[0] = '\0'; - buf_size = OPAL_TIMING_OUTBUF_SIZE + 1; - buf_used = 0; - for(i = 0; i < t->next_id_cntr; i++){ - char *line = NULL; - size_t line_size; - rc = asprintf(&line,"[%s:%d] %s \"%s\" [OPAL_OVHD] %le\n", - nodename, getpid(), jobid, descr[i].descr_ev->descr, - descr[i].interval - descr[i].overhead); - if( rc < 0 ){ - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - rc = 0; - line_size = strlen(line); - - /* Sanity check: this shouldn't happen since description - * is event only 1KB long and other fields should never - * exceed 9KB */ - assert( line_size <= OPAL_TIMING_OUTBUF_SIZE ); - - if( buf_used + strlen(line) > buf_size ){ - // Increase output buffer - while( buf_used + line_size > buf_size && buf_size < DELTAS_SANE_LIMIT){ - buf_size += OPAL_TIMING_OUTBUF_SIZE + 1; - } - if( buf_size > DELTAS_SANE_LIMIT ){ - opal_output(0, "opal_timing_report: delta sane limit overflow (%u > %u)!\n", - (unsigned int)buf_size, DELTAS_SANE_LIMIT); - free(line); - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - buf = realloc(buf, buf_size); - if( buf == NULL ){ - opal_output(0, "opal_timing_deltas: Out of memory!\n"); - rc = OPAL_ERR_OUT_OF_RESOURCE; - goto err_exit; - } - } - sprintf(buf,"%s%s", buf, line); - buf_used += line_size; - free(line); - } - - - if( buf_used > 0 ){ - // flush buffer to the file - if( fp != NULL ){ - fprintf(fp,"%s", buf); - fprintf(fp,"\n"); - } else { - opal_output(0,"\n%s", buf); - } - buf[0] = '\0'; - buf_size = 0; - } - -err_exit: - if( NULL != descr ){ - free(descr); - } - if( NULL != buf ){ - free(buf); - } - if( fp != NULL ){ - fflush(fp); - fclose(fp); - } - return rc; -} - -void opal_timing_release(opal_timing_t *t) -{ - int cnt = opal_list_get_size(t->events); - - if( cnt > 0 ){ - opal_list_t *tmp = OBJ_NEW(opal_list_t); - int i; - for(i=0; ievents); - if( ev->fib ){ - opal_list_append(tmp,(opal_list_item_t*)ev); - } - } - - cnt = opal_list_get_size(tmp); - for(i=0; ievents); - t->events = NULL; -} diff --git a/opal/util/timings.h b/opal/util/timings.h index 7e6a803cad..02bff6f3c4 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -25,426 +25,7 @@ typedef enum { #if OPAL_ENABLE_TIMING -#define OPAL_TIMING_DESCR_MAX 1024 -#define OPAL_TIMING_BUFSIZE 32 -#define OPAL_TIMING_OUTBUF_SIZE (10*1024) - -typedef enum { - OPAL_TIMING_TRACE, - OPAL_TIMING_INTDESCR, - OPAL_TIMING_INTBEGIN, - OPAL_TIMING_INTEND -} opal_event_type_t; - -typedef struct { - opal_list_item_t super; - int fib; - opal_event_type_t type; - const char *func; - const char *file; - int line; - double ts, ts_ovh; - char descr[OPAL_TIMING_DESCR_MAX]; - int id; -} opal_timing_event_t; - -typedef double (*get_ts_t)(void); - -typedef struct opal_timing_t -{ - int next_id_cntr; - // not thread safe! - // The whole implementation is not thread safe now - // since it is supposed to be used in orte service - // thread only. Fix in the future or now? - int current_id; - opal_list_t *events; - opal_timing_event_t *buffer; - size_t buffer_offset, buffer_size; - get_ts_t get_ts; -} opal_timing_t; - -typedef struct { - opal_timing_t *t; - opal_timing_event_t *ev; - int errcode; -} opal_timing_prep_t; - -/** - * Read synchronisation information from the file - * provided through the MCA parameter. - * Should not be directly used, for service purposes. - * - * @param sync_file Name of the file to read - * - * @retval OPAL_SUCCESS On success - * @retval OPAL_ERROR On failure - */ -int opal_timing_clocksync_read(char *sync_file); - -/** - * Pass string representation of ORTE job ID down to the OPAL. - * Should not be directly used, for service purposes. - * - * @param jid job id - * - * @retval OPAL_SUCCESS On success - * @retval OPAL_ERROR On failure - */ -int opal_timing_set_jobid(char *jid); - -/** - * Initialize timing structure. - * - * @param t pointer to the timing handler structure - * - * @retval OPAL_SUCCESS On success - * @retval OPAL_ERROR On failure - */ -int opal_timing_init(opal_timing_t *t, opal_timer_type_t type); - -/** - * Prepare timing event, do all printf-like processing. - * Should not be directly used - for service purposes only. - * - * @param t pointer to the timing handler structure - * @param fmt printf-like format - * @param ... other parameters that should be converted to string representation - * - * @retval partly filled opal_timing_prep_t structure - */ -opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...); - -/** - * Prepare timing event, ignore printf-like processing. - * Should not be directly used - for service purposes only. - * - * @param t pointer to the timing handler structure - * @param fmt printf-like format - * @param ... other parameters that should be converted to string representation - * - * @retval partly filled opal_timing_prep_t structure - */ -opal_timing_prep_t opal_timing_prep_ev_end(opal_timing_t *t, const char *fmt, ...); - -/** - * Enqueue timing event into the list of events in handler 't'. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval - */ -void opal_timing_add_step(opal_timing_prep_t p, const char *func, - const char *file, int line); - -/** - * Enqueue the description of the interval into a list of events - * in handler 't'. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval id of event interval - */ -int opal_timing_descr(opal_timing_prep_t p, const char *func, - const char *file, int line); - -/** - * Enqueue the beginning of timing interval that already has the - * description and assigned id into the list of events - * in handler 't'. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval - */ -void opal_timing_start_id(opal_timing_t *t, int id, const char *func, - const char *file, int line); - -/** - * Enqueue the end of timing interval that already has - * description and assigned id into the list of events - * in handler 't'. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval - */ -void opal_timing_end(opal_timing_t *t, int id, const char *func, - const char *file, int line ); - -/** - * Enqueue both description and start of timing interval - * into the list of events and assign its id. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval interval id - */ -static inline int opal_timing_start_init(opal_timing_prep_t p, - const char *func, const char *file, int line) -{ - int id = opal_timing_descr(p, func, file, line); - if( id < 0 ) - return id; - opal_timing_start_id(p.t, id, func, file, line); - return id; -} - -/** - * The wrapper that is used to stop last measurement in OPAL_TIMING_MNEXT. - * - * @param p result of opal_timing_prep_ev - * @param func function name where event occurs - * @param file file name where event occurs - * @param line line number in the file - * - * @retval interval id - */ -void opal_timing_end_prep(opal_timing_prep_t p, - const char *func, const char *file, int line); - -/** - * Report all events that were enqueued in the timing handler 't'. - * - if fname == NULL the output will be done using opal_output and - * each line will be prefixed with "prefix" to ease grep'ing. - * - otherwise the corresponding file will be used for output in "append" mode - * WARRNING: not all filesystems provide enough support for that feature, some records may - * disappear. - * - * @param t timing handler - * @param account_overhead consider malloc overhead introduced by timing code - * @param prefix prefix to use when no fname was specifyed to ease grep'ing - * @param fname name of the output file (may be NULL) - * - * @retval OPAL_SUCCESS On success - * @retval OPAL_ERROR or OPAL_ERR_OUT_OF_RESOURCE On failure - */ -int opal_timing_report(opal_timing_t *t, char *fname); - -/** - * Report all intervals that were enqueued in the timing handler 't'. - * - if fname == NULL the output will be done using opal_output and - * each line will be prefixed with "prefix" to ease grep'ing. - * - otherwise the corresponding file will be used for output in "append" mode - * WARRNING: not all filesystems provide enough support for that feature, some records may - * disappear. - * - * @param t timing handler - * @param account_overhead consider malloc overhead introduced by timing code - * @param fname name of the output file (may be NULL) - * - * @retval OPAL_SUCCESS On success - * @retval OPAL_ERROR or OPAL_ERR_OUT_OF_RESOURCE On failure - */ -int opal_timing_deltas(opal_timing_t *t, char *fname); - -/** - * Release all memory allocated for the timing handler 't'. - * - * @param t timing handler - * - * @retval - */ -void opal_timing_release(opal_timing_t *t); - -/** - * Main macro for use in declaring opal timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - */ -#define OPAL_TIMING_DECLARE(t) opal_timing_t t; /* need semicolon here to avoid warnings when not enabled */ - -/** - * Main macro for use in declaring external opal timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - */ -#define OPAL_TIMING_DECLARE_EXT(x, t) x extern opal_timing_t t; /* need semicolon here to avoid warnings when not enabled */ - -/** - * Main macro for use in initializing opal timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_init() - */ -#define OPAL_TIMING_INIT(t) opal_timing_init(t, OPAL_TIMING_AUTOMATIC_TIMER) - -/** - * Main macro for use in initializing opal timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_init() - */ -#define OPAL_TIMING_INIT_EXT(t, type) opal_timing_init(t, type) - -/** - * Macro that enqueues event with its description to the specified - * timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_add_step() - */ -#define OPAL_TIMING_EVENT(x) opal_timing_add_step( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__) - -/** - * MDESCR: Measurement DESCRiption - * Introduce new timing measurement with string description for the specified - * timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_descr() - */ -#define OPAL_TIMING_MDESCR(x) opal_timing_descr( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__) - -/** - * MSTART_ID: Measurement START by ID. - * Marks the beginning of the measurement with ID=id on the - * specified timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_start_id() - */ -#define OPAL_TIMING_MSTART_ID(t, id) opal_timing_start_id(t, id, __FUNCTION__, __FILE__, __LINE__) - -/** - * MSTART: Measurement START - * Introduce new timing measurement conjuncted with its start - * on the specifyed timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_start_init() - */ -#define OPAL_TIMING_MSTART(x) opal_timing_start_init( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__) - -/** - * MSTOP: STOP Measurement - * Finishes the most recent measurement on the specifyed timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_end() - */ -#define OPAL_TIMING_MSTOP(t) opal_timing_end(t, -1, __FUNCTION__, __FILE__, __LINE__) - -/** - * MSTOP_ID: STOP Measurement with ID=id. - * Finishes the measurement with give ID on the specifyed timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_end() - */ -#define OPAL_TIMING_MSTOP_ID(t, id) opal_timing_end(t, id, __FUNCTION__, __FILE__, __LINE__) - -/** - * MNEXT: start NEXT Measurement - * Convinient macro, may be implemented with the sequence of three previously - * defined macroses: - * - finish current measurement (OPAL_TIMING_MSTOP); - * - introduce new timing measurement (OPAL_TIMING_MDESCR); - * - starts next measurement (OPAL_TIMING_MSTART_ID) - * on the specifyed timing handler; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_start_init() - */ -#define OPAL_TIMING_MNEXT(x) ( \ - opal_timing_end_prep(opal_timing_prep_ev_end x, \ - __FUNCTION__, __FILE__, __LINE__ ), \ - opal_timing_start_init( opal_timing_prep_ev x, \ - __FUNCTION__, __FILE__, __LINE__) \ -) - -/** - * The macro for use in reporting collected events with absolute values; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @param enable flag that enables/disables reporting. Used for fine-grained timing. - * @see opal_timing_report() - */ -#define OPAL_TIMING_REPORT(enable, t) { \ - if( enable ) { \ - opal_timing_report(t, opal_timing_output); \ - } \ -} - -/** - * The macro for use in reporting collected events with relative times; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @param enable flag that enables/disables reporting. Used for fine-grained timing. - * @see opal_timing_deltas() - */ -#define OPAL_TIMING_DELTAS(enable, t) { \ - if( enable ) { \ - opal_timing_deltas(t, opal_timing_output); \ - } \ -} - -/** - * Main macro for use in releasing allocated resources; - * will be "compiled out" when OPAL is configured without - * --enable-timing. - * - * @see opal_timing_release() - */ -#define OPAL_TIMING_RELEASE(t) opal_timing_release(t) - -#else - -#define OPAL_TIMING_DECLARE(t) - -#define OPAL_TIMING_DECLARE_EXT(x, t) - -#define OPAL_TIMING_INIT(t) - -#define OPAL_TIMING_INIT_EXT(t, type) - -#define OPAL_TIMING_EVENT(x) - -#define OPAL_TIMING_MDESCR(x) - -#define OPAL_TIMING_MSTART_ID(t, id) - -#define OPAL_TIMING_MSTART(x) - -#define OPAL_TIMING_MSTOP(t) - -#define OPAL_TIMING_MSTOP_ID(t, id) - -#define OPAL_TIMING_MNEXT(x) - -#define OPAL_TIMING_REPORT(enable, t) - -#define OPAL_TIMING_DELTAS(enable, t) - -#define OPAL_TIMING_RELEASE(t) +typedef double (*opal_timing_ts_func_t)(void); #endif diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index 8e5ccb82be..4e6c7d8e26 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -255,10 +255,6 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) opal_process_info.my_local_rank = (int32_t)orte_process_info.my_local_rank; opal_process_info.cpuset = orte_process_info.cpuset; -#if OPAL_ENABLE_TIMING - opal_timing_set_jobid(ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); -#endif - if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { /* start listening - will be ignored if no listeners * were registered */ From 45898a9c65e210e3577029ac123479d6b1587d68 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 29 Mar 2017 04:19:09 +0700 Subject: [PATCH 5/9] opal/timing: add the draft of env-based timings This commit adds new timing feature that uses environment variables to expose timing information. This allows easy access to this data (if timing is enabled) from any other part of the application for the subsequent postprocessing. In particular this will be integrated with OMPI-level timing framework that whill use MPI_Reduce functionality to provide more compact and easy-to use information. This commit also adds the example of usage of this framework by annotating rte_init function. The result is not used anywhere for now. It will be postprocessed in subsequent commits. NOTE: that functionality is currently disabled untill it will be verified at runtime Signed-off-by: Artem Polyakov --- opal/util/timings.c | 2 - opal/util/timings.h | 187 +++++++++++++++++++++++++++++++++++++++ orte/runtime/orte_init.c | 31 +++++++ 3 files changed, 218 insertions(+), 2 deletions(-) diff --git a/opal/util/timings.c b/opal/util/timings.c index 2e75e3bfcd..775c08135d 100644 --- a/opal/util/timings.c +++ b/opal/util/timings.c @@ -41,8 +41,6 @@ #include MCA_timer_IMPLEMENTATION_HEADER -static opal_mutex_t tm_lock; - static double get_ts_gettimeofday(void) { double ret; diff --git a/opal/util/timings.h b/opal/util/timings.h index 02bff6f3c4..d67931ba59 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -27,6 +27,193 @@ typedef enum { typedef double (*opal_timing_ts_func_t)(void); +#define OPAL_TIMING_STR_LEN 256 +typedef struct { + char id[OPAL_TIMING_STR_LEN], cntr_env[OPAL_TIMING_STR_LEN]; + int enabled, error; + int cntr; + double ts; + opal_timing_ts_func_t get_ts; +} opal_timing_env_t; + +opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); + + +/* TODO: turn as much as possible into macro's + * once debugged + */ + +static inline opal_timing_env_t +OPAL_TIMING_ENV_START_TYPE(char *func, opal_timer_type_t type, char *prefix) +{ + opal_timing_env_t h; + int n; + + /* TODO: remove this when tested! */ + h.enabled = 0; + return h; + + if( NULL == prefix ){ + prefix = ""; + } + + h.error = 0; + n = snprintf(h.id, OPAL_TIMING_STR_LEN, "%s%s", prefix, func); + if( n > OPAL_TIMING_STR_LEN ){ + /* TODO: output truncated: + * disable this timing and set the error + * sign + */ + } + + /* TODO same length check here */ + sprintf(h.cntr_env,"%s_CNT", h.id); + h.get_ts = opal_timing_ts_func(type); + h.ts = h.get_ts(); + h.enabled = 1; + + char *ptr = getenv(h.id); + if( NULL == ptr || strcmp(ptr, "1")){ + h.enabled = 0; + } + ptr = getenv(h.cntr_env); + h.cntr = 0; + if( NULL != ptr ){ + h.cntr = atoi(ptr); + } + return h; +} + +#define OPAL_TIMING_ENV_INIT(name) \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, ""); + +/* We use function names for identification + * however this might be a problem for the private + * functions declared as static as their names may + * conflict. + * Use prefix to do a finer-grained identification if needed + */ +#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); + + +/* TODO: according to https://en.wikipedia.org/wiki/C99 + * varadic macroses are part of C99 and C11. Is it safe to use them here? + */ +static inline void +OPAL_TIMING_ENV_NEXT(opal_timing_env_t *h, char *fmt, ... ) +{ + if( !h->enabled ){ + return; + } + /* enabled codepath */ + va_list ap; + int n; + char buf[256], buf2[256]; + double time = h->get_ts() - h->ts; + + sprintf(buf, "%s_DESC_%d", h->id, h->cntr); + /* TODO: check that write succeded */ + + va_start(ap, fmt); + n= vsnprintf(buf2, 256, fmt, ap); + /* TODO: check that write succeded */ + va_end(ap); + + setenv(buf, buf2, 1); + + sprintf(buf, "%s_VAL_%d", h->id, h->cntr); + /* TODO: check that write succeded */ + sprintf(buf2, "%lf", time); + /* TODO: check that write succeded */ + setenv(buf, buf2, 1); + + h->cntr++; + sprintf(buf, "%d", h->cntr); + setenv(h->cntr_env, buf, 1); + + /* We don't include env operations into the consideration. + * Hopefully this will help to make measurements more accurate. + */ + h->ts = h->get_ts(); +} + +/* This function supposed to be called from the code that will + * do the postprocessing, i.e. OMPI timing portion that will + * do the reduction of accumulated values + */ +/* TODO: turn into a macro */ +static inline int OPAL_TIMING_ENV_CNT_PREFIX(char *prefix, char *func) +{ + char ename[256]; + sprintf(ename, "%s%s_CNT", prefix, func); + char *ptr = getenv(ename); + if( !ptr ){ + return 0; + } + return atoi(ptr); +} + +#define OPAL_TIMING_ENV_CNT(func) \ + OPAL_TIMING_ENV_CNT_PREFIX("", char *func) + +/* TODO: make a macro */ +static inline double +OPAL_TIMING_ENV_GETDESC_PREFIX(char *prefix, char *func, int i, char **desc) +{ + char vname[256]; + double ts; + sprintf(vname, "%s_INT_%d_DESC", prefix, i); + *desc = getenv(vname); + sprintf(vname, "%s_INT_%d_VAL",prefix, i); + char *ptr = getenv(vname); + sscanf(ptr,"%lf", &ts); + return ts; +} + +#define OPAL_TIMING_ENV_GETDESC(func, index, desc) \ + OPAL_TIMING_ENV_GETDESC_PREFIX("", func, index, desc) + +#define OSHTMNG_ENV_APPEND(prefix) { \ + char *enabled; \ + int cnt = OSHTMNG_ENV_COUNT(prefix); \ + enabled = getenv(prefix); \ + if( NULL != enabled && !strcmp(enabled, "1") ) { \ + char ename[256]; \ + sprintf(ename, "OSHTMNG_%s", OSHTMNG_prefix); \ + setenv(ename, "1", 1); \ + } \ + int i; \ + for(i = 0; i < cnt; i++){ \ + char *desc; \ + double ts = OSHTMNG_ENV_GETBYIDX(prefix, i, &desc); \ + OSHTMNG_END1(desc, ts); \ + } \ +} + +#else + +#define OPAL_TIMING_ENV_START_TYPE(func, type, prefix) + +#define OPAL_TIMING_ENV_INIT(name) + +#define OPAL_TIMING_ENV_INIT_PREFIX(prefix) + +/* TODO: according to https://en.wikipedia.org/wiki/C99 + * varadic macroses are part of C99 and C11. Is it safe to use them here? + */ +#define OPAL_TIMING_ENV_NEXT(h, fmt, ... ) + +#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func) + +#define OPAL_TIMING_ENV_CNT(func) + +#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, func, i, desc) + +#define OPAL_TIMING_ENV_GETDESC(func, index, desc) + #endif #endif diff --git a/orte/runtime/orte_init.c b/orte/runtime/orte_init.c index 4e6c7d8e26..4a885f1088 100644 --- a/orte/runtime/orte_init.c +++ b/orte/runtime/orte_init.c @@ -134,6 +134,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) { int ret; char *error = NULL; + OPAL_TIMING_ENV_INIT(tmng); if (0 < orte_initialized) { /* track number of times we have been called */ @@ -152,12 +153,16 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) opal_snprintf_jobid = orte_util_snprintf_jobid; opal_convert_string_to_jobid = _convert_string_to_jobid; + OPAL_TIMING_ENV_NEXT(tmng, "initializations"); + /* initialize the opal layer */ if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) { error = "opal_init"; goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "opal_init"); + /* ensure we know the type of proc for when we finalize */ orte_process_info.proc_type = flags; @@ -167,27 +172,37 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_locks_init"); + /* Register all MCA Params */ if (ORTE_SUCCESS != (ret = orte_register_params())) { error = "orte_register_params"; goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_register_params"); + /* setup the orte_show_help system */ if (ORTE_SUCCESS != (ret = orte_show_help_init())) { error = "opal_output_init"; goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_show_help_init"); + /* register handler for errnum -> string conversion */ opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str); + OPAL_TIMING_ENV_NEXT(tmng, "opal_error_register"); + /* Ensure the rest of the process info structure is initialized */ if (ORTE_SUCCESS != (ret = orte_proc_info())) { error = "orte_proc_info"; goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_proc_info"); + /* we may have modified the local nodename according to * request to retain/strip the FQDN and prefix, so update * it here. The OPAL layer will strdup the hostname, so @@ -201,6 +216,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) { /* let the pmix server register params */ pmix_server_register_params(); + OPAL_TIMING_ENV_NEXT(tmng, "pmix_server_register_params"); } /* open the SCHIZO framework as everyone needs it, and the @@ -210,13 +226,20 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) error = "orte_schizo_base_open"; goto error; } + + OPAL_TIMING_ENV_NEXT(tmng, "framework_open(schizo)"); + if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) { error = "orte_schizo_base_select"; goto error; } + + OPAL_TIMING_ENV_NEXT(tmng, "orte_schizo_base_select"); + /* if we are an app, let SCHIZO help us determine our environment */ if (ORTE_PROC_IS_APP) { (void)orte_schizo.check_launch_environment(); + OPAL_TIMING_ENV_NEXT(tmng, "orte_schizo.check_launch_environment"); } /* open the ESS and select the correct module for this environment */ @@ -225,11 +248,16 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) error = "orte_ess_base_open"; goto error; } + + OPAL_TIMING_ENV_NEXT(tmng, "framework_open(ess)"); + if (ORTE_SUCCESS != (ret = orte_ess_base_select())) { error = "orte_ess_base_select"; goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_ess_base_select"); + if (!ORTE_PROC_IS_APP) { /* ORTE tools "block" in their own loop over the event * base, so no progress thread is required - apps will @@ -245,6 +273,8 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) goto error; } + OPAL_TIMING_ENV_NEXT(tmng, "orte_ess.init"); + /* set the remaining opal_process_info fields. Note that * the OPAL layer will have initialized these to NULL, and * anyone between us would not have strdup'd the string, so @@ -265,6 +295,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags) } } + OPAL_TIMING_ENV_NEXT(tmng, "finalize"); /* All done */ return ORTE_SUCCESS; From e3acf2a339b3301709d72b206f7e553580508592 Mon Sep 17 00:00:00 2001 From: Artem Polyakov Date: Wed, 29 Mar 2017 07:25:17 +0700 Subject: [PATCH 6/9] ompi/timings: add OMPI-level timing framework. This is an extension of OPAL timing framework that allows to use MPI_reduce to provide the compact representation of the collected timings throughout the whole application. NOTE: the functionality is disabled now, it will be enabled after the runtime verification. Signed-off-by: Artem Polyakov --- ompi/runtime/ompi_mpi_init.c | 30 ++++---- ompi/util/timings.h | 133 +++++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+), 12 deletions(-) create mode 100644 ompi/util/timings.h diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index ef8ecfecb3..b3e3b15fa8 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -93,6 +93,7 @@ #include "ompi/dpm/dpm.h" #include "ompi/mpiext/mpiext.h" #include "ompi/mca/hook/base/base.h" +#include "ompi/util/timings.h" #if OPAL_ENABLE_FT_CR == 1 #include "ompi/mca/crcp/crcp.h" @@ -348,6 +349,9 @@ static int ompi_register_mca_variables(void) } /* check to see if we want timing information */ + /* TODO: enable OMPI init and OMPI finalize timings if + * this variable was set to 1! + */ ompi_enable_timing = false; (void) mca_base_var_register("ompi", "ompi", NULL, "timing", "Request that critical timing loops be measured", @@ -375,8 +379,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) volatile bool active; opal_list_t info; opal_value_t *kv; - //OPAL_TIMING_DECLARE(tm); - //OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); + + OMPI_TIMING_INIT(32); /* bitflag of the thread level support provided. To be used * for the modex in order to work in heterogeneous environments. */ @@ -479,7 +483,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided); - //OPAL_TIMING_MSTART((&tm,"time from start to completion of rte_init")); + OMPI_TIMING_NEXT("initialization"); /* if we were not externally started, then we need to setup * some envars so the MPI_INFO_ENV can get the cmd name @@ -508,10 +512,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) error = "ompi_mpi_init: ompi_rte_init failed"; goto error; } - ompi_rte_initialized = true; - /* check for timing request - get stop time and report elapsed time if so */ - //OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex")); + OMPI_TIMING_NEXT("rte_init"); + + ompi_rte_initialized = true; /* Register the default errhandler callback */ errtrk.status = OPAL_ERROR; @@ -638,8 +642,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } - /* check for timing request - get stop time and report elapsed time if so */ - //OPAL_TIMING_MNEXT((&tm,"time to execute modex")); + OMPI_TIMING_IMPORT_OPAL("orte_init"); + OMPI_TIMING_NEXT("rte_init-modex"); + /* exchange connection info - this function may also act as a barrier * if data exchange is required. The modex occurs solely across procs @@ -657,7 +662,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } } - //OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); + OMPI_TIMING_NEXT("modex"); /* select buffered send allocator component to be used */ if( OMPI_SUCCESS != @@ -818,7 +823,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_rte_wait_for_debugger(); /* Next timing measurement */ - //OPAL_TIMING_MNEXT((&tm,"time to execute barrier")); + OMPI_TIMING_NEXT("modex-barrier"); /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax @@ -836,7 +841,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* check for timing request - get stop time and report elapsed time if so, then start the clock again */ - //OPAL_TIMING_MNEXT((&tm,"time from barrier to complete mpi_init")); + OMPI_TIMING_NEXT("barrier"); #if OPAL_ENABLE_PROGRESS_THREADS == 0 /* Start setting up the event engine for MPI operations. Don't @@ -969,7 +974,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Finish last measurement, output results * and clear timing structure */ - //OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); + OMPI_TIMING_NEXT("barrier-finish"); + OMPI_TIMING_OUT; opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); diff --git a/ompi/util/timings.h b/ompi/util/timings.h new file mode 100644 index 0000000000..80df42cfed --- /dev/null +++ b/ompi/util/timings.h @@ -0,0 +1,133 @@ +#ifndef OMPI_UTIL_TIMING_H +#define OMPI_UTIL_TIMING_H + +#include "opal/util/timings.h" +/* TODO: we need access to MPI_* functions */ + +#if (0 && OPAL_ENABLE_TIMING) + +/* TODO: replace with opal_timing function */ +static inline double OMPI_TIMING_GET_TS(void) +{ + struct timespec ts; + double ret; + clock_gettime(CLOCK_MONOTONIC, &ts); + ret = ts.tv_sec + 1E-9 * ts.tv_nsec; + return ret; +} + +/* TODO: + * - create a structure to hold this variables + * - use dyncamically extendable arrays + */ +#define OMPI_TIMING_INIT(inum) \ + double OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ + const char *OMPI_TIMING_prefix = __FUNCTION__; \ + int OMPI_TIMING_cnt = 0; \ + int OMPI_TIMING_inum = inum; \ + double OMPI_TIMING_in[inum] = { 0.0 }; \ + double OMPI_TIMING_max[inum] = { 0.0 }; \ + double OMPI_TIMING_min[inum] = { 0.0 }; \ + double OMPI_TIMING_avg[inum] = { 0.0 }; \ + char *OMPI_TIMING_desc[inum] = { 0 }; \ + + +/* TODO: provide printf-like interfase allowing to build a string + * at runtime, like OPAL_TIMING_NEXT() + */ +#define OMPI_TIMING_NEXT(desc) { \ + char *ptr = strrchr(__FILE__, '/'); \ + if( NULL == ptr ){ \ + ptr = __FILE__; \ + } else { \ + ptr++; \ + } \ + if( OMPI_TIMING_inum <= OMPI_TIMING_cnt ){ \ + printf("OMPI_TIMING [%s:%d %s]: interval count overflow!!\n", \ + ptr, __LINE__, __FUNCTION__); \ + abort(); \ + } \ + OMPI_TIMING_in[OMPI_TIMING_cnt] = OMPI_TIMING_GET_TS() - OMPI_TIMING_ts; \ + OMPI_TIMING_desc[OMPI_TIMING_cnt++] = desc; \ + OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ +} + +#define OMPI_TIMING_APPEND(desc,ts) { \ + char *ptr = strrchr(__FILE__, '/'); \ + if( NULL == ptr ){ \ + ptr = __FILE__; \ + } else { \ + ptr++; \ + } \ + if( OMPI_TIMING_inum <= OMPI_TIMING_cnt ){ \ + printf("OMPI_TIMING [%s:%d %s]: interval count overflow!!\n", \ + ptr, __LINE__, __FUNCTION__); \ + abort(); \ + } \ + OMPI_TIMING_in[OMPI_TIMING_cnt] = ts; \ + OMPI_TIMING_desc[OMPI_TIMING_cnt++] = desc; \ +} + +#define OMPI_TIMING_IMPORT_OPAL(func) { \ + char *enabled; \ + int cnt = OPAL_TIMING_ENV_CNT(func); \ + if( 0 < cnt ) { \ + char ename[256]; \ + sprintf(ename, "OMPI_TIMING_%s", OMPI_TIMING_prefix); \ + setenv(ename, "1", 1); \ + } \ + int i; \ + for(i = 0; i < cnt; i++){ \ + char *desc; \ + double ts = OPAL_TIMING_ENV_GETDESC(prefix, i, &desc); \ + OMPI_TIMING_APPEND(desc, ts); \ + } \ +} + + +#define OMPI_TIMING_OUT { \ + int i, size, rank; \ + MPI_Comm_size(MPI_COMM_WORLD, &size); \ + MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ + char ename[1024]; \ + sprintf(ename, "OMPI_TIMING_%s", OMPI_TIMING_prefix); \ + char *ptr = getenv(ename); \ + \ + if( NULL != ptr ) { \ + OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ + MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_avg, OMPI_TIMING_cnt, MPI_DOUBLE, \ + MPI_SUM, 0, MPI_COMM_WORLD); \ + MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_min, OMPI_TIMING_cnt, MPI_DOUBLE, \ + MPI_MIN, 0, MPI_COMM_WORLD); \ + MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_max, OMPI_TIMING_cnt, MPI_DOUBLE, \ + MPI_MAX, 0, MPI_COMM_WORLD); \ + \ + if( 0 == rank ){ \ + printf("------------------ %s ------------------\n", \ + OMPI_TIMING_prefix); \ + for(i=0; i< OMPI_TIMING_cnt; i++){ \ + OMPI_TIMING_avg[i] /= size; \ + printf("[%s:%s]: %lf / %lf / %lf\n", \ + OMPI_TIMING_prefix,OMPI_TIMING_desc[i], \ + OMPI_TIMING_avg[i], OMPI_TIMING_min[i], OMPI_TIMING_max[i]); \ + } \ + printf("[%s:overhead]: %lf \n", OMPI_TIMING_prefix, \ + OMPI_TIMING_GET_TS() - OMPI_TIMING_ts); \ + } \ + } \ +} + +#else +#define OMPI_TIMING_INIT(inum) + +#define OMPI_TIMING_NEXT(desc) + +#define OMPI_TIMING_APPEND(desc,ts) + +#define OMPI_TIMING_OUT + +#define OMPI_TIMING_IMPORT_OPAL(func) + +#endif + +#endif From 36a0e71f2db0e80d3f05cf21f2c44523e4972330 Mon Sep 17 00:00:00 2001 From: Boris Karasev Date: Wed, 29 Mar 2017 20:01:05 +0600 Subject: [PATCH 7/9] ompi/timings: preparing to production state Adds: - enabling/disabling of timings throught environment variable `OMPI_TIMING_ENABLE` - output format: [file name]:[function name]:[description]: avg/min/max - dynamically extending array of results for case then inited size was exhausted - catch and collect errors - cleanup Note: For use feature need to configure with `--enable-timings` and set env `OMPI_TIMING_ENABLE = 1` Signed-off-by: Boris Karasev --- ompi/runtime/ompi_mpi_init.c | 2 + ompi/util/timings.h | 305 +++++++++++++++++++++++------------ opal/util/timings.h | 279 ++++++++++++++++---------------- 3 files changed, 341 insertions(+), 245 deletions(-) diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index b3e3b15fa8..e9beebb4e9 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -950,6 +950,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) } opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); ompi_hook_base_mpi_init_error(argc, argv, requested, provided); + OMPI_TIMING_FINALIZE; return ret; } @@ -976,6 +977,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) * and clear timing structure */ OMPI_TIMING_NEXT("barrier-finish"); OMPI_TIMING_OUT; + OMPI_TIMING_FINALIZE; opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); diff --git a/ompi/util/timings.h b/ompi/util/timings.h index 80df42cfed..339a6c2697 100644 --- a/ompi/util/timings.h +++ b/ompi/util/timings.h @@ -4,123 +4,212 @@ #include "opal/util/timings.h" /* TODO: we need access to MPI_* functions */ -#if (0 && OPAL_ENABLE_TIMING) +#if (OPAL_ENABLE_TIMING) -/* TODO: replace with opal_timing function */ -static inline double OMPI_TIMING_GET_TS(void) -{ - struct timespec ts; - double ret; - clock_gettime(CLOCK_MONOTONIC, &ts); - ret = ts.tv_sec + 1E-9 * ts.tv_nsec; - return ret; +typedef struct { + char desc[OPAL_TIMING_STR_LEN]; + double ts; + char *file; + char *prefix; +} ompi_timing_val_t; + + typedef struct { + ompi_timing_val_t *val; + int use; + struct ompi_timing_list_t *next; + } ompi_timing_list_t; + + typedef struct ompi_timing_t { + double ts; + const char *prefix; + int size; + int cnt; + int error; + int enabled; + opal_timing_ts_func_t get_ts; + ompi_timing_list_t *timing; + ompi_timing_list_t *cur_timing; + } ompi_timing_t; + +#define OMPI_TIMING_INIT(_size) \ + ompi_timing_t OMPI_TIMING; \ + OMPI_TIMING.prefix = __FUNCTION__; \ + OMPI_TIMING.size = _size; \ + OMPI_TIMING.get_ts = opal_timing_ts_func(OPAL_TIMING_AUTOMATIC_TIMER); \ + OMPI_TIMING.cnt = 0; \ + OMPI_TIMING.error = 0; \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + OMPI_TIMING.enabled = 0; \ + { \ + char *ptr; \ + ptr = getenv("OMPI_TIMING_ENABLE"); \ + if (NULL != ptr) { \ + OMPI_TIMING.enabled = atoi(ptr); \ + } \ + if (OMPI_TIMING.enabled) { \ + setenv("OPAL_TIMING_ENABLE", "1", 1); \ + OMPI_TIMING.timing = (ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \ + memset(OMPI_TIMING.timing, 0, sizeof(ompi_timing_list_t)); \ + OMPI_TIMING.timing->val = (ompi_timing_val_t*)malloc(sizeof(ompi_timing_val_t) * _size); \ + OMPI_TIMING.cur_timing = OMPI_TIMING.timing; \ + } \ + } + +#define OMPI_TIMING_ITEM_EXTEND ({ \ + if (OMPI_TIMING.enabled) { \ + OMPI_TIMING.cur_timing->next = (struct ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \ + OMPI_TIMING.cur_timing = (ompi_timing_list_t*)OMPI_TIMING.cur_timing->next; \ + memset(OMPI_TIMING.cur_timing, 0, sizeof(ompi_timing_list_t)); \ + OMPI_TIMING.cur_timing->val = malloc(sizeof(ompi_timing_val_t) * OMPI_TIMING.size); \ + } \ +}) + +#define OMPI_TIMING_FINALIZE ({ \ + if (OMPI_TIMING.enabled) { \ + ompi_timing_list_t *t = OMPI_TIMING.timing, *tmp; \ + while ( NULL != t) { \ + tmp = t; \ + t = t->next; \ + free(tmp->val); \ + free(tmp); \ + } \ + OMPI_TIMING.timing = NULL; \ + OMPI_TIMING.cur_timing = NULL; \ + OMPI_TIMING.cnt = 0; \ + } \ +}) + +#define OMPI_TIMING_NEXT(fmt, ...) ({ \ + if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ + char *f = strrchr(__FILE__, '/') + 1; \ + int len = 0; \ + if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ + OMPI_TIMING_ITEM_EXTEND; \ + } \ + len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ + OPAL_TIMING_STR_LEN, fmt, ##__VA_ARGS__); \ + if (len >= OPAL_TIMING_STR_LEN) { \ + OMPI_TIMING.error = 1; \ + } \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = f; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = __FUNCTION__; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ + OMPI_TIMING.cnt++; \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + } \ +}) + +#define OMPI_TIMING_APPEND(filename,func,desc,ts) { \ + if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ + OMPI_TIMING_ITEM_EXTEND; \ + } \ + int len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ + OPAL_TIMING_STR_LEN, "%s", desc); \ + if (len >= OPAL_TIMING_STR_LEN) { \ + OMPI_TIMING.error = 1; \ + } \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = func; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = filename; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ + OMPI_TIMING.cnt++; \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ } -/* TODO: - * - create a structure to hold this variables - * - use dyncamically extendable arrays - */ -#define OMPI_TIMING_INIT(inum) \ - double OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ - const char *OMPI_TIMING_prefix = __FUNCTION__; \ - int OMPI_TIMING_cnt = 0; \ - int OMPI_TIMING_inum = inum; \ - double OMPI_TIMING_in[inum] = { 0.0 }; \ - double OMPI_TIMING_max[inum] = { 0.0 }; \ - double OMPI_TIMING_min[inum] = { 0.0 }; \ - double OMPI_TIMING_avg[inum] = { 0.0 }; \ - char *OMPI_TIMING_desc[inum] = { 0 }; \ - - -/* TODO: provide printf-like interfase allowing to build a string - * at runtime, like OPAL_TIMING_NEXT() - */ -#define OMPI_TIMING_NEXT(desc) { \ - char *ptr = strrchr(__FILE__, '/'); \ - if( NULL == ptr ){ \ - ptr = __FILE__; \ - } else { \ - ptr++; \ - } \ - if( OMPI_TIMING_inum <= OMPI_TIMING_cnt ){ \ - printf("OMPI_TIMING [%s:%d %s]: interval count overflow!!\n", \ - ptr, __LINE__, __FUNCTION__); \ - abort(); \ - } \ - OMPI_TIMING_in[OMPI_TIMING_cnt] = OMPI_TIMING_GET_TS() - OMPI_TIMING_ts; \ - OMPI_TIMING_desc[OMPI_TIMING_cnt++] = desc; \ - OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ +#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) { \ + if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ + int cnt = OPAL_TIMING_ENV_CNT(func); \ + int i; \ + OMPI_TIMING.error = OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func); \ + for(i = 0; i < cnt; i++){ \ + char *desc, *filename; \ + double ts = OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc); \ + OMPI_TIMING_APPEND(filename, func, desc, ts); \ + } \ + } \ } -#define OMPI_TIMING_APPEND(desc,ts) { \ - char *ptr = strrchr(__FILE__, '/'); \ - if( NULL == ptr ){ \ - ptr = __FILE__; \ - } else { \ - ptr++; \ - } \ - if( OMPI_TIMING_inum <= OMPI_TIMING_cnt ){ \ - printf("OMPI_TIMING [%s:%d %s]: interval count overflow!!\n", \ - ptr, __LINE__, __FUNCTION__); \ - abort(); \ - } \ - OMPI_TIMING_in[OMPI_TIMING_cnt] = ts; \ - OMPI_TIMING_desc[OMPI_TIMING_cnt++] = desc; \ -} - -#define OMPI_TIMING_IMPORT_OPAL(func) { \ - char *enabled; \ - int cnt = OPAL_TIMING_ENV_CNT(func); \ - if( 0 < cnt ) { \ - char ename[256]; \ - sprintf(ename, "OMPI_TIMING_%s", OMPI_TIMING_prefix); \ - setenv(ename, "1", 1); \ - } \ - int i; \ - for(i = 0; i < cnt; i++){ \ - char *desc; \ - double ts = OPAL_TIMING_ENV_GETDESC(prefix, i, &desc); \ - OMPI_TIMING_APPEND(desc, ts); \ - } \ -} +#define OMPI_TIMING_IMPORT_OPAL(func) \ + OMPI_TIMING_IMPORT_OPAL_PREFIX("", func) -#define OMPI_TIMING_OUT { \ - int i, size, rank; \ - MPI_Comm_size(MPI_COMM_WORLD, &size); \ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ - char ename[1024]; \ - sprintf(ename, "OMPI_TIMING_%s", OMPI_TIMING_prefix); \ - char *ptr = getenv(ename); \ - \ - if( NULL != ptr ) { \ - OMPI_TIMING_ts = OMPI_TIMING_GET_TS(); \ - MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_avg, OMPI_TIMING_cnt, MPI_DOUBLE, \ - MPI_SUM, 0, MPI_COMM_WORLD); \ - MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_min, OMPI_TIMING_cnt, MPI_DOUBLE, \ - MPI_MIN, 0, MPI_COMM_WORLD); \ - MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_max, OMPI_TIMING_cnt, MPI_DOUBLE, \ - MPI_MAX, 0, MPI_COMM_WORLD); \ - \ - if( 0 == rank ){ \ - printf("------------------ %s ------------------\n", \ - OMPI_TIMING_prefix); \ - for(i=0; i< OMPI_TIMING_cnt; i++){ \ - OMPI_TIMING_avg[i] /= size; \ - printf("[%s:%s]: %lf / %lf / %lf\n", \ - OMPI_TIMING_prefix,OMPI_TIMING_desc[i], \ - OMPI_TIMING_avg[i], OMPI_TIMING_min[i], OMPI_TIMING_max[i]); \ - } \ - printf("[%s:overhead]: %lf \n", OMPI_TIMING_prefix, \ - OMPI_TIMING_GET_TS() - OMPI_TIMING_ts); \ - } \ - } \ -} + +#define OMPI_TIMING_OUT ({ \ + if (OMPI_TIMING.enabled) { \ + int i, size, rank; \ + MPI_Comm_size(MPI_COMM_WORLD, &size); \ + MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ + int error = 0; \ + \ + MPI_Reduce(&OMPI_TIMING.error, &error, 1, \ + MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \ + \ + if (error) { \ + if (0 == rank) { \ + printf("==OMPI_TIMING== error: something went wrong, timings doesn't work\n"); \ + } \ + } \ + else { \ + double *avg = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + double *min = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + double *max = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + \ + if( OMPI_TIMING.cnt > 0 ) { \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + ompi_timing_list_t *timing = OMPI_TIMING.timing; \ + i = 0; \ + do { \ + int use; \ + for (use = 0; use < timing->use; use++) { \ + MPI_Reduce(&timing->val[use].ts, avg + i, 1, \ + MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); \ + MPI_Reduce(&timing->val[use].ts, min + i, 1, \ + MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); \ + MPI_Reduce(&timing->val[use].ts, max + i, 1, \ + MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); \ + desc[i] = timing->val[use].desc; \ + prefix[i] = timing->val[use].prefix; \ + file[i] = timing->val[use].file; \ + i++; \ + } \ + timing = (ompi_timing_list_t*)timing->next; \ + } while (timing != NULL); \ + \ + if( 0 == rank ){ \ + if (OMPI_TIMING.timing->next) { \ + printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \ + "==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \ + } \ + \ + printf("------------------ %s ------------------\n", \ + OMPI_TIMING.prefix); \ + for(i=0; i< OMPI_TIMING.cnt; i++){ \ + avg[i] /= size; \ + printf("[%s:%s:%s]: %lf / %lf / %lf\n", \ + file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \ + } \ + printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \ + } \ + } \ + free(avg); \ + free(min); \ + free(max); \ + free(desc); \ + free(prefix); \ + free(file); \ + } \ + } \ +}) #else -#define OMPI_TIMING_INIT(inum) +#define OMPI_TIMING_INIT(size) -#define OMPI_TIMING_NEXT(desc) +#define OMPI_TIMING_NEXT(fmt, ...) #define OMPI_TIMING_APPEND(desc,ts) @@ -128,6 +217,8 @@ static inline double OMPI_TIMING_GET_TS(void) #define OMPI_TIMING_IMPORT_OPAL(func) +#define OMPI_TIMING_FINALIZE + #endif #endif diff --git a/opal/util/timings.h b/opal/util/timings.h index d67931ba59..2bd41b6a4a 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -28,6 +28,7 @@ typedef enum { typedef double (*opal_timing_ts_func_t)(void); #define OPAL_TIMING_STR_LEN 256 + typedef struct { char id[OPAL_TIMING_STR_LEN], cntr_env[OPAL_TIMING_STR_LEN]; int enabled, error; @@ -38,54 +39,46 @@ typedef struct { opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); +#define OPAL_TIMING_ENV_START_TYPE(func, type, prefix) ({ \ + opal_timing_env_t h; \ + char *ptr = NULL; \ + char *_prefix = prefix; \ + int n; \ + if( NULL == prefix ){ \ + _prefix = ""; \ + } \ + h.error = 0; \ + n = snprintf(h.id, OPAL_TIMING_STR_LEN, "%s%s", _prefix, func); \ + if( n > OPAL_TIMING_STR_LEN ){ \ + h.error = 1; \ + } \ + n = sprintf(h.cntr_env,"OMPI_TIMING_%s%s_CNT", prefix, h.id); \ + if( n > OPAL_TIMING_STR_LEN ){ \ + h.error = 1; \ + } \ + ptr = getenv(h.id); \ + if( NULL == ptr || strcmp(ptr, "1")){ \ + h.enabled = 0; \ + } \ + h.get_ts = opal_timing_ts_func(type); \ + ptr = getenv("OPAL_TIMING_ENABLE"); \ + if (NULL != ptr) { \ + h.enabled = atoi(ptr); \ + } \ + h.cntr = 0; \ + ptr = getenv(h.id); \ + if( NULL != ptr ){ \ + h.cntr = atoi(ptr); \ + } \ + h.ts = h.get_ts(); \ + if ( 0 != h.error ){ \ + h.enabled = 0; \ + } \ + h; \ +}) -/* TODO: turn as much as possible into macro's - * once debugged - */ - -static inline opal_timing_env_t -OPAL_TIMING_ENV_START_TYPE(char *func, opal_timer_type_t type, char *prefix) -{ - opal_timing_env_t h; - int n; - - /* TODO: remove this when tested! */ - h.enabled = 0; - return h; - - if( NULL == prefix ){ - prefix = ""; - } - - h.error = 0; - n = snprintf(h.id, OPAL_TIMING_STR_LEN, "%s%s", prefix, func); - if( n > OPAL_TIMING_STR_LEN ){ - /* TODO: output truncated: - * disable this timing and set the error - * sign - */ - } - - /* TODO same length check here */ - sprintf(h.cntr_env,"%s_CNT", h.id); - h.get_ts = opal_timing_ts_func(type); - h.ts = h.get_ts(); - h.enabled = 1; - - char *ptr = getenv(h.id); - if( NULL == ptr || strcmp(ptr, "1")){ - h.enabled = 0; - } - ptr = getenv(h.cntr_env); - h.cntr = 0; - if( NULL != ptr ){ - h.cntr = atoi(ptr); - } - return h; -} - -#define OPAL_TIMING_ENV_INIT(name) \ - opal_timing_env_t name ## _val, *name = &(name ## _val); \ +#define OPAL_TIMING_ENV_INIT(name) \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, ""); /* We use function names for identification @@ -94,104 +87,115 @@ OPAL_TIMING_ENV_START_TYPE(char *func, opal_timer_type_t type, char *prefix) * conflict. * Use prefix to do a finer-grained identification if needed */ -#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ - opal_timing_env_t name ## _val, *name = &(name ## _val); \ - name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); +#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); - -/* TODO: according to https://en.wikipedia.org/wiki/C99 - * varadic macroses are part of C99 and C11. Is it safe to use them here? - */ -static inline void -OPAL_TIMING_ENV_NEXT(opal_timing_env_t *h, char *fmt, ... ) -{ - if( !h->enabled ){ - return; - } - /* enabled codepath */ - va_list ap; - int n; - char buf[256], buf2[256]; - double time = h->get_ts() - h->ts; - - sprintf(buf, "%s_DESC_%d", h->id, h->cntr); - /* TODO: check that write succeded */ - - va_start(ap, fmt); - n= vsnprintf(buf2, 256, fmt, ap); - /* TODO: check that write succeded */ - va_end(ap); - - setenv(buf, buf2, 1); - - sprintf(buf, "%s_VAL_%d", h->id, h->cntr); - /* TODO: check that write succeded */ - sprintf(buf2, "%lf", time); - /* TODO: check that write succeded */ - setenv(buf, buf2, 1); - - h->cntr++; - sprintf(buf, "%d", h->cntr); - setenv(h->cntr_env, buf, 1); - - /* We don't include env operations into the consideration. - * Hopefully this will help to make measurements more accurate. - */ - h->ts = h->get_ts(); -} +#define OPAL_TIMING_ENV_NEXT(h, fmt, ...) ({ \ + int n; \ + char buf1[OPAL_TIMING_STR_LEN], buf2[OPAL_TIMING_STR_LEN]; \ + double time; \ + char *filename; \ + if( h->enabled ){ \ + /* enabled codepath */ \ + time = h->get_ts() - h->ts; \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_DESC_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, fmt, ## __VA_ARGS__ ); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_VAL_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%lf", time); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + filename = strrchr(__FILE__, '/') + 1; \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_FILE_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%s", filename); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + h->cntr++; \ + sprintf(buf1, "%d", h->cntr); \ + setenv(h->cntr_env, buf1, 1); \ + /* We don't include env operations into the consideration. + * Hopefully this will help to make measurements more accurate. + */ \ + h->ts = h->get_ts(); \ + } \ + if (h->error) { \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_ERROR", h->id);\ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%d", h->error); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + } \ +}) /* This function supposed to be called from the code that will * do the postprocessing, i.e. OMPI timing portion that will * do the reduction of accumulated values */ -/* TODO: turn into a macro */ -static inline int OPAL_TIMING_ENV_CNT_PREFIX(char *prefix, char *func) -{ - char ename[256]; - sprintf(ename, "%s%s_CNT", prefix, func); - char *ptr = getenv(ename); - if( !ptr ){ - return 0; - } - return atoi(ptr); -} +#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func) ({ \ + char ename[OPAL_TIMING_STR_LEN]; \ + int cnt = 0; \ + char *ptr = NULL; \ + int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_CNT", prefix, func); \ + if ( n <= OPAL_TIMING_STR_LEN ){ \ + ptr = getenv(ename); \ + if( NULL != ptr ){ cnt = atoi(ptr); }; \ + } \ + cnt; \ +}) -#define OPAL_TIMING_ENV_CNT(func) \ - OPAL_TIMING_ENV_CNT_PREFIX("", char *func) +#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func) ({ \ + char ename[OPAL_TIMING_STR_LEN]; \ + int error = 0; \ + char *ptr = NULL; \ + int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_ERROR", prefix, func); \ + if ( n <= OPAL_TIMING_STR_LEN ){ \ + ptr = getenv(ename); \ + if( NULL != ptr ){ error = atoi(ptr); }; \ + } \ + error; \ +}) -/* TODO: make a macro */ -static inline double -OPAL_TIMING_ENV_GETDESC_PREFIX(char *prefix, char *func, int i, char **desc) -{ - char vname[256]; - double ts; - sprintf(vname, "%s_INT_%d_DESC", prefix, i); - *desc = getenv(vname); - sprintf(vname, "%s_INT_%d_VAL",prefix, i); - char *ptr = getenv(vname); - sscanf(ptr,"%lf", &ts); - return ts; -} +#define OPAL_TIMING_ENV_CNT(func) \ + OPAL_TIMING_ENV_CNT_PREFIX("", func) -#define OPAL_TIMING_ENV_GETDESC(func, index, desc) \ - OPAL_TIMING_ENV_GETDESC_PREFIX("", func, index, desc) +#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, filename, func, i, desc) ({ \ + char vname[OPAL_TIMING_STR_LEN]; \ + double ts = 0.0; \ + sprintf(vname, "OMPI_TIMING_%s%s_FILE_%d", prefix, func, i); \ + *filename = getenv(vname); \ + sprintf(vname, "OMPI_TIMING_%s%s_DESC_%d", prefix, func, i); \ + *desc = getenv(vname); \ + sprintf(vname, "OMPI_TIMING_%s%s_VAL_%d", prefix, func, i); \ + char *ptr = getenv(vname); \ + if ( NULL != ptr ) { \ + sscanf(ptr,"%lf", &ts); \ + } \ + ts; \ +}) -#define OSHTMNG_ENV_APPEND(prefix) { \ - char *enabled; \ - int cnt = OSHTMNG_ENV_COUNT(prefix); \ - enabled = getenv(prefix); \ - if( NULL != enabled && !strcmp(enabled, "1") ) { \ - char ename[256]; \ - sprintf(ename, "OSHTMNG_%s", OSHTMNG_prefix); \ - setenv(ename, "1", 1); \ - } \ - int i; \ - for(i = 0; i < cnt; i++){ \ - char *desc; \ - double ts = OSHTMNG_ENV_GETBYIDX(prefix, i, &desc); \ - OSHTMNG_END1(desc, ts); \ - } \ -} +#define OPAL_TIMING_ENV_GETDESC(file, func, index, desc) \ + OPAL_TIMING_ENV_GETDESC_PREFIX("", file, func, index, desc) #else @@ -201,9 +205,6 @@ OPAL_TIMING_ENV_GETDESC_PREFIX(char *prefix, char *func, int i, char **desc) #define OPAL_TIMING_ENV_INIT_PREFIX(prefix) -/* TODO: according to https://en.wikipedia.org/wiki/C99 - * varadic macroses are part of C99 and C11. Is it safe to use them here? - */ #define OPAL_TIMING_ENV_NEXT(h, fmt, ... ) #define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func) @@ -214,6 +215,8 @@ OPAL_TIMING_ENV_GETDESC_PREFIX(char *prefix, char *func, int i, char **desc) #define OPAL_TIMING_ENV_GETDESC(func, index, desc) +#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func) + #endif #endif From 95ae0d1df3dd2e3d4f6b06cda6804d9033df69ac Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 7 Apr 2017 12:15:46 -0700 Subject: [PATCH 8/9] Cleanup timing macros for portability across compilers. Rename the --enable-timing configure option to be --enable-pmix-timing so it doesn't pickup external timing requests. Remove a stale function reference in PMIx so it can compile with timing enabled. Signed-off-by: Ralph Castain --- .gitignore | 1 + ompi/tools/mpisync/sync.c | 3 +- ompi/util/timings.h | 322 +++++++++--------- opal/mca/pmix/pmix2x/configure.m4 | 14 +- opal/mca/pmix/pmix2x/pmix/config/pmix.m4 | 12 +- .../pmix2x/pmix/src/runtime/pmix_params.c | 13 +- opal/util/timings.h | 274 +++++++-------- orte/mca/oob/tcp/oob_tcp_sendrecv.c | 12 - 8 files changed, 325 insertions(+), 326 deletions(-) diff --git a/.gitignore b/.gitignore index b45ab10f92..76c1ab5d15 100644 --- a/.gitignore +++ b/.gitignore @@ -244,6 +244,7 @@ ompi/mpiext/cuda/c/mpiext_cuda_c.h ompi/tools/mpisync/mpisync ompi/tools/mpisync/mpirun_prof ompi/tools/mpisync/ompi_timing_post +ompi/tools/mpisync/mpisync.1 ompi/tools/ompi_info/ompi_info ompi/tools/ompi_info/ompi_info.1 diff --git a/ompi/tools/mpisync/sync.c b/ompi/tools/mpisync/sync.c index 658ada2df7..bcedadcb4a 100644 --- a/ompi/tools/mpisync/sync.c +++ b/ompi/tools/mpisync/sync.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2014 Artem Polyakov - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -139,7 +139,6 @@ int main(int argc, char **argv) MPI_Gather(hname,sizeof(hname),MPI_CHAR,hnames,sizeof(hname),MPI_CHAR, 0, MPI_COMM_WORLD); MPI_Gather(send,2,MPI_DOUBLE,measure,2, MPI_DOUBLE, 0, MPI_COMM_WORLD); - char tmpname[128]; FILE *fp = fopen(filename,"w"); if( fp == NULL ){ fprintf(stderr, "Fail to open the file %s. Abort\n", filename); diff --git a/ompi/util/timings.h b/ompi/util/timings.h index 339a6c2697..eddea59f9a 100644 --- a/ompi/util/timings.h +++ b/ompi/util/timings.h @@ -13,27 +13,27 @@ typedef struct { char *prefix; } ompi_timing_val_t; - typedef struct { - ompi_timing_val_t *val; - int use; - struct ompi_timing_list_t *next; - } ompi_timing_list_t; +typedef struct { + ompi_timing_val_t *val; + int use; + struct ompi_timing_list_t *next; +} ompi_timing_list_t; - typedef struct ompi_timing_t { - double ts; - const char *prefix; - int size; - int cnt; - int error; - int enabled; - opal_timing_ts_func_t get_ts; - ompi_timing_list_t *timing; - ompi_timing_list_t *cur_timing; - } ompi_timing_t; +typedef struct ompi_timing_t { + double ts; + const char *prefix; + int size; + int cnt; + int error; + int enabled; + opal_timing_ts_func_t get_ts; + ompi_timing_list_t *timing; + ompi_timing_list_t *cur_timing; +} ompi_timing_t; #define OMPI_TIMING_INIT(_size) \ ompi_timing_t OMPI_TIMING; \ - OMPI_TIMING.prefix = __FUNCTION__; \ + OMPI_TIMING.prefix = __func__; \ OMPI_TIMING.size = _size; \ OMPI_TIMING.get_ts = opal_timing_ts_func(OPAL_TIMING_AUTOMATIC_TIMER); \ OMPI_TIMING.cnt = 0; \ @@ -55,156 +55,164 @@ typedef struct { } \ } -#define OMPI_TIMING_ITEM_EXTEND ({ \ - if (OMPI_TIMING.enabled) { \ - OMPI_TIMING.cur_timing->next = (struct ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \ - OMPI_TIMING.cur_timing = (ompi_timing_list_t*)OMPI_TIMING.cur_timing->next; \ - memset(OMPI_TIMING.cur_timing, 0, sizeof(ompi_timing_list_t)); \ - OMPI_TIMING.cur_timing->val = malloc(sizeof(ompi_timing_val_t) * OMPI_TIMING.size); \ - } \ -}) +#define OMPI_TIMING_ITEM_EXTEND \ + do { \ + if (OMPI_TIMING.enabled) { \ + OMPI_TIMING.cur_timing->next = (struct ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \ + OMPI_TIMING.cur_timing = (ompi_timing_list_t*)OMPI_TIMING.cur_timing->next; \ + memset(OMPI_TIMING.cur_timing, 0, sizeof(ompi_timing_list_t)); \ + OMPI_TIMING.cur_timing->val = malloc(sizeof(ompi_timing_val_t) * OMPI_TIMING.size); \ + } \ + } while(0) -#define OMPI_TIMING_FINALIZE ({ \ - if (OMPI_TIMING.enabled) { \ - ompi_timing_list_t *t = OMPI_TIMING.timing, *tmp; \ - while ( NULL != t) { \ - tmp = t; \ - t = t->next; \ - free(tmp->val); \ - free(tmp); \ - } \ - OMPI_TIMING.timing = NULL; \ - OMPI_TIMING.cur_timing = NULL; \ - OMPI_TIMING.cnt = 0; \ - } \ -}) +#define OMPI_TIMING_FINALIZE \ + do { \ + if (OMPI_TIMING.enabled) { \ + ompi_timing_list_t *t = OMPI_TIMING.timing, *tmp; \ + while ( NULL != t) { \ + tmp = t; \ + t = (ompi_timing_list_t*)t->next; \ + free(tmp->val); \ + free(tmp); \ + } \ + OMPI_TIMING.timing = NULL; \ + OMPI_TIMING.cur_timing = NULL; \ + OMPI_TIMING.cnt = 0; \ + } \ + } while(0) -#define OMPI_TIMING_NEXT(fmt, ...) ({ \ - if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ - char *f = strrchr(__FILE__, '/') + 1; \ - int len = 0; \ - if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ - OMPI_TIMING_ITEM_EXTEND; \ - } \ - len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ - OPAL_TIMING_STR_LEN, fmt, ##__VA_ARGS__); \ - if (len >= OPAL_TIMING_STR_LEN) { \ - OMPI_TIMING.error = 1; \ - } \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = f; \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = __FUNCTION__; \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ - OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ - OMPI_TIMING.cnt++; \ - OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ - } \ -}) +#define OMPI_TIMING_NEXT(...) \ + do { \ + if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ + char *f = strrchr(__FILE__, '/') + 1; \ + int len = 0; \ + if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ + OMPI_TIMING_ITEM_EXTEND; \ + } \ + len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ + OPAL_TIMING_STR_LEN, ##__VA_ARGS__); \ + if (len >= OPAL_TIMING_STR_LEN) { \ + OMPI_TIMING.error = 1; \ + } \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = strdup(f); \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = strdup(__func__); \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ + OMPI_TIMING.cnt++; \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + } \ + } while(0) -#define OMPI_TIMING_APPEND(filename,func,desc,ts) { \ - if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ - OMPI_TIMING_ITEM_EXTEND; \ - } \ - int len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ - OPAL_TIMING_STR_LEN, "%s", desc); \ - if (len >= OPAL_TIMING_STR_LEN) { \ - OMPI_TIMING.error = 1; \ - } \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = func; \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = filename; \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ - OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ - OMPI_TIMING.cnt++; \ - OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ -} +#define OMPI_TIMING_APPEND(filename,func,desc,ts) \ + do { \ + if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ + OMPI_TIMING_ITEM_EXTEND; \ + } \ + int len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ + OPAL_TIMING_STR_LEN, "%s", desc); \ + if (len >= OPAL_TIMING_STR_LEN) { \ + OMPI_TIMING.error = 1; \ + } \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = func; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = filename; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ + OMPI_TIMING.cnt++; \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + } while(0) -#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) { \ - if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ - int cnt = OPAL_TIMING_ENV_CNT(func); \ - int i; \ - OMPI_TIMING.error = OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func); \ - for(i = 0; i < cnt; i++){ \ - char *desc, *filename; \ - double ts = OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc); \ - OMPI_TIMING_APPEND(filename, func, desc, ts); \ - } \ - } \ -} +#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) \ + do { \ + if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ + int cnt; \ + int i; \ + double ts; \ + OPAL_TIMING_ENV_CNT(func, cnt); \ + OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \ + for(i = 0; i < cnt; i++){ \ + char *desc, *filename; \ + OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc, ts); \ + OMPI_TIMING_APPEND(filename, func, desc, ts); \ + } \ + } \ + } while(0) #define OMPI_TIMING_IMPORT_OPAL(func) \ OMPI_TIMING_IMPORT_OPAL_PREFIX("", func) -#define OMPI_TIMING_OUT ({ \ - if (OMPI_TIMING.enabled) { \ - int i, size, rank; \ - MPI_Comm_size(MPI_COMM_WORLD, &size); \ - MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ - int error = 0; \ - \ - MPI_Reduce(&OMPI_TIMING.error, &error, 1, \ - MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \ - \ - if (error) { \ - if (0 == rank) { \ - printf("==OMPI_TIMING== error: something went wrong, timings doesn't work\n"); \ - } \ - } \ - else { \ - double *avg = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ - double *min = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ - double *max = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ - char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ - char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ - char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ - \ - if( OMPI_TIMING.cnt > 0 ) { \ - OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ - ompi_timing_list_t *timing = OMPI_TIMING.timing; \ - i = 0; \ - do { \ - int use; \ - for (use = 0; use < timing->use; use++) { \ - MPI_Reduce(&timing->val[use].ts, avg + i, 1, \ - MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); \ - MPI_Reduce(&timing->val[use].ts, min + i, 1, \ - MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); \ - MPI_Reduce(&timing->val[use].ts, max + i, 1, \ - MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); \ - desc[i] = timing->val[use].desc; \ - prefix[i] = timing->val[use].prefix; \ - file[i] = timing->val[use].file; \ - i++; \ - } \ - timing = (ompi_timing_list_t*)timing->next; \ - } while (timing != NULL); \ - \ - if( 0 == rank ){ \ - if (OMPI_TIMING.timing->next) { \ - printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \ - "==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \ - } \ - \ - printf("------------------ %s ------------------\n", \ - OMPI_TIMING.prefix); \ - for(i=0; i< OMPI_TIMING.cnt; i++){ \ - avg[i] /= size; \ - printf("[%s:%s:%s]: %lf / %lf / %lf\n", \ - file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \ - } \ - printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \ - OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \ - } \ - } \ - free(avg); \ - free(min); \ - free(max); \ - free(desc); \ - free(prefix); \ - free(file); \ - } \ - } \ -}) +#define OMPI_TIMING_OUT \ + do { \ + if (OMPI_TIMING.enabled) { \ + int i, size, rank; \ + MPI_Comm_size(MPI_COMM_WORLD, &size); \ + MPI_Comm_rank(MPI_COMM_WORLD, &rank); \ + int error = 0; \ + \ + MPI_Reduce(&OMPI_TIMING.error, &error, 1, \ + MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \ + \ + if (error) { \ + if (0 == rank) { \ + printf("==OMPI_TIMING== error: something went wrong, timings doesn't work\n"); \ + } \ + } \ + else { \ + double *avg = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + double *min = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + double *max = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \ + char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \ + \ + if( OMPI_TIMING.cnt > 0 ) { \ + OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ + ompi_timing_list_t *timing = OMPI_TIMING.timing; \ + i = 0; \ + do { \ + int use; \ + for (use = 0; use < timing->use; use++) { \ + MPI_Reduce(&timing->val[use].ts, avg + i, 1, \ + MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); \ + MPI_Reduce(&timing->val[use].ts, min + i, 1, \ + MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); \ + MPI_Reduce(&timing->val[use].ts, max + i, 1, \ + MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); \ + desc[i] = timing->val[use].desc; \ + prefix[i] = timing->val[use].prefix; \ + file[i] = timing->val[use].file; \ + i++; \ + } \ + timing = (ompi_timing_list_t*)timing->next; \ + } while (timing != NULL); \ + \ + if( 0 == rank ){ \ + if (OMPI_TIMING.timing->next) { \ + printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \ + "==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \ + } \ + \ + printf("------------------ %s ------------------\n", \ + OMPI_TIMING.prefix); \ + for(i=0; i< OMPI_TIMING.cnt; i++){ \ + avg[i] /= size; \ + printf("[%s:%s:%s]: %lf / %lf / %lf\n", \ + file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \ + } \ + printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \ + OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \ + } \ + } \ + free(avg); \ + free(min); \ + free(max); \ + free(desc); \ + free(prefix); \ + free(file); \ + } \ + } \ + } while(0) #else #define OMPI_TIMING_INIT(size) diff --git a/opal/mca/pmix/pmix2x/configure.m4 b/opal/mca/pmix/pmix2x/configure.m4 index 928be5e263..043cf361b0 100644 --- a/opal/mca/pmix/pmix2x/configure.m4 +++ b/opal/mca/pmix/pmix2x/configure.m4 @@ -49,7 +49,19 @@ AC_DEFUN([MCA_opal_pmix_pmix2x_CONFIG],[ opal_pmix_pmix2x_sm_flag=--disable-dstore fi - opal_pmix_pmix2x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX2X_ $opal_pmix_pmix2x_sm_flag --without-tests-examples --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\"" + AC_ARG_ENABLE([pmix-timing], + [AC_HELP_STRING([--enable-pmix-timing], + [Enable PMIx timing measurements (default: disabled)])]) + AC_MSG_CHECKING([if PMIx timing is enabled]) + if test "$enable_pmix_timing" == "yes"; then + AC_MSG_RESULT([yes]) + opal_pmix_pmix2x_timing_flag=--enable-pmix-timing + else + AC_MSG_RESULT([no (disabled)]) + opal_pmix_pmix2x_timing_flag=--disable-pmix-timing + fi + + opal_pmix_pmix2x_args="--with-pmix-symbol-rename=OPAL_MCA_PMIX2X_ $opal_pmix_pmix2x_sm_flag $opal_pmix_pmix2x_timing_flag --without-tests-examples --disable-visibility --enable-embedded-libevent --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\"" AS_IF([test "$enable_debug" = "yes"], [opal_pmix_pmix2x_args="--enable-debug $opal_pmix_pmix2x_args" CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g"], diff --git a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 index bde0572d35..8a0bc0abad 100644 --- a/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 +++ b/opal/mca/pmix/pmix2x/pmix/config/pmix.m4 @@ -950,18 +950,18 @@ AC_MSG_RESULT([$with_ident_string]) # Timing support # AC_MSG_CHECKING([if want developer-level timing support]) -AC_ARG_ENABLE(timing, - AC_HELP_STRING([--enable-timing], +AC_ARG_ENABLE(pmix-timing, + AC_HELP_STRING([--enable-pmix-timing], [enable developer-level timing code (default: disabled)])) -if test "$enable_timing" = "yes"; then +if test "$enable_pmix_timing" = "yes"; then AC_MSG_RESULT([yes]) - WANT_TIMING=1 + WANT_PMIX_TIMING=1 else AC_MSG_RESULT([no]) - WANT_TIMING=0 + WANT_PMIX_TIMING=0 fi -AC_DEFINE_UNQUOTED([PMIX_ENABLE_TIMING], [$WANT_TIMING], +AC_DEFINE_UNQUOTED([PMIX_ENABLE_TIMING], [$WANT_PMIX_TIMING], [Whether we want developer-level timing support or not]) # diff --git a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c index eeea597f7d..e2c60025bb 100644 --- a/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c +++ b/opal/mca/pmix/pmix2x/pmix/src/runtime/pmix_params.c @@ -21,7 +21,7 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Mellanox Technologies, Inc. * All rights reserved. - * Copyright (c) 2016 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,7 +37,6 @@ #include "src/util/timings.h" #if PMIX_ENABLE_TIMING -char *pmix_timing_sync_file = NULL; char *pmix_timing_output = NULL; bool pmix_timing_overhead = true; #endif @@ -56,16 +55,6 @@ pmix_status_t pmix_register_params(void) pmix_register_done = true; #if PMIX_ENABLE_TIMING - pmix_timing_sync_file = NULL; - (void) pmix_mca_base_var_register ("pmix", "pmix", NULL, "timing_sync_file", - "Clock synchronisation information generated by mpisync tool. You don't need to touch this if you use mpirun_prof tool.", - PMIX_MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - PMIX_INFO_LVL_9, PMIX_MCA_BASE_VAR_SCOPE_ALL, - &pmix_timing_sync_file); - if( pmix_timing_clocksync_read(pmix_timing_sync_file) ){ - pmix_output(0, "Cannot read file %s containing clock synchronisation information\n", pmix_timing_sync_file); - } - pmix_timing_output = NULL; (void) pmix_mca_base_var_register ("pmix", "pmix", NULL, "timing_output", "The name of output file for timing information. If this parameter is not set then output will be directed into PMIX debug channel.", diff --git a/opal/util/timings.h b/opal/util/timings.h index 2bd41b6a4a..1cab4a8742 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2014 Artem Polyakov - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,47 +39,46 @@ typedef struct { opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); -#define OPAL_TIMING_ENV_START_TYPE(func, type, prefix) ({ \ - opal_timing_env_t h; \ - char *ptr = NULL; \ - char *_prefix = prefix; \ - int n; \ - if( NULL == prefix ){ \ - _prefix = ""; \ - } \ - h.error = 0; \ - n = snprintf(h.id, OPAL_TIMING_STR_LEN, "%s%s", _prefix, func); \ - if( n > OPAL_TIMING_STR_LEN ){ \ - h.error = 1; \ - } \ - n = sprintf(h.cntr_env,"OMPI_TIMING_%s%s_CNT", prefix, h.id); \ - if( n > OPAL_TIMING_STR_LEN ){ \ - h.error = 1; \ - } \ - ptr = getenv(h.id); \ - if( NULL == ptr || strcmp(ptr, "1")){ \ - h.enabled = 0; \ - } \ - h.get_ts = opal_timing_ts_func(type); \ - ptr = getenv("OPAL_TIMING_ENABLE"); \ - if (NULL != ptr) { \ - h.enabled = atoi(ptr); \ - } \ - h.cntr = 0; \ - ptr = getenv(h.id); \ - if( NULL != ptr ){ \ - h.cntr = atoi(ptr); \ - } \ - h.ts = h.get_ts(); \ - if ( 0 != h.error ){ \ - h.enabled = 0; \ - } \ - h; \ -}) +#define OPAL_TIMING_ENV_START_TYPE(func, _nm, type, prefix) \ + do { \ + char *ptr = NULL; \ + char *_prefix = prefix; \ + int n; \ + if( NULL == prefix ){ \ + _prefix = ""; \ + } \ + (_nm)->error = 0; \ + n = snprintf((_nm)->id, OPAL_TIMING_STR_LEN, "%s%s", _prefix, func); \ + if( n > OPAL_TIMING_STR_LEN ){ \ + (_nm)->error = 1; \ + } \ + n = sprintf((_nm)->cntr_env,"OMPI_TIMING_%s%s_CNT", prefix, (_nm)->id); \ + if( n > OPAL_TIMING_STR_LEN ){ \ + (_nm)->error = 1; \ + } \ + ptr = getenv((_nm)->id); \ + if( NULL == ptr || strcmp(ptr, "1")){ \ + (_nm)->enabled = 0; \ + } \ + (_nm)->get_ts = opal_timing_ts_func(type); \ + ptr = getenv("OPAL_TIMING_ENABLE"); \ + if (NULL != ptr) { \ + (_nm)->enabled = atoi(ptr); \ + } \ + (_nm)->cntr = 0; \ + ptr = getenv((_nm)->id); \ + if( NULL != ptr ){ \ + (_nm)->cntr = atoi(ptr); \ + } \ + (_nm)->ts = (_nm)->get_ts(); \ + if ( 0 != (_nm)->error ){ \ + (_nm)->enabled = 0; \ + } \ + } while(0) #define OPAL_TIMING_ENV_INIT(name) \ - opal_timing_env_t name ## _val, *name = &(name ## _val); \ - *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, ""); + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + OPAL_TIMING_ENV_START_TYPE(__func__, name, OPAL_TIMING_AUTOMATIC_TIMER, ""); /* We use function names for identification * however this might be a problem for the private @@ -88,111 +87,114 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); * Use prefix to do a finer-grained identification if needed */ #define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ - opal_timing_env_t name ## _val, *name = &(name ## _val); \ - *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); + do { \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + *name = OPAL_TIMING_ENV_START_TYPE(__func__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); \ + } while(0) -#define OPAL_TIMING_ENV_NEXT(h, fmt, ...) ({ \ - int n; \ - char buf1[OPAL_TIMING_STR_LEN], buf2[OPAL_TIMING_STR_LEN]; \ - double time; \ - char *filename; \ - if( h->enabled ){ \ - /* enabled codepath */ \ - time = h->get_ts() - h->ts; \ - n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_DESC_%d", h->id, h->cntr); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - n = snprintf(buf2, OPAL_TIMING_STR_LEN, fmt, ## __VA_ARGS__ ); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - setenv(buf1, buf2, 1); \ - n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_VAL_%d", h->id, h->cntr); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%lf", time); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - setenv(buf1, buf2, 1); \ - filename = strrchr(__FILE__, '/') + 1; \ - n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_FILE_%d", h->id, h->cntr); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%s", filename); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - setenv(buf1, buf2, 1); \ - h->cntr++; \ - sprintf(buf1, "%d", h->cntr); \ - setenv(h->cntr_env, buf1, 1); \ - /* We don't include env operations into the consideration. - * Hopefully this will help to make measurements more accurate. - */ \ - h->ts = h->get_ts(); \ - } \ - if (h->error) { \ - n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_ERROR", h->id);\ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%d", h->error); \ - if ( n > OPAL_TIMING_STR_LEN ){ \ - h->error = 1; \ - } \ - setenv(buf1, buf2, 1); \ - } \ -}) +#define OPAL_TIMING_ENV_NEXT(h, ...) \ + do { \ + int n; \ + char buf1[OPAL_TIMING_STR_LEN], buf2[OPAL_TIMING_STR_LEN]; \ + double time; \ + char *filename; \ + if( h->enabled ){ \ + /* enabled codepath */ \ + time = h->get_ts() - h->ts; \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_DESC_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, __VA_ARGS__ ); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_VAL_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%lf", time); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + filename = strrchr(__FILE__, '/') + 1; \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_FILE_%d", h->id, h->cntr); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%s", filename); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + h->cntr++; \ + sprintf(buf1, "%d", h->cntr); \ + setenv(h->cntr_env, buf1, 1); \ + /* We don't include env operations into the consideration. + * Hopefully this will help to make measurements more accurate. + */ \ + h->ts = h->get_ts(); \ + } \ + if (h->error) { \ + n = snprintf(buf1, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s_ERROR", h->id);\ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, "%d", h->error); \ + if ( n > OPAL_TIMING_STR_LEN ){ \ + h->error = 1; \ + } \ + setenv(buf1, buf2, 1); \ + } \ + } while(0) /* This function supposed to be called from the code that will * do the postprocessing, i.e. OMPI timing portion that will * do the reduction of accumulated values */ -#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func) ({ \ - char ename[OPAL_TIMING_STR_LEN]; \ - int cnt = 0; \ - char *ptr = NULL; \ - int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_CNT", prefix, func); \ - if ( n <= OPAL_TIMING_STR_LEN ){ \ - ptr = getenv(ename); \ - if( NULL != ptr ){ cnt = atoi(ptr); }; \ - } \ - cnt; \ -}) +#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func, _cnt) \ + do { \ + char ename[OPAL_TIMING_STR_LEN]; \ + char *ptr = NULL; \ + int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_CNT", prefix, func); \ + (_cnt) = 0; \ + if ( n <= OPAL_TIMING_STR_LEN ){ \ + ptr = getenv(ename); \ + if( NULL != ptr ){ (_cnt) = atoi(ptr); }; \ + } \ + } while(0) -#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func) ({ \ - char ename[OPAL_TIMING_STR_LEN]; \ - int error = 0; \ - char *ptr = NULL; \ - int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_ERROR", prefix, func); \ - if ( n <= OPAL_TIMING_STR_LEN ){ \ - ptr = getenv(ename); \ - if( NULL != ptr ){ error = atoi(ptr); }; \ - } \ - error; \ -}) +#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func, _err) \ + do { \ + char ename[OPAL_TIMING_STR_LEN]; \ + (_err) = 0; \ + char *ptr = NULL; \ + int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_ERROR", prefix, func); \ + if ( n <= OPAL_TIMING_STR_LEN ){ \ + ptr = getenv(ename); \ + if( NULL != ptr ){ (_err) = atoi(ptr); }; \ + } \ + } while(0) -#define OPAL_TIMING_ENV_CNT(func) \ - OPAL_TIMING_ENV_CNT_PREFIX("", func) +#define OPAL_TIMING_ENV_CNT(func, _cnt) \ + OPAL_TIMING_ENV_CNT_PREFIX("", func, _cnt) -#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, filename, func, i, desc) ({ \ - char vname[OPAL_TIMING_STR_LEN]; \ - double ts = 0.0; \ - sprintf(vname, "OMPI_TIMING_%s%s_FILE_%d", prefix, func, i); \ - *filename = getenv(vname); \ - sprintf(vname, "OMPI_TIMING_%s%s_DESC_%d", prefix, func, i); \ - *desc = getenv(vname); \ - sprintf(vname, "OMPI_TIMING_%s%s_VAL_%d", prefix, func, i); \ - char *ptr = getenv(vname); \ - if ( NULL != ptr ) { \ - sscanf(ptr,"%lf", &ts); \ - } \ - ts; \ -}) +#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, filename, func, i, desc, _t) \ + do { \ + char vname[OPAL_TIMING_STR_LEN]; \ + (_t) = 0.0; \ + sprintf(vname, "OMPI_TIMING_%s%s_FILE_%d", prefix, func, i); \ + *filename = getenv(vname); \ + sprintf(vname, "OMPI_TIMING_%s%s_DESC_%d", prefix, func, i); \ + *desc = getenv(vname); \ + sprintf(vname, "OMPI_TIMING_%s%s_VAL_%d", prefix, func, i); \ + char *ptr = getenv(vname); \ + if ( NULL != ptr ) { \ + sscanf(ptr,"%lf", &(_t)); \ + } \ + } while(0) #define OPAL_TIMING_ENV_GETDESC(file, func, index, desc) \ OPAL_TIMING_ENV_GETDESC_PREFIX("", file, func, index, desc) diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index dcb3c9fafd..c5e409c561 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -343,9 +343,6 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata) static int read_bytes(mca_oob_tcp_peer_t* peer) { int rc; -#if OPAL_ENABLE_TIMING - int to_read = peer->recv_msg->rdbytes; -#endif /* read until all bytes recvd or error */ while (0 < peer->recv_msg->rdbytes) { @@ -431,9 +428,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t*)cbdata; int rc; orte_rml_send_t *snd; -#if OPAL_ENABLE_TIMING - bool timing_same_as_hdr = false; -#endif opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler called for peer %s", @@ -503,13 +497,7 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler read hdr", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); -#if OPAL_ENABLE_TIMING - int to_recv = peer->recv_msg->rdbytes; -#endif if (ORTE_SUCCESS == (rc = read_bytes(peer))) { -#if OPAL_ENABLE_TIMING - timing_same_as_hdr = true; -#endif /* completed reading the header */ peer->recv_msg->hdr_recvd = true; /* convert the header */ From d132eab4a57a621489a44c8e71784861c68e56a9 Mon Sep 17 00:00:00 2001 From: Boris Karasev Date: Mon, 10 Apr 2017 21:42:51 +0600 Subject: [PATCH 9/9] ompi/timings: fixed the error of opal timings env import Signed-off-by: Boris Karasev --- ompi/Makefile.am | 1 + ompi/util/Makefile.am | 13 ++++++++ ompi/util/timings.h | 56 +++++++++++++++++-------------- opal/util/timings.c | 1 + opal/util/timings.h | 78 ++++++++++++++++++++++--------------------- 5 files changed, 86 insertions(+), 63 deletions(-) create mode 100644 ompi/util/Makefile.am diff --git a/ompi/Makefile.am b/ompi/Makefile.am index f8e9b802f1..abe0f1da14 100644 --- a/ompi/Makefile.am +++ b/ompi/Makefile.am @@ -192,6 +192,7 @@ include mpiext/Makefile.am include patterns/net/Makefile.am include patterns/comm/Makefile.am include mca/Makefile.am +include util/Makefile.am # Ensure that the man page directory exists before we try to make man # page files (because ompi/mpi/man/man3 has no config.status-generated diff --git a/ompi/util/Makefile.am b/ompi/util/Makefile.am new file mode 100644 index 0000000000..45f01c7706 --- /dev/null +++ b/ompi/util/Makefile.am @@ -0,0 +1,13 @@ +# -*- makefile -*- +# +# Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Source code files +headers += \ + util/timings.h diff --git a/ompi/util/timings.h b/ompi/util/timings.h index eddea59f9a..8ad8111993 100644 --- a/ompi/util/timings.h +++ b/ompi/util/timings.h @@ -1,3 +1,13 @@ +/* + * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2017 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + #ifndef OMPI_UTIL_TIMING_H #define OMPI_UTIL_TIMING_H @@ -33,7 +43,7 @@ typedef struct ompi_timing_t { #define OMPI_TIMING_INIT(_size) \ ompi_timing_t OMPI_TIMING; \ - OMPI_TIMING.prefix = __func__; \ + OMPI_TIMING.prefix = __func__; \ OMPI_TIMING.size = _size; \ OMPI_TIMING.get_ts = opal_timing_ts_func(OPAL_TIMING_AUTOMATIC_TIMER); \ OMPI_TIMING.cnt = 0; \ @@ -55,8 +65,8 @@ typedef struct ompi_timing_t { } \ } -#define OMPI_TIMING_ITEM_EXTEND \ - do { \ +#define OMPI_TIMING_ITEM_EXTEND \ + do { \ if (OMPI_TIMING.enabled) { \ OMPI_TIMING.cur_timing->next = (struct ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \ OMPI_TIMING.cur_timing = (ompi_timing_list_t*)OMPI_TIMING.cur_timing->next; \ @@ -65,8 +75,8 @@ typedef struct ompi_timing_t { } \ } while(0) -#define OMPI_TIMING_FINALIZE \ - do { \ +#define OMPI_TIMING_FINALIZE \ + do { \ if (OMPI_TIMING.enabled) { \ ompi_timing_list_t *t = OMPI_TIMING.timing, *tmp; \ while ( NULL != t) { \ @@ -81,8 +91,8 @@ typedef struct ompi_timing_t { } \ } while(0) -#define OMPI_TIMING_NEXT(...) \ - do { \ +#define OMPI_TIMING_NEXT(...) \ + do { \ if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ char *f = strrchr(__FILE__, '/') + 1; \ int len = 0; \ @@ -90,7 +100,7 @@ typedef struct ompi_timing_t { OMPI_TIMING_ITEM_EXTEND; \ } \ len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \ - OPAL_TIMING_STR_LEN, ##__VA_ARGS__); \ + OPAL_TIMING_STR_LEN, ##__VA_ARGS__); \ if (len >= OPAL_TIMING_STR_LEN) { \ OMPI_TIMING.error = 1; \ } \ @@ -103,8 +113,8 @@ typedef struct ompi_timing_t { } \ } while(0) -#define OMPI_TIMING_APPEND(filename,func,desc,ts) \ - do { \ +#define OMPI_TIMING_APPEND(filename,func,desc,ts) \ + do { \ if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \ OMPI_TIMING_ITEM_EXTEND; \ } \ @@ -115,20 +125,18 @@ typedef struct ompi_timing_t { } \ OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = func; \ OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = filename; \ - OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \ - OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \ + OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = ts; \ OMPI_TIMING.cnt++; \ - OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \ } while(0) -#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) \ - do { \ +#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) \ + do { \ if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \ - int cnt; \ + int cnt; \ int i; \ - double ts; \ - OPAL_TIMING_ENV_CNT(func, cnt); \ - OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \ + double ts; \ + OPAL_TIMING_ENV_CNT(func, cnt); \ + OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \ for(i = 0; i < cnt; i++){ \ char *desc, *filename; \ OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc, ts); \ @@ -137,13 +145,11 @@ typedef struct ompi_timing_t { } \ } while(0) -#define OMPI_TIMING_IMPORT_OPAL(func) \ - OMPI_TIMING_IMPORT_OPAL_PREFIX("", func) +#define OMPI_TIMING_IMPORT_OPAL(func) \ + OMPI_TIMING_IMPORT_OPAL_PREFIX("", func); - - -#define OMPI_TIMING_OUT \ - do { \ +#define OMPI_TIMING_OUT \ + do { \ if (OMPI_TIMING.enabled) { \ int i, size, rank; \ MPI_Comm_size(MPI_COMM_WORLD, &size); \ diff --git a/opal/util/timings.c b/opal/util/timings.c index 775c08135d..7e9981fcb3 100644 --- a/opal/util/timings.c +++ b/opal/util/timings.c @@ -1,6 +1,7 @@ /* * Copyright (C) 2014 Artem Polyakov * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/util/timings.h b/opal/util/timings.h index 1cab4a8742..a5f4998132 100644 --- a/opal/util/timings.h +++ b/opal/util/timings.h @@ -1,6 +1,7 @@ /* * Copyright (C) 2014 Artem Polyakov * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,7 +40,7 @@ typedef struct { opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); -#define OPAL_TIMING_ENV_START_TYPE(func, _nm, type, prefix) \ +#define OPAL_TIMING_ENV_START_TYPE(func, _nm, type, prefix) \ do { \ char *ptr = NULL; \ char *_prefix = prefix; \ @@ -47,38 +48,39 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); if( NULL == prefix ){ \ _prefix = ""; \ } \ - (_nm)->error = 0; \ - n = snprintf((_nm)->id, OPAL_TIMING_STR_LEN, "%s%s", _prefix, func); \ + (_nm)->error = 0; \ + n = snprintf((_nm)->id, OPAL_TIMING_STR_LEN, "%s%s", _prefix, func); \ if( n > OPAL_TIMING_STR_LEN ){ \ - (_nm)->error = 1; \ + (_nm)->error = 1; \ } \ - n = sprintf((_nm)->cntr_env,"OMPI_TIMING_%s%s_CNT", prefix, (_nm)->id); \ + n = sprintf((_nm)->cntr_env,"OMPI_TIMING_%s%s_CNT", prefix, (_nm)->id); \ if( n > OPAL_TIMING_STR_LEN ){ \ - (_nm)->error = 1; \ + (_nm)->error = 1; \ } \ - ptr = getenv((_nm)->id); \ + ptr = getenv((_nm)->id); \ if( NULL == ptr || strcmp(ptr, "1")){ \ - (_nm)->enabled = 0; \ + (_nm)->enabled = 0; \ } \ - (_nm)->get_ts = opal_timing_ts_func(type); \ + (_nm)->get_ts = opal_timing_ts_func(type); \ ptr = getenv("OPAL_TIMING_ENABLE"); \ if (NULL != ptr) { \ - (_nm)->enabled = atoi(ptr); \ + (_nm)->enabled = atoi(ptr); \ } \ - (_nm)->cntr = 0; \ - ptr = getenv((_nm)->id); \ + (_nm)->cntr = 0; \ + ptr = getenv((_nm)->id); \ if( NULL != ptr ){ \ - (_nm)->cntr = atoi(ptr); \ + (_nm)->cntr = atoi(ptr); \ } \ - (_nm)->ts = (_nm)->get_ts(); \ - if ( 0 != (_nm)->error ){ \ - (_nm)->enabled = 0; \ + (_nm)->ts = (_nm)->get_ts(); \ + if ( 0 != (_nm)->error ){ \ + (_nm)->enabled = 0; \ } \ } while(0) -#define OPAL_TIMING_ENV_INIT(name) \ - opal_timing_env_t name ## _val, *name = &(name ## _val); \ - OPAL_TIMING_ENV_START_TYPE(__func__, name, OPAL_TIMING_AUTOMATIC_TIMER, ""); +#define OPAL_TIMING_ENV_INIT(name) \ + opal_timing_env_t name ## _val, *name = &(name ## _val); \ + OPAL_TIMING_ENV_START_TYPE(__func__, name, OPAL_TIMING_AUTOMATIC_TIMER, ""); + /* We use function names for identification * however this might be a problem for the private @@ -86,14 +88,14 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); * conflict. * Use prefix to do a finer-grained identification if needed */ -#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ - do { \ +#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name) \ + do { \ opal_timing_env_t name ## _val, *name = &(name ## _val); \ *name = OPAL_TIMING_ENV_START_TYPE(__func__, OPAL_TIMING_AUTOMATIC_TIMER, prefix); \ } while(0) -#define OPAL_TIMING_ENV_NEXT(h, ...) \ - do { \ +#define OPAL_TIMING_ENV_NEXT(h, ...) \ + do { \ int n; \ char buf1[OPAL_TIMING_STR_LEN], buf2[OPAL_TIMING_STR_LEN]; \ double time; \ @@ -105,7 +107,7 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); if ( n > OPAL_TIMING_STR_LEN ){ \ h->error = 1; \ } \ - n = snprintf(buf2, OPAL_TIMING_STR_LEN, __VA_ARGS__ ); \ + n = snprintf(buf2, OPAL_TIMING_STR_LEN, __VA_ARGS__ ); \ if ( n > OPAL_TIMING_STR_LEN ){ \ h->error = 1; \ } \ @@ -154,37 +156,37 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); * do the postprocessing, i.e. OMPI timing portion that will * do the reduction of accumulated values */ -#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func, _cnt) \ - do { \ +#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func, _cnt) \ + do { \ char ename[OPAL_TIMING_STR_LEN]; \ char *ptr = NULL; \ int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_CNT", prefix, func); \ - (_cnt) = 0; \ + (_cnt) = 0; \ if ( n <= OPAL_TIMING_STR_LEN ){ \ ptr = getenv(ename); \ - if( NULL != ptr ){ (_cnt) = atoi(ptr); }; \ + if( NULL != ptr ){ (_cnt) = atoi(ptr); }; \ } \ } while(0) -#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func, _err) \ - do { \ +#define OPAL_TIMING_ENV_ERROR_PREFIX(prefix, func, _err) \ + do { \ char ename[OPAL_TIMING_STR_LEN]; \ - (_err) = 0; \ + (_err) = 0; \ char *ptr = NULL; \ int n = snprintf(ename, OPAL_TIMING_STR_LEN, "OMPI_TIMING_%s%s_ERROR", prefix, func); \ if ( n <= OPAL_TIMING_STR_LEN ){ \ ptr = getenv(ename); \ - if( NULL != ptr ){ (_err) = atoi(ptr); }; \ + if( NULL != ptr ){ (_err) = atoi(ptr); }; \ } \ } while(0) -#define OPAL_TIMING_ENV_CNT(func, _cnt) \ +#define OPAL_TIMING_ENV_CNT(func, _cnt) \ OPAL_TIMING_ENV_CNT_PREFIX("", func, _cnt) -#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, filename, func, i, desc, _t) \ - do { \ +#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, filename, func, i, desc, _t) \ + do { \ char vname[OPAL_TIMING_STR_LEN]; \ - (_t) = 0.0; \ + (_t) = 0.0; \ sprintf(vname, "OMPI_TIMING_%s%s_FILE_%d", prefix, func, i); \ *filename = getenv(vname); \ sprintf(vname, "OMPI_TIMING_%s%s_DESC_%d", prefix, func, i); \ @@ -192,11 +194,11 @@ opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type); sprintf(vname, "OMPI_TIMING_%s%s_VAL_%d", prefix, func, i); \ char *ptr = getenv(vname); \ if ( NULL != ptr ) { \ - sscanf(ptr,"%lf", &(_t)); \ + sscanf(ptr,"%lf", &(_t)); \ } \ } while(0) -#define OPAL_TIMING_ENV_GETDESC(file, func, index, desc) \ +#define OPAL_TIMING_ENV_GETDESC(file, func, index, desc) \ OPAL_TIMING_ENV_GETDESC_PREFIX("", file, func, index, desc) #else