1
1

ompi/timings: add OMPI-level timing framework.

This is an extension of the OPAL timing framework that uses
MPI_Reduce to provide a compact representation of the timings
collected throughout the whole application.

NOTE: the functionality is currently disabled; it will be enabled
after runtime verification.

Signed-off-by: Artem Polyakov <artpol84@gmail.com>
Этот коммит содержится в:
Artem Polyakov 2017-03-29 07:25:17 +07:00 коммит произвёл Boris Karasev
родитель 45898a9c65
Коммит e3acf2a339
2 изменённых файлов: 151 добавлений и 12 удалений

Просмотреть файл

@ -93,6 +93,7 @@
#include "ompi/dpm/dpm.h" #include "ompi/dpm/dpm.h"
#include "ompi/mpiext/mpiext.h" #include "ompi/mpiext/mpiext.h"
#include "ompi/mca/hook/base/base.h" #include "ompi/mca/hook/base/base.h"
#include "ompi/util/timings.h"
#if OPAL_ENABLE_FT_CR == 1 #if OPAL_ENABLE_FT_CR == 1
#include "ompi/mca/crcp/crcp.h" #include "ompi/mca/crcp/crcp.h"
@ -348,6 +349,9 @@ static int ompi_register_mca_variables(void)
} }
/* check to see if we want timing information */ /* check to see if we want timing information */
/* TODO: enable OMPI init and OMPI finalize timings if
* this variable was set to 1!
*/
ompi_enable_timing = false; ompi_enable_timing = false;
(void) mca_base_var_register("ompi", "ompi", NULL, "timing", (void) mca_base_var_register("ompi", "ompi", NULL, "timing",
"Request that critical timing loops be measured", "Request that critical timing loops be measured",
@ -375,8 +379,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
volatile bool active; volatile bool active;
opal_list_t info; opal_list_t info;
opal_value_t *kv; opal_value_t *kv;
//OPAL_TIMING_DECLARE(tm);
//OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY); OMPI_TIMING_INIT(32);
/* bitflag of the thread level support provided. To be used /* bitflag of the thread level support provided. To be used
* for the modex in order to work in heterogeneous environments. */ * for the modex in order to work in heterogeneous environments. */
@ -479,7 +483,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided); ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided);
//OPAL_TIMING_MSTART((&tm,"time from start to completion of rte_init")); OMPI_TIMING_NEXT("initialization");
/* if we were not externally started, then we need to setup /* if we were not externally started, then we need to setup
* some envars so the MPI_INFO_ENV can get the cmd name * some envars so the MPI_INFO_ENV can get the cmd name
@ -508,10 +512,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
error = "ompi_mpi_init: ompi_rte_init failed"; error = "ompi_mpi_init: ompi_rte_init failed";
goto error; goto error;
} }
ompi_rte_initialized = true;
/* check for timing request - get stop time and report elapsed time if so */ OMPI_TIMING_NEXT("rte_init");
//OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex"));
ompi_rte_initialized = true;
/* Register the default errhandler callback */ /* Register the default errhandler callback */
errtrk.status = OPAL_ERROR; errtrk.status = OPAL_ERROR;
@ -638,8 +642,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
goto error; goto error;
} }
/* check for timing request - get stop time and report elapsed time if so */ OMPI_TIMING_IMPORT_OPAL("orte_init");
//OPAL_TIMING_MNEXT((&tm,"time to execute modex")); OMPI_TIMING_NEXT("rte_init-modex");
/* exchange connection info - this function may also act as a barrier /* exchange connection info - this function may also act as a barrier
* if data exchange is required. The modex occurs solely across procs * if data exchange is required. The modex occurs solely across procs
@ -657,7 +662,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
} }
} }
//OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); OMPI_TIMING_NEXT("modex");
/* select buffered send allocator component to be used */ /* select buffered send allocator component to be used */
if( OMPI_SUCCESS != if( OMPI_SUCCESS !=
@ -818,7 +823,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
ompi_rte_wait_for_debugger(); ompi_rte_wait_for_debugger();
/* Next timing measurement */ /* Next timing measurement */
//OPAL_TIMING_MNEXT((&tm,"time to execute barrier")); OMPI_TIMING_NEXT("modex-barrier");
/* wait for everyone to reach this point - this is a hard /* wait for everyone to reach this point - this is a hard
* barrier requirement at this time, though we hope to relax * barrier requirement at this time, though we hope to relax
@ -836,7 +841,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* check for timing request - get stop time and report elapsed /* check for timing request - get stop time and report elapsed
time if so, then start the clock again */ time if so, then start the clock again */
//OPAL_TIMING_MNEXT((&tm,"time from barrier to complete mpi_init")); OMPI_TIMING_NEXT("barrier");
#if OPAL_ENABLE_PROGRESS_THREADS == 0 #if OPAL_ENABLE_PROGRESS_THREADS == 0
/* Start setting up the event engine for MPI operations. Don't /* Start setting up the event engine for MPI operations. Don't
@ -969,7 +974,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* Finish last measurement, output results /* Finish last measurement, output results
* and clear timing structure */ * and clear timing structure */
//OPAL_TIMING_DELTAS(ompi_enable_timing, &tm); OMPI_TIMING_NEXT("barrier-finish");
OMPI_TIMING_OUT;
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex); opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);

133
ompi/util/timings.h Обычный файл
Просмотреть файл

@ -0,0 +1,133 @@
#ifndef OMPI_UTIL_TIMING_H
#define OMPI_UTIL_TIMING_H
#include "opal/util/timings.h"
/* TODO: we need access to MPI_* functions */
#if (0 && OPAL_ENABLE_TIMING)
/* TODO: replace with opal_timing function */
/*
 * Read the monotonic clock and return the reading in seconds as a double
 * (whole seconds plus the nanosecond part scaled by 1E-9).
 */
static inline double OMPI_TIMING_GET_TS(void)
{
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);
    return now.tv_sec + 1E-9 * now.tv_nsec;
}
/* TODO:
 * - create a structure to hold these variables
 * - use dynamically extendable arrays
 */
/*
 * Declare the timing state used by the other OMPI_TIMING_* macros in the
 * enclosing scope and take the first timestamp.
 *
 * inum - capacity of the interval arrays. The arrays are declared with an
 *        initializer, so this must be an integer constant expression
 *        (C does not allow initializing a variable-length array).
 *
 * Declares: OMPI_TIMING_ts (last timestamp), OMPI_TIMING_prefix (name of the
 * enclosing function), OMPI_TIMING_cnt (intervals recorded so far),
 * OMPI_TIMING_inum (capacity), OMPI_TIMING_in/max/min/avg (interval values
 * and reduction results) and OMPI_TIMING_desc (interval labels).
 *
 * The last line deliberately carries no line continuation, so whatever
 * follows this definition in the file is never spliced into the macro.
 */
#define OMPI_TIMING_INIT(inum)                              \
    double OMPI_TIMING_ts = OMPI_TIMING_GET_TS();           \
    const char *OMPI_TIMING_prefix = __FUNCTION__;          \
    int OMPI_TIMING_cnt = 0;                                \
    int OMPI_TIMING_inum = (inum);                          \
    double OMPI_TIMING_in[inum] = { 0.0 };                  \
    double OMPI_TIMING_max[inum] = { 0.0 };                 \
    double OMPI_TIMING_min[inum] = { 0.0 };                 \
    double OMPI_TIMING_avg[inum] = { 0.0 };                 \
    char *OMPI_TIMING_desc[inum] = { 0 };
/* TODO: provide a printf-like interface that allows building the
 * description string at runtime, like OPAL_TIMING_NEXT()
 */
/*
 * Close the current interval: store the time elapsed since OMPI_TIMING_ts
 * under the label `desc`, then restart the clock.
 *
 * desc - label for the interval; the pointer is stored, not copied.
 *
 * If all OMPI_TIMING_inum slots are already used, a diagnostic naming the
 * file (base name only), line and function is printed and the process
 * aborts. The diagnostic goes to stderr because abort() is not required to
 * flush buffered stdout output.
 *
 * Wrapped in do { } while (0) so that `OMPI_TIMING_NEXT(x);` is a single
 * statement and is safe in an unbraced if/else.
 */
#define OMPI_TIMING_NEXT(desc)                                                    \
    do {                                                                          \
        if (OMPI_TIMING_cnt >= OMPI_TIMING_inum) {                                \
            const char *ompi_timing_file_ = strrchr(__FILE__, '/');               \
            ompi_timing_file_ = (NULL == ompi_timing_file_)                       \
                                    ? __FILE__ : ompi_timing_file_ + 1;           \
            fprintf(stderr, "OMPI_TIMING [%s:%d %s]: interval count overflow!!\n",\
                    ompi_timing_file_, __LINE__, __FUNCTION__);                   \
            abort();                                                              \
        }                                                                         \
        OMPI_TIMING_in[OMPI_TIMING_cnt] = OMPI_TIMING_GET_TS() - OMPI_TIMING_ts;  \
        OMPI_TIMING_desc[OMPI_TIMING_cnt++] = (desc);                             \
        /* restart the clock after the bookkeeping above */                      \
        OMPI_TIMING_ts = OMPI_TIMING_GET_TS();                                    \
    } while (0)
/*
 * Record an externally measured interval: store the value `ts` under the
 * label `desc` in the next free slot. OMPI_TIMING_ts is left untouched.
 *
 * desc - label for the interval; the pointer is stored, not copied.
 * ts   - interval value to store.
 *
 * If all OMPI_TIMING_inum slots are already used, a diagnostic naming the
 * file (base name only), line and function is printed and the process
 * aborts. The diagnostic goes to stderr because abort() is not required to
 * flush buffered stdout output.
 *
 * Wrapped in do { } while (0) so that `OMPI_TIMING_APPEND(d, t);` is a
 * single statement and is safe in an unbraced if/else.
 */
#define OMPI_TIMING_APPEND(desc,ts)                                               \
    do {                                                                          \
        if (OMPI_TIMING_cnt >= OMPI_TIMING_inum) {                                \
            const char *ompi_timing_file_ = strrchr(__FILE__, '/');               \
            ompi_timing_file_ = (NULL == ompi_timing_file_)                       \
                                    ? __FILE__ : ompi_timing_file_ + 1;           \
            fprintf(stderr, "OMPI_TIMING [%s:%d %s]: interval count overflow!!\n",\
                    ompi_timing_file_, __LINE__, __FUNCTION__);                   \
            abort();                                                              \
        }                                                                         \
        OMPI_TIMING_in[OMPI_TIMING_cnt] = (ts);                                   \
        OMPI_TIMING_desc[OMPI_TIMING_cnt++] = (desc);                             \
    } while (0)
/*
 * Import the intervals that OPAL recorded for `func` into this timing set.
 *
 * func - name whose OPAL-level timings are imported; it is passed to both
 *        OPAL_TIMING_ENV_CNT and OPAL_TIMING_ENV_GETDESC. (The previous code
 *        passed an undefined identifier `prefix` to the latter.)
 *
 * When at least one interval is available, the environment variable
 * "OMPI_TIMING_<OMPI_TIMING_prefix>" is set to "1"; OMPI_TIMING_OUT reads the
 * same variable to decide whether to report. Each imported interval is then
 * added with OMPI_TIMING_APPEND.
 *
 * Wrapped in do { } while (0) so the invocation is a single statement.
 */
#define OMPI_TIMING_IMPORT_OPAL(func)                                             \
    do {                                                                          \
        int ompi_timing_i_;                                                       \
        int ompi_timing_n_ = OPAL_TIMING_ENV_CNT(func);                           \
        if (0 < ompi_timing_n_) {                                                 \
            char ompi_timing_ename_[256];                                         \
            snprintf(ompi_timing_ename_, sizeof(ompi_timing_ename_),              \
                     "OMPI_TIMING_%s", OMPI_TIMING_prefix);                       \
            setenv(ompi_timing_ename_, "1", 1);                                   \
        }                                                                         \
        for (ompi_timing_i_ = 0; ompi_timing_i_ < ompi_timing_n_;                 \
             ompi_timing_i_++) {                                                  \
            char *ompi_timing_desc_;                                              \
            double ompi_timing_val_ =                                             \
                OPAL_TIMING_ENV_GETDESC(func, ompi_timing_i_, &ompi_timing_desc_);\
            OMPI_TIMING_APPEND(ompi_timing_desc_, ompi_timing_val_);              \
        }                                                                         \
    } while (0)
/*
 * Reduce the recorded intervals over MPI_COMM_WORLD and print a summary.
 *
 * Nothing is reduced or printed unless the environment variable
 * "OMPI_TIMING_<OMPI_TIMING_prefix>" is set. When it is set, three
 * MPI_Reduce calls to root 0 (MPI_SUM, MPI_MIN, MPI_MAX) are made over the
 * first OMPI_TIMING_cnt intervals. Rank 0 then divides each sum by the
 * communicator size and prints "avg / min / max" per interval, followed by
 * the time spent in this reporting step itself.
 *
 * Wrapped in do { } while (0) so that `OMPI_TIMING_OUT;` is a single
 * statement. Locals carry an ompi_timing_ prefix so they do not shadow the
 * caller's variables.
 */
#define OMPI_TIMING_OUT                                                           \
    do {                                                                          \
        int ompi_timing_i_, ompi_timing_size_, ompi_timing_rank_;                 \
        char ompi_timing_ename_[1024];                                            \
        MPI_Comm_size(MPI_COMM_WORLD, &ompi_timing_size_);                        \
        MPI_Comm_rank(MPI_COMM_WORLD, &ompi_timing_rank_);                        \
        snprintf(ompi_timing_ename_, sizeof(ompi_timing_ename_),                  \
                 "OMPI_TIMING_%s", OMPI_TIMING_prefix);                           \
                                                                                  \
        if (NULL != getenv(ompi_timing_ename_)) {                                 \
            /* restart the clock to measure the cost of reporting */             \
            OMPI_TIMING_ts = OMPI_TIMING_GET_TS();                                \
            MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_avg, OMPI_TIMING_cnt,          \
                       MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);                   \
            MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_min, OMPI_TIMING_cnt,          \
                       MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);                   \
            MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_max, OMPI_TIMING_cnt,          \
                       MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);                   \
                                                                                  \
            if (0 == ompi_timing_rank_) {                                         \
                printf("------------------ %s ------------------\n",              \
                       OMPI_TIMING_prefix);                                       \
                for (ompi_timing_i_ = 0; ompi_timing_i_ < OMPI_TIMING_cnt;        \
                     ompi_timing_i_++) {                                          \
                    /* the SUM reduction becomes the average */                  \
                    OMPI_TIMING_avg[ompi_timing_i_] /= ompi_timing_size_;         \
                    printf("[%s:%s]: %lf / %lf / %lf\n",                          \
                           OMPI_TIMING_prefix, OMPI_TIMING_desc[ompi_timing_i_],  \
                           OMPI_TIMING_avg[ompi_timing_i_],                       \
                           OMPI_TIMING_min[ompi_timing_i_],                       \
                           OMPI_TIMING_max[ompi_timing_i_]);                      \
                }                                                                 \
                printf("[%s:overhead]: %lf \n", OMPI_TIMING_prefix,               \
                       OMPI_TIMING_GET_TS() - OMPI_TIMING_ts);                    \
            }                                                                     \
        }                                                                         \
    } while (0)
#else
/* No-op fallbacks: each OMPI_TIMING_* macro expands to nothing, so
 * instrumented call sites compile to empty statements. */
#define OMPI_TIMING_INIT(inum)
#define OMPI_TIMING_NEXT(desc)
#define OMPI_TIMING_APPEND(desc,ts)
#define OMPI_TIMING_OUT
#define OMPI_TIMING_IMPORT_OPAL(func)
#endif
#endif