ompi/timings: add OMPI-level timing framework.
This is an extension of the OPAL timing framework that uses MPI_Reduce to provide a compact representation of the timings collected throughout the whole application. NOTE: the functionality is currently disabled; it will be enabled after runtime verification. Signed-off-by: Artem Polyakov <artpol84@gmail.com>
Этот коммит содержится в:
родитель
45898a9c65
Коммит
e3acf2a339
@ -93,6 +93,7 @@
|
|||||||
#include "ompi/dpm/dpm.h"
|
#include "ompi/dpm/dpm.h"
|
||||||
#include "ompi/mpiext/mpiext.h"
|
#include "ompi/mpiext/mpiext.h"
|
||||||
#include "ompi/mca/hook/base/base.h"
|
#include "ompi/mca/hook/base/base.h"
|
||||||
|
#include "ompi/util/timings.h"
|
||||||
|
|
||||||
#if OPAL_ENABLE_FT_CR == 1
|
#if OPAL_ENABLE_FT_CR == 1
|
||||||
#include "ompi/mca/crcp/crcp.h"
|
#include "ompi/mca/crcp/crcp.h"
|
||||||
@ -348,6 +349,9 @@ static int ompi_register_mca_variables(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* check to see if we want timing information */
|
/* check to see if we want timing information */
|
||||||
|
/* TODO: enable OMPI init and OMPI finalize timings if
|
||||||
|
* this variable was set to 1!
|
||||||
|
*/
|
||||||
ompi_enable_timing = false;
|
ompi_enable_timing = false;
|
||||||
(void) mca_base_var_register("ompi", "ompi", NULL, "timing",
|
(void) mca_base_var_register("ompi", "ompi", NULL, "timing",
|
||||||
"Request that critical timing loops be measured",
|
"Request that critical timing loops be measured",
|
||||||
@ -375,8 +379,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
volatile bool active;
|
volatile bool active;
|
||||||
opal_list_t info;
|
opal_list_t info;
|
||||||
opal_value_t *kv;
|
opal_value_t *kv;
|
||||||
//OPAL_TIMING_DECLARE(tm);
|
|
||||||
//OPAL_TIMING_INIT_EXT(&tm, OPAL_TIMING_GET_TIME_OF_DAY);
|
OMPI_TIMING_INIT(32);
|
||||||
|
|
||||||
/* bitflag of the thread level support provided. To be used
|
/* bitflag of the thread level support provided. To be used
|
||||||
* for the modex in order to work in heterogeneous environments. */
|
* for the modex in order to work in heterogeneous environments. */
|
||||||
@ -479,7 +483,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided);
|
ompi_hook_base_mpi_init_top_post_opal(argc, argv, requested, provided);
|
||||||
|
|
||||||
|
|
||||||
//OPAL_TIMING_MSTART((&tm,"time from start to completion of rte_init"));
|
OMPI_TIMING_NEXT("initialization");
|
||||||
|
|
||||||
/* if we were not externally started, then we need to setup
|
/* if we were not externally started, then we need to setup
|
||||||
* some envars so the MPI_INFO_ENV can get the cmd name
|
* some envars so the MPI_INFO_ENV can get the cmd name
|
||||||
@ -508,10 +512,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
error = "ompi_mpi_init: ompi_rte_init failed";
|
error = "ompi_mpi_init: ompi_rte_init failed";
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
ompi_rte_initialized = true;
|
|
||||||
|
|
||||||
/* check for timing request - get stop time and report elapsed time if so */
|
OMPI_TIMING_NEXT("rte_init");
|
||||||
//OPAL_TIMING_MNEXT((&tm,"time from completion of rte_init to modex"));
|
|
||||||
|
ompi_rte_initialized = true;
|
||||||
|
|
||||||
/* Register the default errhandler callback */
|
/* Register the default errhandler callback */
|
||||||
errtrk.status = OPAL_ERROR;
|
errtrk.status = OPAL_ERROR;
|
||||||
@ -638,8 +642,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check for timing request - get stop time and report elapsed time if so */
|
OMPI_TIMING_IMPORT_OPAL("orte_init");
|
||||||
//OPAL_TIMING_MNEXT((&tm,"time to execute modex"));
|
OMPI_TIMING_NEXT("rte_init-modex");
|
||||||
|
|
||||||
|
|
||||||
/* exchange connection info - this function may also act as a barrier
|
/* exchange connection info - this function may also act as a barrier
|
||||||
* if data exchange is required. The modex occurs solely across procs
|
* if data exchange is required. The modex occurs solely across procs
|
||||||
@ -657,7 +662,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier"));
|
OMPI_TIMING_NEXT("modex");
|
||||||
|
|
||||||
/* select buffered send allocator component to be used */
|
/* select buffered send allocator component to be used */
|
||||||
if( OMPI_SUCCESS !=
|
if( OMPI_SUCCESS !=
|
||||||
@ -818,7 +823,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
ompi_rte_wait_for_debugger();
|
ompi_rte_wait_for_debugger();
|
||||||
|
|
||||||
/* Next timing measurement */
|
/* Next timing measurement */
|
||||||
//OPAL_TIMING_MNEXT((&tm,"time to execute barrier"));
|
OMPI_TIMING_NEXT("modex-barrier");
|
||||||
|
|
||||||
/* wait for everyone to reach this point - this is a hard
|
/* wait for everyone to reach this point - this is a hard
|
||||||
* barrier requirement at this time, though we hope to relax
|
* barrier requirement at this time, though we hope to relax
|
||||||
@ -836,7 +841,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
|
|
||||||
/* check for timing request - get stop time and report elapsed
|
/* check for timing request - get stop time and report elapsed
|
||||||
time if so, then start the clock again */
|
time if so, then start the clock again */
|
||||||
//OPAL_TIMING_MNEXT((&tm,"time from barrier to complete mpi_init"));
|
OMPI_TIMING_NEXT("barrier");
|
||||||
|
|
||||||
#if OPAL_ENABLE_PROGRESS_THREADS == 0
|
#if OPAL_ENABLE_PROGRESS_THREADS == 0
|
||||||
/* Start setting up the event engine for MPI operations. Don't
|
/* Start setting up the event engine for MPI operations. Don't
|
||||||
@ -969,7 +974,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
|||||||
|
|
||||||
/* Finish last measurement, output results
|
/* Finish last measurement, output results
|
||||||
* and clear timing structure */
|
* and clear timing structure */
|
||||||
//OPAL_TIMING_DELTAS(ompi_enable_timing, &tm);
|
OMPI_TIMING_NEXT("barrier-finish");
|
||||||
|
OMPI_TIMING_OUT;
|
||||||
|
|
||||||
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
|
opal_mutex_unlock(&ompi_mpi_bootstrap_mutex);
|
||||||
|
|
||||||
|
133
ompi/util/timings.h
Обычный файл
133
ompi/util/timings.h
Обычный файл
@ -0,0 +1,133 @@
|
|||||||
|
#ifndef OMPI_UTIL_TIMING_H
|
||||||
|
#define OMPI_UTIL_TIMING_H
|
||||||
|
|
||||||
|
#include "opal/util/timings.h"
|
||||||
|
/* TODO: we need access to MPI_* functions */
|
||||||
|
|
||||||
|
#if (0 && OPAL_ENABLE_TIMING)
|
||||||
|
|
||||||
|
/* TODO: replace with opal_timing function */
|
||||||
|
/*
 * Read CLOCK_MONOTONIC and return the reading as a double holding
 * seconds: the whole-second field plus the nanosecond field scaled
 * by 1E-9.
 */
static inline double OMPI_TIMING_GET_TS(void)
{
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC, &now);
    return now.tv_sec + 1E-9 * now.tv_nsec;
}
|
||||||
|
|
||||||
|
/* TODO:
|
||||||
|
* - create a structure to hold these variables
|
||||||
|
* - use dynamically extendable arrays
|
||||||
|
*/
|
||||||
|
/*
 * OMPI_TIMING_INIT(inum)
 *
 * Declares, in the scope where it is expanded, the state used by the
 * other OMPI_TIMING_* macros:
 *   OMPI_TIMING_ts      - timestamp taken at expansion time
 *   OMPI_TIMING_prefix  - name of the enclosing function
 *   OMPI_TIMING_cnt     - number of intervals recorded so far (starts at 0)
 *   OMPI_TIMING_inum    - capacity of the arrays below
 *   OMPI_TIMING_in      - recorded interval values
 *   OMPI_TIMING_max/min/avg - result buffers, zero-initialized here
 *   OMPI_TIMING_desc    - description string for each interval
 *
 * inum is used as the size of initialized arrays, so it has to be an
 * integer constant expression (an initialized variable-length array is
 * not valid C).
 *
 * The expansion consists of declarations, so it cannot be wrapped in a
 * block; use it once per function, at a point where declarations are
 * allowed.
 */
#define OMPI_TIMING_INIT(inum)                                          \
    double OMPI_TIMING_ts = OMPI_TIMING_GET_TS();                       \
    const char *OMPI_TIMING_prefix = __func__;                          \
    int OMPI_TIMING_cnt = 0;                                            \
    int OMPI_TIMING_inum = (inum);                                      \
    double OMPI_TIMING_in[(inum)] = { 0.0 };                            \
    double OMPI_TIMING_max[(inum)] = { 0.0 };                           \
    double OMPI_TIMING_min[(inum)] = { 0.0 };                           \
    double OMPI_TIMING_avg[(inum)] = { 0.0 };                           \
    const char *OMPI_TIMING_desc[(inum)] = { 0 };
|
||||||
|
|
||||||
|
|
||||||
|
/* TODO: provide a printf-like interface that allows building a string
|
||||||
|
* at runtime, like OPAL_TIMING_NEXT()
|
||||||
|
*/
|
||||||
|
/*
 * OMPI_TIMING_NEXT(desc)
 *
 * Closes the current interval: stores the time elapsed since
 * OMPI_TIMING_ts in OMPI_TIMING_in[OMPI_TIMING_cnt], stores desc in
 * OMPI_TIMING_desc at the same index, increments OMPI_TIMING_cnt and
 * then re-reads the clock into OMPI_TIMING_ts.
 *
 * If OMPI_TIMING_cnt has already reached OMPI_TIMING_inum, a
 * diagnostic naming the file (basename only), line and function is
 * printed and the process is aborted.  stdout is flushed first because
 * abort() is not required to flush open streams.
 *
 * Requires the variables declared by OMPI_TIMING_INIT to be in scope.
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement (safe in unbraced if/else); invoke it with a
 * trailing semicolon.  The only local is OMPI_TIMING_-prefixed so that
 * it cannot capture an identifier used in desc.
 */
#define OMPI_TIMING_NEXT(desc) do {                                          \
    if( OMPI_TIMING_inum <= OMPI_TIMING_cnt ){                               \
        const char *OMPI_TIMING_file = strrchr(__FILE__, '/');               \
        if( NULL == OMPI_TIMING_file ){                                      \
            OMPI_TIMING_file = __FILE__;                                     \
        } else {                                                             \
            OMPI_TIMING_file++;                                              \
        }                                                                    \
        printf("OMPI_TIMING [%s:%d %s]: interval count overflow!!\n",        \
               OMPI_TIMING_file, __LINE__, __func__);                        \
        fflush(stdout);                                                      \
        abort();                                                             \
    }                                                                        \
    OMPI_TIMING_in[OMPI_TIMING_cnt] = OMPI_TIMING_GET_TS() - OMPI_TIMING_ts; \
    OMPI_TIMING_desc[OMPI_TIMING_cnt++] = (desc);                            \
    OMPI_TIMING_ts = OMPI_TIMING_GET_TS();                                   \
} while (0)
|
||||||
|
|
||||||
|
/*
 * OMPI_TIMING_APPEND(desc, ts)
 *
 * Records an externally supplied interval: stores ts in
 * OMPI_TIMING_in[OMPI_TIMING_cnt] and desc in OMPI_TIMING_desc at the
 * same index, then increments OMPI_TIMING_cnt.  Unlike
 * OMPI_TIMING_NEXT it neither reads the clock nor touches
 * OMPI_TIMING_ts.
 *
 * If OMPI_TIMING_cnt has already reached OMPI_TIMING_inum, a
 * diagnostic naming the file (basename only), line and function is
 * printed and the process is aborted.  stdout is flushed first because
 * abort() is not required to flush open streams.
 *
 * Requires the variables declared by OMPI_TIMING_INIT to be in scope.
 * The body is wrapped in do { } while (0) so the macro behaves as a
 * single statement; invoke it with a trailing semicolon.  Arguments
 * are parenthesized and the only local is OMPI_TIMING_-prefixed so it
 * cannot capture identifiers used in desc or ts.
 */
#define OMPI_TIMING_APPEND(desc,ts) do {                                     \
    if( OMPI_TIMING_inum <= OMPI_TIMING_cnt ){                               \
        const char *OMPI_TIMING_file = strrchr(__FILE__, '/');               \
        if( NULL == OMPI_TIMING_file ){                                      \
            OMPI_TIMING_file = __FILE__;                                     \
        } else {                                                             \
            OMPI_TIMING_file++;                                              \
        }                                                                    \
        printf("OMPI_TIMING [%s:%d %s]: interval count overflow!!\n",        \
               OMPI_TIMING_file, __LINE__, __func__);                        \
        fflush(stdout);                                                      \
        abort();                                                             \
    }                                                                        \
    OMPI_TIMING_in[OMPI_TIMING_cnt] = (ts);                                  \
    OMPI_TIMING_desc[OMPI_TIMING_cnt++] = (desc);                            \
} while (0)
|
||||||
|
|
||||||
|
/*
 * OMPI_TIMING_IMPORT_OPAL(func)
 *
 * Appends to the local interval list the entries that
 * OPAL_TIMING_ENV_CNT / OPAL_TIMING_ENV_GETDESC report for func.
 *
 * When OPAL_TIMING_ENV_CNT(func) is positive, the environment variable
 * "OMPI_TIMING_<OMPI_TIMING_prefix>" is set to "1"; OMPI_TIMING_OUT
 * tests for the presence of that same variable before printing.  Each
 * of the cnt entries is then fetched with OPAL_TIMING_ENV_GETDESC and
 * recorded through OMPI_TIMING_APPEND.
 *
 * The entries are looked up with the macro argument func.  The earlier
 * text passed `prefix`, which is not a name declared by this macro or
 * by OMPI_TIMING_INIT.
 * NOTE(review): assumes OPAL_TIMING_ENV_GETDESC takes the same key as
 * OPAL_TIMING_ENV_CNT as its first argument - confirm against
 * opal/util/timings.h.
 *
 * Requires the variables declared by OMPI_TIMING_INIT to be in scope.
 * Locals are OMPI_TIMING_-prefixed to avoid shadowing caller
 * variables, and the body is wrapped in do { } while (0); invoke it
 * with a trailing semicolon.
 */
#define OMPI_TIMING_IMPORT_OPAL(func) do {                                   \
    int OMPI_TIMING_ocnt = OPAL_TIMING_ENV_CNT(func);                        \
    int OMPI_TIMING_oidx;                                                    \
    if( 0 < OMPI_TIMING_ocnt ) {                                             \
        char OMPI_TIMING_ename[256];                                         \
        snprintf(OMPI_TIMING_ename, sizeof(OMPI_TIMING_ename),               \
                 "OMPI_TIMING_%s", OMPI_TIMING_prefix);                      \
        setenv(OMPI_TIMING_ename, "1", 1);                                   \
    }                                                                        \
    for(OMPI_TIMING_oidx = 0; OMPI_TIMING_oidx < OMPI_TIMING_ocnt;           \
        OMPI_TIMING_oidx++){                                                 \
        char *OMPI_TIMING_odesc;                                             \
        double OMPI_TIMING_ots =                                             \
            OPAL_TIMING_ENV_GETDESC(func, OMPI_TIMING_oidx,                  \
                                    &OMPI_TIMING_odesc);                     \
        OMPI_TIMING_APPEND(OMPI_TIMING_odesc, OMPI_TIMING_ots);              \
    }                                                                        \
} while (0)
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * OMPI_TIMING_OUT
 *
 * Prints a summary of the recorded intervals, but only when the
 * environment variable "OMPI_TIMING_<OMPI_TIMING_prefix>" is set.
 *
 * When it is set, the first OMPI_TIMING_cnt entries of OMPI_TIMING_in
 * are reduced over MPI_COMM_WORLD to rank 0 three times (MPI_SUM into
 * OMPI_TIMING_avg, MPI_MIN into OMPI_TIMING_min, MPI_MAX into
 * OMPI_TIMING_max).  Rank 0 divides each sum by the communicator size
 * and prints one "avg / min / max" line per interval, followed by an
 * "overhead" line giving the time spent in the reductions and
 * printing.
 *
 * MPI_Reduce is collective, so the environment variable has to be
 * present on every rank of MPI_COMM_WORLD or on none of them.
 *
 * Requires the variables declared by OMPI_TIMING_INIT to be in scope;
 * OMPI_TIMING_ts and OMPI_TIMING_avg are overwritten.  The rank/size
 * queries are made only when output is enabled.  Locals are
 * OMPI_TIMING_-prefixed to avoid shadowing caller variables, and the
 * body is wrapped in do { } while (0); invoke it with a trailing
 * semicolon.
 */
#define OMPI_TIMING_OUT do {                                                 \
    char OMPI_TIMING_ename[1024];                                            \
    snprintf(OMPI_TIMING_ename, sizeof(OMPI_TIMING_ename),                   \
             "OMPI_TIMING_%s", OMPI_TIMING_prefix);                          \
                                                                             \
    if( NULL != getenv(OMPI_TIMING_ename) ) {                                \
        int OMPI_TIMING_i, OMPI_TIMING_size, OMPI_TIMING_rank;               \
        MPI_Comm_size(MPI_COMM_WORLD, &OMPI_TIMING_size);                    \
        MPI_Comm_rank(MPI_COMM_WORLD, &OMPI_TIMING_rank);                    \
        OMPI_TIMING_ts = OMPI_TIMING_GET_TS();                               \
        MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_avg, OMPI_TIMING_cnt,         \
                   MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);                  \
        MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_min, OMPI_TIMING_cnt,         \
                   MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);                  \
        MPI_Reduce(OMPI_TIMING_in, OMPI_TIMING_max, OMPI_TIMING_cnt,         \
                   MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);                  \
                                                                             \
        if( 0 == OMPI_TIMING_rank ){                                         \
            printf("------------------ %s ------------------\n",             \
                   OMPI_TIMING_prefix);                                      \
            for(OMPI_TIMING_i = 0; OMPI_TIMING_i < OMPI_TIMING_cnt;          \
                OMPI_TIMING_i++){                                            \
                OMPI_TIMING_avg[OMPI_TIMING_i] /= OMPI_TIMING_size;          \
                printf("[%s:%s]: %lf / %lf / %lf\n",                         \
                       OMPI_TIMING_prefix,                                   \
                       OMPI_TIMING_desc[OMPI_TIMING_i],                      \
                       OMPI_TIMING_avg[OMPI_TIMING_i],                       \
                       OMPI_TIMING_min[OMPI_TIMING_i],                       \
                       OMPI_TIMING_max[OMPI_TIMING_i]);                      \
            }                                                                \
            printf("[%s:overhead]: %lf \n", OMPI_TIMING_prefix,              \
                   OMPI_TIMING_GET_TS() - OMPI_TIMING_ts);                   \
        }                                                                    \
    }                                                                        \
} while (0)
|
||||||
|
|
||||||
|
#else
|
||||||
|
#define OMPI_TIMING_INIT(inum)
|
||||||
|
|
||||||
|
#define OMPI_TIMING_NEXT(desc)
|
||||||
|
|
||||||
|
#define OMPI_TIMING_APPEND(desc,ts)
|
||||||
|
|
||||||
|
#define OMPI_TIMING_OUT
|
||||||
|
|
||||||
|
#define OMPI_TIMING_IMPORT_OPAL(func)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
Загрузка…
Ссылка в новой задаче
Block a user