1
1
openmpi/ompi/util/timings.h
Artem Polyakov 253502b1b1 timings: Fix timings when 'prefix' is used
Signed-off-by: Artem Polyakov <artpol84@gmail.com>
(cherry picked from commit 7c17a38c96)
2020-03-11 21:05:30 -07:00

292 строки
19 KiB
C

/*
* Copyright (c) 2017-2018 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_UTIL_TIMING_H
#define OMPI_UTIL_TIMING_H
#include "opal/util/timings.h"
/* TODO: we need access to MPI_* functions */
#if (OPAL_ENABLE_TIMING)
typedef struct {
char desc[OPAL_TIMING_STR_LEN];
double ts;
char *file;
char *prefix;
int imported;
} ompi_timing_val_t;
typedef struct {
ompi_timing_val_t *val;
int use;
struct ompi_timing_list_t *next;
} ompi_timing_list_t;
typedef struct ompi_timing_t {
double ts;
const char *prefix;
int size;
int cnt;
int error;
int enabled;
int import_cnt;
opal_timing_ts_func_t get_ts;
ompi_timing_list_t *timing;
ompi_timing_list_t *cur_timing;
} ompi_timing_t;
#define OMPI_TIMING_ENABLED \
(getenv("OMPI_TIMING_ENABLE") ? atoi(getenv("OMPI_TIMING_ENABLE")) : 0)
#define OMPI_TIMING_INIT(_size) \
ompi_timing_t OMPI_TIMING; \
OMPI_TIMING.prefix = __func__; \
OMPI_TIMING.size = _size; \
OMPI_TIMING.get_ts = opal_timing_ts_func(OPAL_TIMING_AUTOMATIC_TIMER); \
OMPI_TIMING.cnt = 0; \
OMPI_TIMING.error = 0; \
OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \
OMPI_TIMING.enabled = 0; \
OMPI_TIMING.import_cnt = 0; \
{ \
char *ptr; \
ptr = getenv("OMPI_TIMING_ENABLE"); \
if (NULL != ptr) { \
OMPI_TIMING.enabled = atoi(ptr); \
} \
if (OMPI_TIMING.enabled) { \
setenv("OPAL_TIMING_ENABLE", "1", 1); \
OMPI_TIMING.timing = (ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \
memset(OMPI_TIMING.timing, 0, sizeof(ompi_timing_list_t)); \
OMPI_TIMING.timing->val = (ompi_timing_val_t*)malloc(sizeof(ompi_timing_val_t) * _size); \
OMPI_TIMING.cur_timing = OMPI_TIMING.timing; \
} \
}
#define OMPI_TIMING_ITEM_EXTEND \
do { \
if (OMPI_TIMING.enabled) { \
OMPI_TIMING.cur_timing->next = (struct ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \
OMPI_TIMING.cur_timing = (ompi_timing_list_t*)OMPI_TIMING.cur_timing->next; \
memset(OMPI_TIMING.cur_timing, 0, sizeof(ompi_timing_list_t)); \
OMPI_TIMING.cur_timing->val = malloc(sizeof(ompi_timing_val_t) * OMPI_TIMING.size); \
} \
} while(0)
#define OMPI_TIMING_FINALIZE \
do { \
if (OMPI_TIMING.enabled) { \
ompi_timing_list_t *t = OMPI_TIMING.timing, *tmp; \
while ( NULL != t) { \
tmp = t; \
t = (ompi_timing_list_t*)t->next; \
free(tmp->val); \
free(tmp); \
} \
OMPI_TIMING.timing = NULL; \
OMPI_TIMING.cur_timing = NULL; \
OMPI_TIMING.cnt = 0; \
} \
} while(0)
#define OMPI_TIMING_NEXT(...) \
do { \
if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \
char *f = strrchr(__FILE__, '/'); \
f = (f == NULL) ? strdup(__FILE__) : f+1; \
int len = 0; \
if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \
OMPI_TIMING_ITEM_EXTEND; \
} \
len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \
OPAL_TIMING_STR_LEN, ##__VA_ARGS__); \
if (len >= OPAL_TIMING_STR_LEN) { \
OMPI_TIMING.error = 1; \
} \
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = strdup(f); \
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = strdup(__func__); \
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = \
OMPI_TIMING.get_ts() - OMPI_TIMING.ts; \
OMPI_TIMING.cnt++; \
OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \
} \
} while(0)
#define OMPI_TIMING_APPEND_PREFIX(filename, _prefix, func,desc,ts) \
do { \
if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){ \
OMPI_TIMING_ITEM_EXTEND; \
} \
int len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc, \
OPAL_TIMING_STR_LEN, "%s", desc); \
if (len >= OPAL_TIMING_STR_LEN) { \
OMPI_TIMING.error = 1; \
} \
if( _prefix[0] ) { \
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = \
func "/" _prefix; \
} else { \
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = func;\
} \
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = filename; \
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = ts; \
OMPI_TIMING.cnt++; \
} while(0)
#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func) \
do { \
if (!OMPI_TIMING.error && OMPI_TIMING.enabled) { \
int cnt; \
int i; \
double ts; \
OMPI_TIMING.import_cnt++; \
OPAL_TIMING_ENV_CNT_PREFIX(_prefix, func, cnt); \
OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error); \
for(i = 0; i < cnt; i++){ \
char *desc, *filename; \
OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].imported= \
OMPI_TIMING.import_cnt; \
OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc, ts); \
OMPI_TIMING_APPEND_PREFIX(filename, _prefix, func, desc, ts); \
} \
} \
} while(0)
#define OMPI_TIMING_IMPORT_OPAL(func) \
OMPI_TIMING_IMPORT_OPAL_PREFIX("", func);
#define OMPI_TIMING_OUT \
do { \
if (OMPI_TIMING.enabled) { \
int i, size, rank; \
MPI_Comm_size(MPI_COMM_WORLD, &size); \
MPI_Comm_rank(MPI_COMM_WORLD, &rank); \
int error = 0; \
int imported = 0; \
\
MPI_Reduce(&OMPI_TIMING.error, &error, 1, \
MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); \
\
if (error) { \
if (0 == rank) { \
printf("==OMPI_TIMING== error: something went wrong, timings doesn't work\n"); \
} \
} \
else { \
double *avg = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \
double *min = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \
double *max = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt); \
char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt); \
double total_avg = 0, total_min = 0, total_max = 0; \
\
if( OMPI_TIMING.cnt > 0 ) { \
OMPI_TIMING.ts = OMPI_TIMING.get_ts(); \
ompi_timing_list_t *timing = OMPI_TIMING.timing; \
i = 0; \
do { \
int use; \
for (use = 0; use < timing->use; use++) { \
MPI_Reduce(&timing->val[use].ts, avg + i, 1, \
MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); \
MPI_Reduce(&timing->val[use].ts, min + i, 1, \
MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); \
MPI_Reduce(&timing->val[use].ts, max + i, 1, \
MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); \
desc[i] = timing->val[use].desc; \
prefix[i] = timing->val[use].prefix; \
file[i] = timing->val[use].file; \
i++; \
} \
timing = (ompi_timing_list_t*)timing->next; \
} while (timing != NULL); \
\
if( 0 == rank ) { \
if (OMPI_TIMING.timing->next) { \
printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \
"==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \
} \
\
printf("------------------ %s ------------------\n", \
OMPI_TIMING.prefix); \
imported = OMPI_TIMING.timing->val[0].imported; \
for(i=0; i< OMPI_TIMING.cnt; i++){ \
bool print_total = 0; \
imported = OMPI_TIMING.timing->val[i].imported; \
avg[i] /= size; \
printf("%s[%s:%s:%s]: %lf / %lf / %lf\n", \
imported ? " -- " : "", \
file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \
if (OMPI_TIMING.timing->val[i].imported) { \
total_avg += avg[i]; \
total_min += min[i]; \
total_max += max[i]; \
} \
if (i == (OMPI_TIMING.cnt-1)) { \
print_total = true; \
} else { \
print_total = imported != OMPI_TIMING.timing->val[i+1].imported; \
} \
if (print_total && OMPI_TIMING.timing->val[i].imported) { \
printf("%s[%s:%s:%s]: %lf / %lf / %lf\n", \
imported ? " !! " : "", \
file[i], prefix[i], "total", \
total_avg, total_min, total_max); \
total_avg = 0; total_min = 0; total_max = 0; \
} \
} \
total_avg = 0; total_min = 0; total_max = 0; \
for(i=0; i< OMPI_TIMING.cnt; i++) { \
if (!OMPI_TIMING.timing->val[i].imported) { \
total_avg += avg[i]; \
total_min += min[i]; \
total_max += max[i]; \
} \
} \
printf("[%s:total] %lf / %lf / %lf\n", \
OMPI_TIMING.prefix, \
total_avg, total_min, total_max); \
printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix, \
OMPI_TIMING.get_ts() - OMPI_TIMING.ts); \
} \
} \
free(avg); \
free(min); \
free(max); \
free(desc); \
free(prefix); \
free(file); \
} \
} \
} while(0)
#else
#define OMPI_TIMING_INIT(size)
#define OMPI_TIMING_NEXT(...)
#define OMPI_TIMING_APPEND(desc,ts)
#define OMPI_TIMING_OUT
#define OMPI_TIMING_IMPORT_OPAL(func)
#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func)
#define OMPI_TIMING_FINALIZE
#define OMPI_TIMING_ENABLED 0
#endif
#endif