1
1

opal/timing: add the draft of env-based timings

This commit adds a new timing feature that uses environment variables to
expose timing information. This allows easy access to this data (if
timing is enabled) from any other part of the application for subsequent
postprocessing.
In particular, this will be integrated with the OMPI-level timing framework, which
will use MPI_Reduce functionality to provide more compact and easy-to-use
information.

This commit also adds an example of using this framework by annotating
the rte_init function. The result is not used anywhere for now; it will be
postprocessed in subsequent commits.

NOTE: this functionality is currently disabled until it is verified at runtime

Signed-off-by: Artem Polyakov <artpol84@gmail.com>
Этот коммит содержится в:
Artem Polyakov 2017-03-29 04:19:09 +07:00 коммит произвёл Boris Karasev
родитель 88ed79ea25
Коммит 45898a9c65
3 изменённых файлов: 218 добавлений и 2 удалений

Просмотреть файл

@ -41,8 +41,6 @@
#include MCA_timer_IMPLEMENTATION_HEADER
static opal_mutex_t tm_lock;
static double get_ts_gettimeofday(void)
{
double ret;

Просмотреть файл

@ -27,6 +27,193 @@ typedef enum {
/* Timestamp source: takes no arguments and returns the current time as a double. */
typedef double (*opal_timing_ts_func_t)(void);

/* Size, in bytes, of the name buffers kept inside a timing handle. */
#define OPAL_TIMING_STR_LEN 256

/* Handle describing one env-based timing session. */
typedef struct {
    /* timing id: "<prefix><function name>" */
    char id[OPAL_TIMING_STR_LEN];
    /* name of the environment variable holding the interval counter: "<id>_CNT" */
    char cntr_env[OPAL_TIMING_STR_LEN];
    /* non-zero when intervals should be recorded */
    int enabled;
    /* error indicator; cleared when the handle is set up */
    int error;
    /* index of the next interval to record */
    int cntr;
    /* timestamp taken at the start of the current interval */
    double ts;
    /* timestamp source used for this handle */
    opal_timing_ts_func_t get_ts;
} opal_timing_env_t;
/* Maps a timer type to a timestamp function; the definition is not in this
 * header. OPAL_TIMING_ENV_START_TYPE stores the returned pointer in the
 * handle and calls it to take timestamps.
 * NOTE(review): presumably never returns NULL -- callers invoke the result
 * without checking; confirm against the implementation.
 */
opal_timing_ts_func_t opal_timing_ts_func(opal_timer_type_t type);
/* Create an env-based timing handle.
 *
 * TODO: turn as much as possible into macros once debugged.
 *
 * NOTE: the feature is deliberately switched off for now: the function
 * returns a fully zeroed, disabled handle before doing any work. The code
 * after the early return is kept so that it can be enabled once it has been
 * verified at runtime.
 *
 * Once enabled, the handle id is "<prefix><func>". Recording is turned on
 * only when the environment variable named after the id is set to "1", and
 * the interval counter is resumed from the "<id>_CNT" environment variable
 * if that one exists.
 *
 * func   - name of the instrumented function
 * type   - timer type handed to opal_timing_ts_func()
 * prefix - optional prefix for the id; NULL is treated as ""
 *
 * Returns the handle by value. If the id or the counter variable name does
 * not fit into OPAL_TIMING_STR_LEN bytes the handle is returned disabled
 * with its error field set to 1.
 */
static inline opal_timing_env_t
OPAL_TIMING_ENV_START_TYPE(const char *func, opal_timer_type_t type, const char *prefix)
{
    /* zero every field so that a disabled handle never carries garbage */
    opal_timing_env_t h = { .enabled = 0 };
    const char *val;
    int n;

    /* TODO: remove this when tested! */
    return h;

    if( NULL == prefix ){
        prefix = "";
    }

    n = snprintf(h.id, sizeof(h.id), "%s%s", prefix, func);
    if( n < 0 || (size_t)n >= sizeof(h.id) ){
        /* id was truncated: a clipped name could alias another timing */
        h.error = 1;
        return h;
    }

    n = snprintf(h.cntr_env, sizeof(h.cntr_env), "%s_CNT", h.id);
    if( n < 0 || (size_t)n >= sizeof(h.cntr_env) ){
        h.error = 1;
        return h;
    }

    h.get_ts = opal_timing_ts_func(type);
    h.ts = h.get_ts();

    /* recording is opt-in: the variable named after the id must be "1" */
    val = getenv(h.id);
    h.enabled = (NULL != val && 0 == strcmp(val, "1"));

    /* continue numbering from a previous session with the same id */
    val = getenv(h.cntr_env);
    if( NULL != val ){
        h.cntr = atoi(val);
    }
    return h;
}
/* Declare a timing handle for the enclosing function.
 * Introduces two identifiers in the current scope: the storage `<name>_val`
 * and the pointer `<name>` that refers to it. The handle id is the name of
 * the enclosing function, with an empty prefix.
 */
#define OPAL_TIMING_ENV_INIT(name)                                              \
    opal_timing_env_t name ## _val =                                            \
        OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, ""); \
    opal_timing_env_t *name = &(name ## _val);
/* Declare a timing handle for the enclosing function, with an id prefix.
 *
 * Function names are used as timing ids. That can be a problem for private
 * functions declared static, because their names may collide across files;
 * pass a prefix to get a finer-grained id.
 *
 * Introduces two identifiers in the current scope: the storage `<name>_val`
 * and the pointer `<name>` that refers to it.
 */
#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name)                               \
    opal_timing_env_t name ## _val, *name = &(name ## _val);                    \
    *name = OPAL_TIMING_ENV_START_TYPE(__FUNCTION__, OPAL_TIMING_AUTOMATIC_TIMER, prefix);
/* Close the current interval of a timing handle and publish it through the
 * environment.
 *
 * For interval number N (the handle's counter) this sets
 *   "<id>_DESC_<N>"  to the printf-style formatted description,
 *   "<id>_VAL_<N>"   to the elapsed time since the previous timestamp,
 * then stores the incremented counter in the handle's counter variable.
 * Nothing happens when the handle is disabled.
 *
 * This is a variadic function built on <stdarg.h>, so it does not depend on
 * C99 variadic macros.
 *
 * h   - timing handle
 * fmt - printf-style format of the interval description, followed by its
 *       arguments; an over-long description is stored truncated
 *
 * If a name or value cannot be formatted, or setenv() fails, the handle's
 * error field is set to 1 and the counter is left unchanged, so a partially
 * written interval is never counted.
 */
static inline void
OPAL_TIMING_ENV_NEXT(opal_timing_env_t *h, const char *fmt, ... )
{
    /* room for the id plus the longest "_DESC_<int>" / "_VAL_<int>" suffix */
    char name[sizeof(h->id) + 32];
    char value[256];
    va_list ap;
    double elapsed;
    int n;
    int failed = 1;

    if( !h->enabled ){
        return;
    }

    /* enabled codepath */
    elapsed = h->get_ts() - h->ts;

    n = snprintf(name, sizeof(name), "%s_DESC_%d", h->id, h->cntr);
    if( n < 0 || (size_t)n >= sizeof(name) ){
        goto out;
    }
    va_start(ap, fmt);
    n = vsnprintf(value, sizeof(value), fmt, ap);
    va_end(ap);
    if( n < 0 ){
        goto out;
    }
    if( 0 != setenv(name, value, 1) ){
        goto out;
    }

    n = snprintf(name, sizeof(name), "%s_VAL_%d", h->id, h->cntr);
    if( n < 0 || (size_t)n >= sizeof(name) ){
        goto out;
    }
    /* a clipped number would be a wrong number, so treat truncation as failure */
    n = snprintf(value, sizeof(value), "%lf", elapsed);
    if( n < 0 || (size_t)n >= sizeof(value) ){
        goto out;
    }
    if( 0 != setenv(name, value, 1) ){
        goto out;
    }

    n = snprintf(value, sizeof(value), "%d", h->cntr + 1);
    if( n < 0 || (size_t)n >= sizeof(value) ){
        goto out;
    }
    if( 0 != setenv(h->cntr_env, value, 1) ){
        goto out;
    }
    h->cntr++;
    failed = 0;

out:
    if( failed ){
        h->error = 1;
    }
    /* We don't include env operations into the consideration.
     * Hopefully this will help to make measurements more accurate.
     */
    h->ts = h->get_ts();
}
/* Return the number of intervals recorded for "<prefix><func>".
 *
 * Meant to be called from the code that does the postprocessing, i.e. the
 * OMPI timing portion that reduces the accumulated values.
 *
 * The count is read from the environment variable "<prefix><func>_CNT".
 *
 * prefix - id prefix; NULL is treated as ""
 * func   - name of the instrumented function
 *
 * Returns the stored count, or 0 when func is NULL, the variable is not
 * set, or its name does not fit into the lookup buffer.
 *
 * TODO: turn into a macro
 */
static inline int OPAL_TIMING_ENV_CNT_PREFIX(const char *prefix, const char *func)
{
    char ename[256];
    const char *val;
    int n;

    if( NULL == prefix ){
        prefix = "";
    }
    if( NULL == func ){
        return 0;
    }

    n = snprintf(ename, sizeof(ename), "%s%s_CNT", prefix, func);
    if( n < 0 || (size_t)n >= sizeof(ename) ){
        /* a truncated name cannot match anything that was recorded */
        return 0;
    }

    val = getenv(ename);
    if( NULL == val ){
        return 0;
    }
    return atoi(val);
}

/* Same as OPAL_TIMING_ENV_CNT_PREFIX with an empty prefix. */
#define OPAL_TIMING_ENV_CNT(func) \
    OPAL_TIMING_ENV_CNT_PREFIX("", func)
/* Fetch interval number i recorded for "<prefix><func>".
 *
 * Reads the variables written by OPAL_TIMING_ENV_NEXT:
 *   "<prefix><func>_DESC_<i>"  - interval description
 *   "<prefix><func>_VAL_<i>"   - interval duration
 *
 * prefix - id prefix; NULL is treated as ""
 * func   - name of the instrumented function
 * i      - interval index
 * desc   - out: receives a pointer to the description as returned by
 *          getenv() (not a copy), or NULL when it is not available;
 *          may itself be NULL if the description is not wanted
 *
 * Returns the recorded duration, or 0.0 when func is NULL, a name does not
 * fit into the lookup buffer, or the value is missing or unparsable.
 *
 * TODO: make a macro
 */
static inline double
OPAL_TIMING_ENV_GETDESC_PREFIX(const char *prefix, const char *func, int i, char **desc)
{
    /* 256 bytes of id plus room for the "_DESC_<int>" / "_VAL_<int>" suffix */
    char vname[256 + 32];
    const char *val;
    double ts = 0.0;
    int n;

    if( NULL != desc ){
        *desc = NULL;
    }
    if( NULL == prefix ){
        prefix = "";
    }
    if( NULL == func ){
        return 0.0;
    }

    n = snprintf(vname, sizeof(vname), "%s%s_DESC_%d", prefix, func, i);
    if( n < 0 || (size_t)n >= sizeof(vname) ){
        return 0.0;
    }
    if( NULL != desc ){
        *desc = getenv(vname);
    }

    n = snprintf(vname, sizeof(vname), "%s%s_VAL_%d", prefix, func, i);
    if( n < 0 || (size_t)n >= sizeof(vname) ){
        return 0.0;
    }
    val = getenv(vname);
    if( NULL == val || 1 != sscanf(val, "%lf", &ts) ){
        return 0.0;
    }
    return ts;
}

/* Same as OPAL_TIMING_ENV_GETDESC_PREFIX with an empty prefix. */
#define OPAL_TIMING_ENV_GETDESC(func, index, desc) \
    OPAL_TIMING_ENV_GETDESC_PREFIX("", func, index, desc)
/* As written, this macro expands to a block that:
 *   - obtains an interval count via OSHTMNG_ENV_COUNT(prefix);
 *   - if the environment variable named `prefix` equals "1", sets the
 *     variable "OSHTMNG_<OSHTMNG_prefix>" to "1";
 *   - fetches each interval with OSHTMNG_ENV_GETBYIDX(prefix, i, &desc) and
 *     hands the description and value to OSHTMNG_END1.
 *
 * NOTE(review): OSHTMNG_ENV_COUNT, OSHTMNG_ENV_GETBYIDX, OSHTMNG_END1 and
 * the identifier OSHTMNG_prefix are not defined in the visible part of this
 * file; this looks like a leftover from another timing framework -- confirm
 * before using it.
 * NOTE(review): `OSHTMNG_prefix` is presumably meant to be the `prefix`
 * argument -- verify.
 * NOTE(review): `ename` is filled with an unbounded sprintf into 256 bytes.
 */
#define OSHTMNG_ENV_APPEND(prefix) { \
char *enabled; \
int cnt = OSHTMNG_ENV_COUNT(prefix); \
enabled = getenv(prefix); \
if( NULL != enabled && !strcmp(enabled, "1") ) { \
char ename[256]; \
sprintf(ename, "OSHTMNG_%s", OSHTMNG_prefix); \
setenv(ename, "1", 1); \
} \
int i; \
for(i = 0; i < cnt; i++){ \
char *desc; \
double ts = OSHTMNG_ENV_GETBYIDX(prefix, i, &desc); \
OSHTMNG_END1(desc, ts); \
} \
}
#else
/* Timing support is compiled out: the statement-like entry points expand to
 * nothing, while the value-returning ones expand to a neutral value so they
 * can still be used inside expressions. Each macro takes the same arguments
 * as its counterpart in the enabled branch.
 */
#define OPAL_TIMING_ENV_START_TYPE(func, type, prefix)
#define OPAL_TIMING_ENV_INIT(name)
#define OPAL_TIMING_ENV_INIT_PREFIX(prefix, name)
/* NOTE: variadic macros require C99 or later. */
#define OPAL_TIMING_ENV_NEXT(h, fmt, ... )
/* no intervals are ever recorded */
#define OPAL_TIMING_ENV_CNT_PREFIX(prefix, func) (0)
#define OPAL_TIMING_ENV_CNT(func) (0)
/* clear the caller's description pointer and report a zero duration */
#define OPAL_TIMING_ENV_GETDESC_PREFIX(prefix, func, i, desc) \
    (*(desc) = (char *)0, 0.0)
#define OPAL_TIMING_ENV_GETDESC(func, index, desc) \
    (*(desc) = (char *)0, 0.0)
#endif
#endif

Просмотреть файл

@ -134,6 +134,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
{
int ret;
char *error = NULL;
OPAL_TIMING_ENV_INIT(tmng);
if (0 < orte_initialized) {
/* track number of times we have been called */
@ -152,12 +153,16 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
opal_snprintf_jobid = orte_util_snprintf_jobid;
opal_convert_string_to_jobid = _convert_string_to_jobid;
OPAL_TIMING_ENV_NEXT(tmng, "initializations");
/* initialize the opal layer */
if (ORTE_SUCCESS != (ret = opal_init(pargc, pargv))) {
error = "opal_init";
goto error;
}
OPAL_TIMING_ENV_NEXT(tmng, "opal_init");
/* ensure we know the type of proc for when we finalize */
orte_process_info.proc_type = flags;
@ -167,27 +172,37 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
goto error;
}
OPAL_TIMING_ENV_NEXT(tmng, "orte_locks_init");
/* Register all MCA Params */
if (ORTE_SUCCESS != (ret = orte_register_params())) {
error = "orte_register_params";
goto error;
}
OPAL_TIMING_ENV_NEXT(tmng, "orte_register_params");
/* setup the orte_show_help system */
if (ORTE_SUCCESS != (ret = orte_show_help_init())) {
error = "opal_output_init";
goto error;
}
OPAL_TIMING_ENV_NEXT(tmng, "orte_show_help_init");
/* register handler for errnum -> string conversion */
opal_error_register("ORTE", ORTE_ERR_BASE, ORTE_ERR_MAX, orte_err2str);
OPAL_TIMING_ENV_NEXT(tmng, "opal_error_register");
/* Ensure the rest of the process info structure is initialized */
if (ORTE_SUCCESS != (ret = orte_proc_info())) {
error = "orte_proc_info";
goto error;
}
OPAL_TIMING_ENV_NEXT(tmng, "orte_proc_info");
/* we may have modified the local nodename according to
* request to retain/strip the FQDN and prefix, so update
* it here. The OPAL layer will strdup the hostname, so
@ -201,6 +216,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
/* let the pmix server register params */
pmix_server_register_params();
OPAL_TIMING_ENV_NEXT(tmng, "pmix_server_register_params");
}
/* open the SCHIZO framework as everyone needs it, and the
@ -210,13 +226,20 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
error = "orte_schizo_base_open";
goto error;
}
OPAL_TIMING_ENV_NEXT(tmng, "framework_open(schizo)");
if (ORTE_SUCCESS != (ret = orte_schizo_base_select())) {
error = "orte_schizo_base_select";
goto error;
}
OPAL_TIMING_ENV_NEXT(tmng, "orte_schizo_base_select");
/* if we are an app, let SCHIZO help us determine our environment */
if (ORTE_PROC_IS_APP) {
(void)orte_schizo.check_launch_environment();
OPAL_TIMING_ENV_NEXT(tmng, "orte_schizo.check_launch_environment");
}
/* open the ESS and select the correct module for this environment */
@ -225,11 +248,16 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
error = "orte_ess_base_open";
goto error;
}
OPAL_TIMING_ENV_NEXT(tmng, "framework_open(ess)");
if (ORTE_SUCCESS != (ret = orte_ess_base_select())) {
error = "orte_ess_base_select";
goto error;
}
OPAL_TIMING_ENV_NEXT(tmng, "orte_ess_base_select");
if (!ORTE_PROC_IS_APP) {
/* ORTE tools "block" in their own loop over the event
* base, so no progress thread is required - apps will
@ -245,6 +273,8 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
goto error;
}
OPAL_TIMING_ENV_NEXT(tmng, "orte_ess.init");
/* set the remaining opal_process_info fields. Note that
* the OPAL layer will have initialized these to NULL, and
* anyone between us would not have strdup'd the string, so
@ -265,6 +295,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
}
}
OPAL_TIMING_ENV_NEXT(tmng, "finalize");
/* All done */
return ORTE_SUCCESS;