[Contribution from Artem - moved it to svn from git for him]
Replace our old, clunky timing setup with a much nicer one that is only available if configured with --enable-timing. Add a tool for profiling clock differences between the nodes so you can get more precise timing measurements. I'll ask Artem to update the Github wiki with full instructions on how to use this setup. This commit was SVN r32738.
Этот коммит содержится в:
родитель
9d39903a53
Коммит
dfb952fa78
@ -47,5 +47,6 @@ AC_DEFUN([OMPI_CONFIG_FILES],[
|
||||
ompi/tools/wrappers/ompi-cxx.pc
|
||||
ompi/tools/wrappers/ompi-fort.pc
|
||||
ompi/tools/wrappers/mpijavac.pl
|
||||
ompi/tools/mpisync/Makefile
|
||||
])
|
||||
])
|
||||
|
@ -168,6 +168,26 @@ else
|
||||
AC_MSG_RESULT([no])
|
||||
WANT_DEBUG=0
|
||||
fi
|
||||
|
||||
|
||||
AC_MSG_CHECKING([if want to developer-level timing framework])
|
||||
AC_ARG_ENABLE(timing,
|
||||
AC_HELP_STRING([--enable-timing],
|
||||
[enable developer-level timing code (not for general MPI users!) (default: disabled)]))
|
||||
if test "$enable_timing" = "yes"; then
|
||||
AC_MSG_RESULT([yes])
|
||||
WANT_TIMING=1
|
||||
else
|
||||
AC_MSG_RESULT([no])
|
||||
WANT_TIMING=0
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED(OPAL_ENABLE_TIMING, $WANT_TIMING,
|
||||
[Whether we want developer-level timing framework or not])
|
||||
|
||||
AM_CONDITIONAL([OPAL_INSTALL_TIMING_BINARIES], [test "$WANT_TIMING" = "1" -a "$enable_binaries" != "no"])
|
||||
|
||||
|
||||
#################### Early development override ####################
|
||||
if test "$WANT_DEBUG" = "0" -a -z "$enable_debug" -a "$OPAL_DEVEL" = 1; then
|
||||
WANT_DEBUG=1
|
||||
|
@ -52,6 +52,7 @@
|
||||
#include "opal/mca/rcache/base/base.h"
|
||||
#include "opal/mca/allocator/base/base.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
#include "opal/util/timings.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "ompi/constants.h"
|
||||
@ -94,9 +95,11 @@ int ompi_mpi_finalize(void)
|
||||
int ret;
|
||||
static int32_t finalize_has_already_started = 0;
|
||||
opal_list_item_t *item;
|
||||
struct timeval ompistart, ompistop;
|
||||
ompi_proc_t** procs;
|
||||
size_t nprocs;
|
||||
OPAL_TIMING_DECLARE(tm);
|
||||
OPAL_TIMING_INIT(&tm);
|
||||
|
||||
|
||||
/* Be a bit social if an erroneous program calls MPI_FINALIZE in
|
||||
two different threads, otherwise we may deadlock in
|
||||
@ -148,9 +151,7 @@ int ompi_mpi_finalize(void)
|
||||
opal_progress_event_users_increment();
|
||||
|
||||
/* check to see if we want timing information */
|
||||
if (ompi_enable_timing != 0 && 0 == OMPI_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistart, NULL);
|
||||
}
|
||||
OPAL_TIMING_EVENT((&tm,"Start barrier"));
|
||||
|
||||
/* NOTE: MPI-2.1 requires that MPI_FINALIZE is "collective" across
|
||||
*all* connected processes. This only means that all processes
|
||||
@ -231,13 +232,9 @@ int ompi_mpi_finalize(void)
|
||||
|
||||
/* check for timing request - get stop time and report elapsed
|
||||
time if so */
|
||||
if (ompi_enable_timing && 0 == OMPI_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_finalize[%ld]: time to execute barrier %ld usec",
|
||||
(long)OMPI_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
}
|
||||
OPAL_TIMING_EVENT((&tm,"Finish barrier"));
|
||||
OPAL_TIMING_REPORT(ompi_enable_timing, &tm, "MPI_Finish");
|
||||
OPAL_TIMING_RELEASE(&tm);
|
||||
|
||||
/*
|
||||
* Shutdown the Checkpoint/Restart Mech.
|
||||
|
@ -56,6 +56,7 @@
|
||||
#include "opal/mca/rcache/rcache.h"
|
||||
#include "opal/mca/mpool/base/base.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
#include "opal/util/timings.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/mpi/fortran/base/constants.h"
|
||||
@ -386,8 +387,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
ompi_proc_t** procs;
|
||||
size_t nprocs;
|
||||
char *error = NULL;
|
||||
struct timeval ompistart, ompistop;
|
||||
char *cmd=NULL, *av=NULL;
|
||||
OPAL_TIMING_DECLARE(tm);
|
||||
OPAL_TIMING_INIT(&tm);
|
||||
|
||||
/* bitflag of the thread level support provided. To be used
|
||||
* for the modex in order to work in heterogeneous environments. */
|
||||
@ -448,9 +450,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
mca_base_var_set_value(ret, allvalue, 4, MCA_BASE_VAR_SOURCE_DEFAULT, NULL);
|
||||
}
|
||||
|
||||
if (ompi_enable_timing) {
|
||||
gettimeofday(&ompistart, NULL);
|
||||
}
|
||||
OPAL_TIMING_EVENT((&tm,"Start"));
|
||||
|
||||
/* if we were not externally started, then we need to setup
|
||||
* some envars so the MPI_INFO_ENV can get the cmd name
|
||||
@ -484,14 +484,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
ompi_rte_initialized = true;
|
||||
|
||||
/* check for timing request - get stop time and report elapsed time if so */
|
||||
if (ompi_enable_timing && 0 == OMPI_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init [%ld]: time from start to completion of rte_init %ld usec",
|
||||
(long)OMPI_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
gettimeofday(&ompistart, NULL);
|
||||
}
|
||||
OPAL_TIMING_EVENT((&tm,"rte_init complete"));
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
/* if hwloc is available but didn't get setup for some
|
||||
@ -638,15 +631,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
}
|
||||
|
||||
/* check for timing request - get stop time and report elapsed time if so */
|
||||
if (ompi_enable_timing && 0 == OMPI_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time from completion of rte_init to modex %ld usec",
|
||||
(long)OMPI_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
gettimeofday(&ompistart, NULL);
|
||||
}
|
||||
|
||||
OPAL_TIMING_EVENT((&tm,"Start modex"));
|
||||
|
||||
/* exchange connection info - this function may also act as a barrier
|
||||
* if data exchange is required. The modex occurs solely across procs
|
||||
* in our job, so no proc array is passed. If a barrier is required,
|
||||
@ -654,14 +640,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
*/
|
||||
OPAL_FENCE(NULL, 0, NULL, NULL);
|
||||
|
||||
if (ompi_enable_timing && 0 == OMPI_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time to execute modex %ld usec",
|
||||
(long)OMPI_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
gettimeofday(&ompistart, NULL);
|
||||
}
|
||||
OPAL_TIMING_EVENT((&tm,"End modex"));
|
||||
|
||||
/* select buffered send allocator component to be used */
|
||||
if( OMPI_SUCCESS !=
|
||||
@ -819,14 +798,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
|
||||
/* check for timing request - get stop time and report elapsed
|
||||
time if so, then start the clock again */
|
||||
if (ompi_enable_timing && 0 == OMPI_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time from modex to first barrier %ld usec",
|
||||
(long)OMPI_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
gettimeofday(&ompistart, NULL);
|
||||
}
|
||||
OPAL_TIMING_EVENT((&tm,"Start barrier"));
|
||||
|
||||
/* wait for everyone to reach this point - this is a hard
|
||||
* barrier requirement at this time, though we hope to relax
|
||||
@ -835,14 +807,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
|
||||
/* check for timing request - get stop time and report elapsed
|
||||
time if so, then start the clock again */
|
||||
if (ompi_enable_timing && 0 == OMPI_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time to execute barrier %ld usec",
|
||||
(long)OMPI_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
gettimeofday(&ompistart, NULL);
|
||||
}
|
||||
OPAL_TIMING_EVENT((&tm,"End barrier"));
|
||||
|
||||
#if OPAL_ENABLE_PROGRESS_THREADS == 0
|
||||
/* Start setting up the event engine for MPI operations. Don't
|
||||
@ -989,13 +954,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
ompi_mpi_initialized = true;
|
||||
|
||||
/* check for timing request - get stop time and report elapsed time if so */
|
||||
if (ompi_enable_timing && 0 == OMPI_PROC_MY_NAME->vpid) {
|
||||
gettimeofday(&ompistop, NULL);
|
||||
opal_output(0, "ompi_mpi_init[%ld]: time from barrier to complete mpi_init %ld usec",
|
||||
(long)OMPI_PROC_MY_NAME->vpid,
|
||||
(long int)((ompistop.tv_sec - ompistart.tv_sec)*1000000 +
|
||||
(ompistop.tv_usec - ompistart.tv_usec)));
|
||||
}
|
||||
OPAL_TIMING_EVENT((&tm,"Finish"));
|
||||
OPAL_TIMING_REPORT(ompi_enable_timing, &tm,"MPI Init");
|
||||
OPAL_TIMING_RELEASE(&tm);
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
@ -21,8 +21,10 @@
|
||||
|
||||
SUBDIRS += \
|
||||
tools/ompi_info \
|
||||
tools/wrappers
|
||||
tools/wrappers \
|
||||
tools/mpisync
|
||||
|
||||
DIST_SUBDIRS += \
|
||||
tools/ompi_info \
|
||||
tools/wrappers
|
||||
tools/wrappers \
|
||||
tools/mpisync
|
||||
|
71
ompi/tools/mpisync/Makefile.am
Обычный файл
71
ompi/tools/mpisync/Makefile.am
Обычный файл
@ -0,0 +1,71 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2014 Artem Polyakov <artpol84@gmail.com>
|
||||
#
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
|
||||
|
||||
AM_CFLAGS = \
|
||||
-DOPAL_CONFIGURE_USER="\"@OPAL_CONFIGURE_USER@\"" \
|
||||
-DOPAL_CONFIGURE_HOST="\"@OPAL_CONFIGURE_HOST@\"" \
|
||||
-DOPAL_CONFIGURE_DATE="\"@OPAL_CONFIGURE_DATE@\"" \
|
||||
-DOMPI_BUILD_USER="\"$$USER\"" \
|
||||
-DOMPI_BUILD_HOST="\"`hostname`\"" \
|
||||
-DOMPI_BUILD_DATE="\"`date`\"" \
|
||||
-DOMPI_BUILD_CFLAGS="\"@CFLAGS@\"" \
|
||||
-DOMPI_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \
|
||||
-DOMPI_BUILD_CXXFLAGS="\"@CXXFLAGS@\"" \
|
||||
-DOMPI_BUILD_CXXCPPFLAGS="\"@CXXCPPFLAGS@\"" \
|
||||
-DOMPI_BUILD_FFLAGS="\"@FFLAGS@\"" \
|
||||
-DOMPI_BUILD_FCFLAGS="\"@FCFLAGS@\"" \
|
||||
-DOMPI_BUILD_LDFLAGS="\"@LDFLAGS@\"" \
|
||||
-DOMPI_BUILD_LIBS="\"@LIBS@\"" \
|
||||
-DOPAL_CC_ABSOLUTE="\"@OPAL_CC_ABSOLUTE@\"" \
|
||||
-DOMPI_CXX_ABSOLUTE="\"@OMPI_CXX_ABSOLUTE@\""
|
||||
|
||||
#if (OPAL_INSTALL_BINARIES && OPAL_ENABLE_TIMING)
|
||||
|
||||
if OPAL_INSTALL_TIMING_BINARIES
|
||||
|
||||
bin_PROGRAMS = mpisync
|
||||
bin_SCRIPTS = mpirun_prof ompi_timing_post
|
||||
|
||||
endif
|
||||
|
||||
mpisync_SOURCES = \
|
||||
hpctimer.h \
|
||||
hpctimer.c \
|
||||
mpigclock.h \
|
||||
mpigclock.c \
|
||||
sync.c
|
||||
|
||||
mpisync_LDADD = $(top_builddir)/ompi/libmpi.la
|
||||
if OMPI_RTE_ORTE
|
||||
mpisync_LDADD += $(top_builddir)/orte/libopen-rte.la
|
||||
endif
|
||||
mpisync_LDADD += $(top_builddir)/opal/libopen-pal.la
|
||||
|
||||
clean-local:
|
||||
test -z "$(OMPI_CXX_TEMPLATE_REPOSITORY)" || rm -rf $(OMPI_CXX_TEMPLATE_REPOSITORY)
|
||||
|
||||
distclean-local:
|
||||
rm -f $(man_pages)
|
332
ompi/tools/mpisync/hpctimer.c
Обычный файл
332
ompi/tools/mpisync/hpctimer.c
Обычный файл
@ -0,0 +1,332 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2011, Siberian State University of Telecommunications
|
||||
* and Information Sciences. All rights reserved.
|
||||
* Copyright (c) 2010-2011, A.V. Rzhanov Institute of Semiconductor Physics SB RAS.
|
||||
* All rights reserved.
|
||||
*
|
||||
* hpctimer.c: High-Resolution timers library.
|
||||
*
|
||||
* Copyright (C) 2011 Mikhail Kurnosov <mkurnosov@gmail.com>
|
||||
*
|
||||
* This source code is part of MPIPerf project: http://mpiperf.cpct.sibsutis.ru/index.php/Main/Documentation
|
||||
*/
|
||||
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <strings.h>
|
||||
#include <math.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
#include "hpctimer.h"
|
||||
|
||||
#define NELEMS(v) (sizeof(v) / sizeof((v)[0]))
|
||||
|
||||
/*
|
||||
* Compilers macro:
|
||||
* __GNUC__ - GCC
|
||||
* __SUNPRO_C - Solaris Studio
|
||||
* __INTEL_COMPILER - Intel C++ Compiler
|
||||
* __xlC__ || __IBMC__ - IBM C Compiler
|
||||
* __PATHSCALE__ - PathScale Compiler
|
||||
* __PGI - PGI Compiler
|
||||
* __DECC - DEC Compiler
|
||||
* __HP_cc - HP Compiler
|
||||
* __SX - NEC SX Compiler
|
||||
* __COMO__ - Comeau C++
|
||||
* _CRAYC - Cray C Compiler
|
||||
* sgi || __sgi - SGI Compiler
|
||||
*/
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define __inline__ __inline__
|
||||
# define __asm__ __asm__
|
||||
# define __volatile__ __volatile__
|
||||
#elif defined(__SUNPRO_C)
|
||||
# define __inline__ __inline__
|
||||
# define __asm__ __asm__
|
||||
# define __volatile__ __volatile__
|
||||
#endif
|
||||
|
||||
typedef int (*hpctimer_initialize_func_ptr_t)(void);
|
||||
typedef void (*hpctimer_finalize_func_ptr_t)(void);
|
||||
typedef int (*hpctimer_isimplemented_func_ptr_t)(void);
|
||||
typedef double (*hpctimer_wtime_func_ptr_t)(void);
|
||||
|
||||
typedef struct hpctimer {
|
||||
char *name;
|
||||
hpctimer_initialize_func_ptr_t initialize;
|
||||
hpctimer_finalize_func_ptr_t finalize;
|
||||
hpctimer_isimplemented_func_ptr_t isimplemented;
|
||||
hpctimer_wtime_func_ptr_t wtime;
|
||||
} hpctimer_t;
|
||||
|
||||
static uint64_t hpctimer_overhead; /* Cost of one TSC read, in ticks (set from hpctimer_measure_overhead()) */
|
||||
static uint64_t hpctimer_freq; /* TSC frequency, in ticks per second (set from hpctimer_calibrate_sleep()) */
|
||||
|
||||
static double hpctimer_wtime_tsc(void);
|
||||
static int hpctimer_tsc_initialize(void);
|
||||
static __inline__ uint64_t hpctimer_gettsc(void);
|
||||
static uint64_t hpctimer_measure_overhead(void);
|
||||
static uint64_t hpctimer_calibrate_sleep(uint64_t overhead);
|
||||
static double hpctimer_wtime_gettimeofday(void);
|
||||
|
||||
/*
|
||||
* Timers
|
||||
*/
|
||||
static hpctimer_t hpctimer_timers[] = {
|
||||
{"MPI_Wtime", NULL, NULL, NULL, MPI_Wtime},
|
||||
{"gettimeofday", NULL, NULL, NULL, hpctimer_wtime_gettimeofday},
|
||||
{"tsc", hpctimer_tsc_initialize, NULL, NULL, hpctimer_wtime_tsc}
|
||||
};
|
||||
|
||||
static hpctimer_wtime_func_ptr_t hpctimer_wtime_func_ptr = NULL;
|
||||
static int hpctimer_timer = -1;
|
||||
|
||||
/* hpctimer_initialize: */
|
||||
int hpctimer_initialize(const char *timername)
|
||||
{
|
||||
hpctimer_wtime_func_ptr = NULL;
|
||||
hpctimer_timer = -1;
|
||||
unsigned int i;
|
||||
for (i = 0; i < NELEMS(hpctimer_timers); i++) {
|
||||
if (hpctimer_timers[i].isimplemented != NULL) {
|
||||
if (!hpctimer_timers[i].isimplemented()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (strcasecmp(timername, hpctimer_timers[i].name) == 0) {
|
||||
hpctimer_wtime_func_ptr = hpctimer_timers[i].wtime;
|
||||
hpctimer_timer = i;
|
||||
if (hpctimer_timers[i].initialize) {
|
||||
return hpctimer_timers[i].initialize();
|
||||
}
|
||||
return HPCTIMER_SUCCESS;
|
||||
}
|
||||
}
|
||||
return HPCTIMER_FAILURE;
|
||||
}
|
||||
|
||||
/* hpctimer_finalize: */
|
||||
void hpctimer_finalize(void)
|
||||
{
|
||||
if (hpctimer_timers[hpctimer_timer].finalize) {
|
||||
hpctimer_timers[hpctimer_timer].finalize();
|
||||
}
|
||||
hpctimer_wtime_func_ptr = NULL;
|
||||
}
|
||||
|
||||
/* hpctimer_print_timers: */
|
||||
void hpctimer_print_timers(void)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
printf("Supported timers:\n");
|
||||
for (i = 0; i < NELEMS(hpctimer_timers); i++) {
|
||||
if (hpctimer_timers[i].isimplemented != NULL) {
|
||||
if (!hpctimer_timers[i].isimplemented()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
printf(" %s\n", hpctimer_timers[i].name);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * hpctimer_sanity_check: Returns 1 if the selected timer measures sleeps
 * of 1, 2 and 3 seconds consistently, 0 otherwise.
 *
 * Each sleep is timed and normalised to "measured seconds per slept
 * second"; from the second sleep on, that value must stay within 5% of
 * the previous one.
 */
int hpctimer_sanity_check(void)
{
    enum { NTESTS = 4 };
    const double tolerance = 0.05;
    double prev_per_sec = 0.0;
    int ok = 1;

    for (int secs = 1; secs < NTESTS; secs++) {
        const double t_begin = hpctimer_wtime();
        sleep(secs);
        const double t_end = hpctimer_wtime();
        const double per_sec = (t_end - t_begin) / secs;

        /* The first sample has nothing to be compared against */
        if (secs > 1 && fabs(prev_per_sec - per_sec) > prev_per_sec * tolerance) {
            ok = 0;
        }
        prev_per_sec = per_sec;
    }
    return ok;
}
|
||||
|
||||
/* hpctimer_wtime: Returns walltime in seconds. */
|
||||
double hpctimer_wtime(void)
|
||||
{
|
||||
return hpctimer_wtime_func_ptr();
|
||||
}
|
||||
|
||||
/*
 * hpctimer_wtime_gettimeofday: Returns the gettimeofday() time as
 * seconds, with the microsecond part folded in as a fraction.
 */
static double hpctimer_wtime_gettimeofday(void)
{
    struct timeval now;
    double seconds;

    gettimeofday(&now, NULL);
    seconds = (double)now.tv_sec;
    return seconds + 1E-6 * now.tv_usec;
}
|
||||
|
||||
/*
|
||||
* hpctimer_wtime_tsc: Returns TSC-based walltime in seconds.
|
||||
*/
|
||||
static double hpctimer_wtime_tsc(void)
|
||||
{
|
||||
return (double)(hpctimer_gettsc() - hpctimer_overhead) / (double)hpctimer_freq;
|
||||
}
|
||||
|
||||
/*
|
||||
* hpctimer_tsc_initialize: Initializes TSC-based timer.
|
||||
*
|
||||
* The code is based on recommendations from manual of Intel Corp.
|
||||
* "Using the RDTSC Instruction for Performance Monitoring".
|
||||
*/
|
||||
static int hpctimer_tsc_initialize(void)
|
||||
{
|
||||
hpctimer_overhead = hpctimer_measure_overhead();
|
||||
hpctimer_freq = hpctimer_calibrate_sleep(hpctimer_overhead);
|
||||
return HPCTIMER_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* hpctimer_gettsc: Returns TSC value.
|
||||
*/
|
||||
/*
 * Reads the CPU time-stamp counter and returns it as a 64-bit tick count.
 * Only x86-64 and i386 are handled; any other target stops the build at
 * the #error below.
 */
static __inline__ uint64_t hpctimer_gettsc(void)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
uint32_t low, high;
|
||||
/* CPUID (with EAX zeroed) is issued ahead of RDTSC; presumably this is
 * the serializing step recommended by the Intel RDTSC note cited in this
 * file - confirm against that document. All four registers are declared
 * clobbered. */
__asm__ __volatile__(
|
||||
"xorl %%eax, %%eax\n"
|
||||
"cpuid\n"
|
||||
::: "%rax", "%rbx", "%rcx", "%rdx"
|
||||
);
|
||||
/* RDTSC delivers the counter as two 32-bit halves: EAX (low), EDX (high) */
__asm__ __volatile__(
|
||||
"rdtsc\n"
|
||||
: "=a" (low), "=d" (high)
|
||||
);
|
||||
/* Reassemble the two halves into one 64-bit value */
return ((uint64_t)high << 32) | low;
|
||||
|
||||
#elif defined(__i386__)
|
||||
uint64_t tsc;
|
||||
/* Same CPUID-then-RDTSC sequence as the 64-bit branch, with the 32-bit
 * register names in the clobber list */
__asm__ __volatile__(
|
||||
"xorl %%eax, %%eax\n"
|
||||
"cpuid\n"
|
||||
::: "%eax", "%ebx", "%ecx", "%edx"
|
||||
);
|
||||
/* The "=A" constraint binds the 64-bit result to the EDX:EAX pair */
__asm__ __volatile__(
|
||||
"rdtsc\n"
|
||||
: "=A" (tsc)
|
||||
);
|
||||
return tsc;
|
||||
#else
|
||||
# error "Unsupported platform"
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * hpctimer_measure_overhead: Returns overhead of TSC reading (in ticks).
 *
 * Takes the smallest difference between two back-to-back TSC reads over
 * a fixed number of passes; the early passes double as warm-up.
 */
static uint64_t hpctimer_measure_overhead(void)
{
    enum { TSC_OVERHEAD_NTESTS = 10 };
    uint64_t smallest = (uint64_t)~0x01;
    int pass = 0;

    while (pass < TSC_OVERHEAD_NTESTS) {
        const uint64_t first = hpctimer_gettsc();
        const uint64_t delta = hpctimer_gettsc() - first;

        smallest = (delta < smallest) ? delta : smallest;
        pass++;
    }
    return smallest;
}
|
||||
|
||||
/*
|
||||
* hpctimer_calibrate_adaptive: Returns number of TSC tics per second.
|
||||
* Adaptive algorithm based on sleep.
|
||||
*/
|
||||
/*
|
||||
static uint64_t hpctimer_calibrate_adaptive(uint64_t overhead)
|
||||
{
|
||||
enum {
|
||||
TSC_CALIBRATE_NTESTS = 2
|
||||
};
|
||||
int i;
|
||||
uint64_t count, freq;
|
||||
|
||||
freq = (uint64_t)(~0x01);
|
||||
for (i = 0; i < TSC_CALIBRATE_NTESTS; i++) {
|
||||
count = hpctimer_gettsc();
|
||||
sleep(1);
|
||||
count = hpctimer_gettsc() - count - overhead;
|
||||
if (count < 0)
|
||||
count = 0;
|
||||
if (count < freq) {
|
||||
freq = count;
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
return freq;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
 * hpctimer_calibrate_sleep: Returns number of TSC ticks per second.
 *
 * Counts the ticks that elapse across a 3-second sleep(), subtracts the
 * given read overhead, and divides by the sleep length.
 */
static uint64_t hpctimer_calibrate_sleep(uint64_t overhead)
{
    const int sleep_secs = 3;
    const uint64_t begin = hpctimer_gettsc();
    uint64_t elapsed;

    sleep(sleep_secs);
    elapsed = hpctimer_gettsc() - begin - overhead;
    return elapsed / sleep_secs;
}
|
||||
|
||||
/*
|
||||
* hpctimer_calibrate_loop: Returns number of TSC tics per second.
|
||||
*/
|
||||
/*
|
||||
static uint64_t hpctimer_calibrate_loop(uint64_t overhead)
|
||||
{
|
||||
enum {
|
||||
TSC_CALIBRATE_NTESTS = 2
|
||||
};
|
||||
uint64_t count, countmin = (uint64_t)~0x01;
|
||||
struct timeval tv1, tv2;
|
||||
int i, j;
|
||||
__volatile__ int dummy = 0;
|
||||
|
||||
for (i = 0; i < TSC_CALIBRATE_NTESTS; i++) {
|
||||
gettimeofday(&tv1, NULL);
|
||||
count = hpctimer_gettsc();
|
||||
for (j = 0; j < 10000000; j++) {
|
||||
dummy++;
|
||||
}
|
||||
count = hpctimer_gettsc() - count - overhead;
|
||||
gettimeofday(&tv2, NULL);
|
||||
if (count < 0)
|
||||
count = 0;
|
||||
if (count < countmin)
|
||||
countmin = count;
|
||||
}
|
||||
return countmin * 1000000 / (tv2.tv_sec * 1000000 + tv2.tv_usec -
|
||||
tv1.tv_sec * 1000000 - tv1.tv_usec);
|
||||
}
|
||||
*/
|
36
ompi/tools/mpisync/hpctimer.h
Обычный файл
36
ompi/tools/mpisync/hpctimer.h
Обычный файл
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2011, Siberian State University of Telecommunications
|
||||
* and Information Sciences. All rights reserved.
|
||||
* Copyright (c) 2010-2011, A.V. Rzhanov Institute of Semiconductor Physics SB RAS.
|
||||
* All rights reserved.
|
||||
*
|
||||
* hpctimer.h: High-Resolution timers library.
|
||||
*
|
||||
* Copyright (C) 2011 Mikhail Kurnosov <mkurnosov@gmail.com>
|
||||
*
|
||||
* This source code is part of MPIPerf project: http://mpiperf.cpct.sibsutis.ru/index.php/Main/Documentation
|
||||
*/
|
||||
|
||||
#ifndef HPCTIMER_H
|
||||
#define HPCTIMER_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
enum {
|
||||
HPCTIMER_SUCCESS = 0,
|
||||
HPCTIMER_FAILURE = 1
|
||||
};
|
||||
|
||||
int hpctimer_initialize(const char *timername);
|
||||
void hpctimer_finalize(void);
|
||||
double hpctimer_wtime(void);
|
||||
int hpctimer_sanity_check(void);
|
||||
void hpctimer_print_timers(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* HPCTIMER_H */
|
104
ompi/tools/mpisync/mpigclock.c
Обычный файл
104
ompi/tools/mpisync/mpigclock.c
Обычный файл
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2011, Siberian State University of Telecommunications
|
||||
* and Information Sciences. All rights reserved.
|
||||
* Copyright (c) 2010-2011, A.V. Rzhanov Institute of Semiconductor Physics SB RAS.
|
||||
* All rights reserved.
|
||||
*
|
||||
* mpigclock.c: MPI clock synchronization.
|
||||
*
|
||||
* Copyright (C) 2011 Mikhail Kurnosov <mkurnosov@gmail.com>
|
||||
*
|
||||
* This source code is part of MPIPerf project: http://mpiperf.cpct.sibsutis.ru/index.php/Main/Documentation
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
#include "mpigclock.h"
|
||||
#include "hpctimer.h"
|
||||
|
||||
#define INVALIDTIME -1.0
|
||||
#define MPIGCLOCK_RTTMIN_NOTCHANGED_MAX 100
|
||||
#define MPIGCLOCK_MSGTAG 128
|
||||
|
||||
static double mpigclock_measure_offset_adaptive(MPI_Comm comm, int root, int peer, double *min_rtt);
|
||||
|
||||
|
||||
/*
|
||||
* mpigclock_sync_linear: Clock synchronization algorithm with O(n) steps.
|
||||
*/
|
||||
double mpigclock_sync_linear(MPI_Comm comm, int root, double *rtt)
|
||||
{
|
||||
int i, rank, commsize;
|
||||
double ret = 0;
|
||||
|
||||
MPI_Comm_rank(comm, &rank);
|
||||
MPI_Comm_size(comm, &commsize);
|
||||
|
||||
if (commsize < 2) {
|
||||
*rtt = 0.0;
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
for (i = 1; i < commsize; i++) {
|
||||
MPI_Barrier(comm);
|
||||
if (rank == root || rank == i) {
|
||||
ret = mpigclock_measure_offset_adaptive(comm, root, i, rtt);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* mpigclock_measure_offset_adaptive: Measures clock's offset of peer. */
|
||||
static double mpigclock_measure_offset_adaptive(MPI_Comm comm, int root, int peer, double *min_rtt)
|
||||
{
|
||||
int rank, commsize, rttmin_notchanged = 0;
|
||||
double starttime, stoptime, peertime, rtt, rttmin = 1E12,
|
||||
invalidtime = INVALIDTIME, offset;
|
||||
|
||||
MPI_Comm_rank(comm, &rank);
|
||||
MPI_Comm_size(comm, &commsize);
|
||||
|
||||
offset = 0.0;
|
||||
for (;;) {
|
||||
if (rank != root) {
|
||||
/* Peer process */
|
||||
starttime = hpctimer_wtime();
|
||||
MPI_Send(&starttime, 1, MPI_DOUBLE, root, MPIGCLOCK_MSGTAG, comm);
|
||||
MPI_Recv(&peertime, 1, MPI_DOUBLE, root, MPIGCLOCK_MSGTAG, comm,
|
||||
MPI_STATUS_IGNORE);
|
||||
stoptime = hpctimer_wtime();
|
||||
rtt = stoptime - starttime;
|
||||
|
||||
if (rtt < rttmin) {
|
||||
rttmin = rtt;
|
||||
rttmin_notchanged = 0;
|
||||
offset = peertime - rtt / 2.0 - starttime;
|
||||
} else {
|
||||
if (++rttmin_notchanged == MPIGCLOCK_RTTMIN_NOTCHANGED_MAX) {
|
||||
MPI_Send(&invalidtime, 1, MPI_DOUBLE, root, MPIGCLOCK_MSGTAG,
|
||||
comm);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Root process */
|
||||
MPI_Recv(&starttime, 1, MPI_DOUBLE, peer, MPIGCLOCK_MSGTAG, comm,
|
||||
MPI_STATUS_IGNORE);
|
||||
peertime = hpctimer_wtime();
|
||||
if (starttime < 0.0) {
|
||||
break;
|
||||
}
|
||||
MPI_Send(&peertime, 1, MPI_DOUBLE, peer, MPIGCLOCK_MSGTAG, comm);
|
||||
}
|
||||
} /* for */
|
||||
|
||||
if( rank != root ){
|
||||
*min_rtt = rttmin;
|
||||
} else {
|
||||
rtt = 0.0;
|
||||
}
|
||||
return offset;
|
||||
}
|
14
ompi/tools/mpisync/mpigclock.h
Обычный файл
14
ompi/tools/mpisync/mpigclock.h
Обычный файл
@ -0,0 +1,14 @@
|
||||
#ifndef MPIGCLOCK_H
#define MPIGCLOCK_H

#include <stdio.h>

/* MPI_Comm is used in the prototype below, so the header must pull it in itself */
#include <mpi.h>

#include "hpctimer.h"

/* Timestamp value a peer sends to tell root that measuring is over */
#define INVALIDTIME -1.0
/* Consecutive exchanges without a new minimal RTT before a peer stops */
#define MPIGCLOCK_RTTMIN_NOTCHANGED_MAX 100
/* Message tag used for the timestamp exchange */
#define MPIGCLOCK_MSGTAG 128

/*
 * mpigclock_sync_linear: Clock synchronization algorithm with O(n) steps.
 *
 * comm - communicator to synchronize over
 * root - rank whose clock is the reference
 * rtt  - out: round-trip time reported for the calling rank
 *
 * Returns the clock offset computed for the calling rank.
 */
double mpigclock_sync_linear(MPI_Comm comm, int root, double *rtt);

#endif
|
25
ompi/tools/mpisync/mpirun_prof
Исполняемый файл
25
ompi/tools/mpisync/mpirun_prof
Исполняемый файл
@ -0,0 +1,25 @@
|
||||
#!/bin/sh

# mpirun_prof: run an MPI program with clock-offset data available to the
# timing framework.
#
# First runs mpisync (one process per node) to record per-node clock
# offsets in a temporary file, then launches the requested program through
# mpirun with OMPI_MCA_opal_clksync_file pointing at that file.  All
# arguments are handed to the second mpirun unchanged.  The script exits
# with the status of that mpirun.

ompi_instdir=`dirname "$0"`
syncfile="ompi_clock_sync_data.$$"
tmp_timings=mpirun_prof_timings.out
tmp_out=mpirun_prof.out

# Send the timing output of the mpisync run itself to a scratch file so it
# does not end up in the user's timing file
timing_bkp=$OMPI_MCA_opal_timing_file
export OMPI_MCA_opal_timing_file=$tmp_timings
"${ompi_instdir}/mpirun" --npernode 1 "${ompi_instdir}/mpisync" -o "$syncfile" >"$tmp_out" 2>&1

export OMPI_MCA_opal_timing_file=$timing_bkp
export OMPI_MCA_opal_clksync_file=$syncfile

# Remove old output
if [ -n "$OMPI_MCA_opal_timing_file" ]; then
    rm -f "$OMPI_MCA_opal_timing_file"
fi

# Run a program of interest; "$@" keeps arguments containing spaces intact
"${ompi_instdir}/mpirun" "$@"
rc=$?

# Cleanup
rm -f "$syncfile"
rm -f "$tmp_timings"
rm -f "$tmp_out"

exit $rc
|
||||
|
46
ompi/tools/mpisync/ompi_timing_post
Исполняемый файл
46
ompi/tools/mpisync/ompi_timing_post
Исполняемый файл
@ -0,0 +1,46 @@
|
||||
#!/bin/bash

# ompi_timing_post: post-process a timing file in place.
#
# Blank lines are dropped and the remaining lines sorted.  The first
# timestamp is printed to stdout.  Then, on every line, the leading
# timestamp is replaced by "<time since first line>:<time since previous
# line>".
#
# Usage: ompi_timing_post <timing file>

#####################################################################
# Evaluate a floating point number expression.
# Prints the result; returns non-zero if bc fails or prints nothing.

function float_eval(){
    local float_scale=9
    local stat=0
    local result=0.0
    if [[ $# -gt 0 ]]; then
        result=$(echo "scale=$float_scale; $*" | bc -q 2>/dev/null)
        stat=$?
        if [[ $stat -eq 0 && -z "$result" ]]; then
            stat=1;
        fi
    fi
    echo $result
    return $stat
}

if [ -z "$1" ]; then
    echo "Need the name of a timing file" >&2
    # A missing argument is a usage error, so do not report success
    exit 1
fi

thefile=$1
if [ ! -r "$thefile" ]; then
    echo "Cannot read timing file $thefile" >&2
    exit 1
fi

sed '/^$/d' "$thefile" > "${thefile}_tmp"
sort "${thefile}_tmp" > "${thefile}"

read -r line < "${thefile}"
first_ts=`echo "$line" | awk '{ print $1 }' | sed -e 's/s//'`
prev_ts=$first_ts
echo "$first_ts"

# Expansions are quoted so file names with spaces work and characters
# such as '*' inside a line are not expanded by the shell
while read -r line ; do
    cur_ts=`echo "$line" | awk '{ print $1 }' | sed -e 's/s//'`
    dif1=`float_eval "$cur_ts - $first_ts"`
    dif2=`float_eval "$cur_ts - $prev_ts"`
    newline=`echo "$line" | sed -e "s/$cur_ts/$dif1:$dif2/"`
    prev_ts=$cur_ts
    echo "$newline"
done < "${thefile}" > "${thefile}_tmp"

cat "${thefile}_tmp" > "${thefile}"

rm -f "${thefile}_tmp"
|
133
ompi/tools/mpisync/sync.c
Обычный файл
133
ompi/tools/mpisync/sync.c
Обычный файл
@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Copyright (C) 2014 Artem Polyakov <artpol84@gmail.com>
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <mpi.h>
|
||||
#include <unistd.h>
|
||||
#include <getopt.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "hpctimer.h"
|
||||
#include "mpigclock.h"
|
||||
|
||||
typedef enum { Gen, Chk } prog_mode_t;
|
||||
|
||||
char *filename = NULL;
|
||||
prog_mode_t mode = Gen;
|
||||
double orig_rtt = 0.0, orig_offs = 0.0;
|
||||
|
||||
void print_help(char *progname);
|
||||
int parse_opts(int rank, int argc, char **argv);
|
||||
|
||||
/* print_help: Writes the one-line usage message for `progname` to stdout. */
void print_help(char *progname)
{
    const char *name = progname;

    fprintf(stdout, "%s: ./%s -o <output file>\n", name, name);
}
|
||||
|
||||
int parse_opts(int rank, int argc, char **argv)
|
||||
{
|
||||
while (1) {
|
||||
int option_index = 0;
|
||||
static struct option long_options[] = {
|
||||
{"output", required_argument, 0, 'o' },
|
||||
{"help", required_argument, 0, 'h' },
|
||||
{ 0, 0, 0, 0 } };
|
||||
|
||||
int c = getopt_long(argc, argv, "o:h",
|
||||
long_options, &option_index);
|
||||
if (c == -1)
|
||||
break;
|
||||
switch (c) {
|
||||
case 'h':
|
||||
if( rank == 0 )
|
||||
print_help(argv[0]);
|
||||
return 1;
|
||||
case 'o':
|
||||
filename = strdup(optarg);
|
||||
if( filename == NULL ){
|
||||
perror("Cannot allocate memory");
|
||||
return -1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
MPI_Init(&argc, &argv);
|
||||
MPI_Comm comm = MPI_COMM_WORLD;
|
||||
int rank, commsize;
|
||||
double offs, rtt;
|
||||
char hname[1024];
|
||||
|
||||
MPI_Comm_rank(comm, &rank);
|
||||
MPI_Comm_size(comm, &commsize);
|
||||
|
||||
int ret = parse_opts(rank, argc, argv);
|
||||
|
||||
if( ret < 0 ){
|
||||
// Error exit
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
}else if( ret > 0 ){
|
||||
// Normal exit after help printout
|
||||
MPI_Finalize();
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if( filename == NULL ){
|
||||
if( rank == 0 ){
|
||||
fprintf(stderr, "The name of output file wasn't specified. Abort\n");
|
||||
}
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
if( gethostname(hname, 1024) ){
|
||||
perror("Cannot get hostname");
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// Clear output file if it exists
|
||||
if( rank == 0 ){
|
||||
FILE *fp = fopen(filename, "w");
|
||||
if( fp == NULL ){
|
||||
fprintf(stderr,"Cannot open output file %s for writing. Abort: %s\n",
|
||||
filename, strerror(errno));
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
int rc = hpctimer_initialize("gettimeofday");
|
||||
|
||||
if( rc == HPCTIMER_FAILURE ){
|
||||
fprintf(stderr, "Fail to initialize hpc timer. Abort\n");
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
offs = mpigclock_sync_linear(comm, 0, &rtt);
|
||||
|
||||
FILE *fp = fopen(filename,"a");
|
||||
if( fp == NULL ){
|
||||
fprintf(stderr, "Cannot open %s for appending. Abort\n", filename);
|
||||
MPI_Finalize();
|
||||
exit(1);
|
||||
}
|
||||
fprintf(fp, "%s %lf %lf\n", hname, rtt, offs);
|
||||
fclose(fp);
|
||||
|
||||
MPI_Finalize();
|
||||
return 0;
|
||||
}
|
@ -42,12 +42,19 @@
|
||||
#include "opal/runtime/opal_params.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/timings.h"
|
||||
|
||||
char *opal_signal_string = NULL;
|
||||
char *opal_net_private_ipv4 = NULL;
|
||||
char *opal_set_max_sys_limits = NULL;
|
||||
int opal_pmi_version = 0;
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
char *opal_clksync_file = NULL;
|
||||
char *opal_timing_file = NULL;
|
||||
bool opal_timing_account_overhead = true;
|
||||
#endif
|
||||
|
||||
bool opal_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT);
|
||||
bool opal_cuda_support;
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
@ -235,6 +242,30 @@ int opal_register_params(void)
|
||||
opal_pmi_version = 1;
|
||||
#endif
|
||||
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
(void) mca_base_var_register ("opal", "opal", NULL, "clksync_file",
|
||||
"Mapping of clock offsets from HNP node",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&opal_clksync_file);
|
||||
if( opal_timing_clksync_read(opal_clksync_file) ){
|
||||
opal_output(0, "Cannot read file %s containing clock synchronisation information\n", opal_clksync_file);
|
||||
}
|
||||
|
||||
(void) mca_base_var_register ("opal", "opal", NULL, "timing_file",
|
||||
"OPAL Timing framework output file",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&opal_timing_file);
|
||||
|
||||
(void) mca_base_var_register ("opal", "opal", NULL, "timing_overhead",
|
||||
"Whether account measured timing overhead or not (default: true)",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
|
||||
&opal_timing_account_overhead);
|
||||
#endif
|
||||
|
||||
opal_warn_on_fork = true;
|
||||
(void) mca_base_var_register("ompi", "mpi", NULL, "warn_on_fork",
|
||||
"If nonzero, issue a warning if program forks under conditions that could cause system errors",
|
||||
|
@ -31,6 +31,12 @@ extern char *opal_net_private_ipv4;
|
||||
extern char *opal_set_max_sys_limits;
|
||||
extern int opal_pmi_version;
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
extern char *opal_clksync_file;
|
||||
extern char *opal_timing_file;
|
||||
extern bool opal_timing_account_overhead;
|
||||
#endif
|
||||
|
||||
OPAL_DECLSPEC extern int opal_initialized;
|
||||
OPAL_DECLSPEC extern bool opal_built_with_cuda_support;
|
||||
/**
|
||||
|
@ -67,7 +67,8 @@ headers = \
|
||||
stacktrace.h \
|
||||
strncpy.h \
|
||||
sys_limits.h \
|
||||
uri.h
|
||||
timings.h \
|
||||
uri.h
|
||||
|
||||
libopalutil_la_SOURCES = \
|
||||
$(headers) \
|
||||
@ -102,7 +103,8 @@ libopalutil_la_SOURCES = \
|
||||
stacktrace.c \
|
||||
strncpy.c \
|
||||
sys_limits.c \
|
||||
uri.c
|
||||
timings.c \
|
||||
uri.c
|
||||
|
||||
libopalutil_la_LIBADD = \
|
||||
keyval/libopalutilkeyval.la
|
||||
|
340
opal/util/timings.c
Обычный файл
340
opal/util/timings.c
Обычный файл
@ -0,0 +1,340 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <stdlib.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
// TODO : restore ifdefs
|
||||
//#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
//#endif
|
||||
|
||||
#include <errno.h>
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_RESOURCE_H
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
#include "opal/constants.h"
|
||||
#include "opal/runtime/opal_params.h"
|
||||
|
||||
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/util/timings.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
|
||||
|
||||
/*
 * Debugging aid: sleeps in one-second steps for as long as `i` is non-zero.
 * Returns immediately when called with 0.
 */
static void debug_hang(int i)
{
    for ( ; 0 != i; ) {
        sleep(1);
    }
}
|
||||
|
||||
double opal_timing_get_ts(void);
|
||||
opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t);
|
||||
void opal_timing_init(opal_timing_t *t);
|
||||
opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...);
|
||||
|
||||
void opal_timing_release(opal_timing_t *t);
|
||||
|
||||
static OBJ_CLASS_INSTANCE(opal_timing_event_t, opal_list_item_t, NULL, NULL);
|
||||
|
||||
|
||||
opal_mutex_t tm_lock;
|
||||
static char *nodename = NULL;
|
||||
static char *jobid = "";
|
||||
static double hnp_offs = 0;
|
||||
// TODO use RTT to estimate precise of measurement
|
||||
static double hnp_rtt = 0;
|
||||
|
||||
int opal_timing_clksync_read(char *fname)
|
||||
{
|
||||
int rc = 0;
|
||||
FILE *fp = NULL;
|
||||
char *line = NULL;
|
||||
size_t n;
|
||||
bool found = false;
|
||||
char *ptr = NULL;
|
||||
|
||||
char hname[1024];
|
||||
if( gethostname(hname, 1024) ){
|
||||
rc = -1;
|
||||
opal_output(0, "opal_timing_clksync_read(%s): Cannot gethostname\n",fname);
|
||||
return -1;
|
||||
}
|
||||
nodename = strdup(hname);
|
||||
ptr = strchr(nodename,'.');
|
||||
if( ptr != NULL ){
|
||||
*ptr = '\0';
|
||||
}
|
||||
|
||||
if( fname == NULL ){
|
||||
return 0;
|
||||
}
|
||||
|
||||
fp = fopen(fname,"r");
|
||||
if( fp == NULL ){
|
||||
opal_output(0, "opal_timing_clksync_read(%s): Cannot open the file\n",fname);
|
||||
return -1;
|
||||
}
|
||||
|
||||
while( getline(&line,&n,fp) > 0 ){
|
||||
ptr = strchr(line,' ');
|
||||
if( ptr == NULL ){
|
||||
rc = -1;
|
||||
goto err_exit;
|
||||
}
|
||||
*ptr = '\0';
|
||||
ptr++;
|
||||
if( strcmp(line, hname) == 0 ){
|
||||
if( sscanf(ptr,"%lf %lf", &hnp_rtt, &hnp_offs) != 2 ){
|
||||
rc = -1;
|
||||
goto err_exit;
|
||||
}
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( !found ){
|
||||
opal_output(0,"opal_timing_clksync_read: Can't find my host %s in %s\n", hname, fname);
|
||||
rc = -1;
|
||||
}
|
||||
|
||||
err_exit:
|
||||
|
||||
if( line != NULL ){
|
||||
free(line);
|
||||
}
|
||||
|
||||
if( fp != NULL ){
|
||||
fclose(fp);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
int opal_timing_set_jobid(char *jid)
|
||||
{
|
||||
jobid = strdup(jid);
|
||||
if( jobid == NULL ){
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get current timestamp */
|
||||
/* Return the current gettimeofday() reading as seconds, with the microseconds as the fraction. */
double opal_timing_get_ts(void)
{
    struct timeval now;

    gettimeofday(&now, NULL);
    return (double)now.tv_sec + 1E-6 * (double)now.tv_usec;
}
|
||||
|
||||
opal_timing_event_t *opal_timing_event_alloc(opal_timing_t *t)
|
||||
{
|
||||
if( t->buffer_offset >= t->buffer_size ){
|
||||
// notch timings overhead
|
||||
double alloc_begin = opal_timing_get_ts();
|
||||
|
||||
t->buffer = malloc(sizeof(opal_timing_event_t)*t->buffer_size);
|
||||
if( t->buffer == NULL ){
|
||||
// TODO: out of memory error process
|
||||
}
|
||||
memset(t->buffer, 0, sizeof(opal_timing_event_t)*t->buffer_size);
|
||||
|
||||
double alloc_end = opal_timing_get_ts();
|
||||
|
||||
t->buffer_offset = 0;
|
||||
t->buffer[0].fib = 1;
|
||||
t->buffer[0].ts_ovh = alloc_end - alloc_begin;
|
||||
}
|
||||
int tmp = t->buffer_offset;
|
||||
(t->buffer_offset)++;
|
||||
return t->buffer + tmp;
|
||||
}
|
||||
|
||||
void opal_timing_init(opal_timing_t *t)
|
||||
{
|
||||
memset(t,0,sizeof(*t));
|
||||
|
||||
t->cur_id = 0;
|
||||
// initialize events list
|
||||
t->events = OBJ_NEW(opal_list_t);
|
||||
// Set buffer size
|
||||
t->buffer_size = OPAL_TIMING_BUFSIZE;
|
||||
// Set buffer_offset = buffer_size so new buffer
|
||||
// will be allocated at first event report
|
||||
t->buffer_offset = t->buffer_size;
|
||||
|
||||
OPAL_TIMING_EVENT((t,"%p: Created, events = %p, buffer: ptr = %p, offs = %d", t, t->events, t->buffer, t->buffer_size));
|
||||
}
|
||||
|
||||
opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...)
|
||||
{
|
||||
opal_timing_event_t *ev = opal_timing_event_alloc(t);
|
||||
OBJ_CONSTRUCT(ev, opal_timing_event_t);
|
||||
ev->ts = opal_timing_get_ts();
|
||||
va_list args;
|
||||
va_start( args, fmt );
|
||||
vsnprintf(ev->descr, OPAL_TIMING_DESCR_MAX - 1, fmt, args);
|
||||
ev->descr[OPAL_TIMING_DESCR_MAX-1] = '\0';
|
||||
va_end( args );
|
||||
opal_timing_prep_t p = { t, ev };
|
||||
return p;
|
||||
}
|
||||
|
||||
void opal_timing_add_step(opal_timing_prep_t p,
|
||||
const char *func, const char *file, int line)
|
||||
{
|
||||
p.ev->func = func;
|
||||
p.ev->file = file;
|
||||
p.ev->line = line;
|
||||
p.ev->type = TEVENT;
|
||||
opal_list_append(p.t->events, (opal_list_item_t*)p.ev);
|
||||
}
|
||||
|
||||
/* Emit the accumulated report text: to `fp` when given, otherwise through opal_output(). */
static void opal_timing_report_flush(FILE *fp, const char *buf)
{
    if( fp != NULL ){
        fprintf(fp,"%s", buf);
        fprintf(fp,"\n");
    } else {
        opal_output(0,"\n%s", buf);
    }
}

/*
 * Print all events recorded in `t`.
 *
 * t                 timing handle whose event list is reported
 * account_overhead  when true, the buffer allocation time stored in events
 *                   marked fib is accumulated and added to the timestamps
 * prefix            tag placed in front of every line; ignored when `fname`
 *                   is given
 * fname             file to append to; NULL sends the report to opal_output()
 *
 * The first event of the list (the "Created" marker recorded by
 * opal_timing_init()) is not printed. Lines are collected in a buffer of
 * OPAL_TIMING_OUTBUF_SIZE bytes that is flushed whenever the next line
 * would not fit.
 *
 * Returns 0 on success and OPAL_ERROR when the file cannot be opened,
 * memory cannot be allocated or a single line exceeds the buffer.
 */
int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *prefix, char *fname)
{
    opal_timing_event_t *ev;
    int count = 0;
    FILE *fp = NULL;
    char *buf = NULL;
    size_t buf_size = 0;
    int rc = 0;
    /* never hand a NULL pointer to a %s conversion */
    const char *node = (nodename != NULL) ? nodename : "(null)";
    const char *job = (jobid != NULL) ? jobid : "(null)";

    debug_hang(0);

    if( fname != NULL ){
        fp = fopen(fname,"a");
        if( fp == NULL ){
            // TODO: log error
            rc = OPAL_ERROR;
            goto err_exit;
        }
        prefix=NULL;
    }

    buf = malloc(OPAL_TIMING_OUTBUF_SIZE+1);
    if( buf == NULL ){
        // TODO: log error
        rc = OPAL_ERROR;
        goto err_exit;
    }
    buf[0] = '\0';

    double overhead = 0;
    OPAL_LIST_FOREACH(ev, t->events, opal_timing_event_t){
        count++;
        if( ev->fib && account_overhead ){
            overhead += ev->ts_ovh;
        }

        if( count > 1){
            char *line;
            /* keep only the part of the path after the last '/' */
            const char *file_name = ev->file;
            const char *ptr = file_name;
            for( ; *ptr != '\0' ; ptr++ ){
                if( *ptr == '/'){
                    file_name = ptr+1;
                }
            }
            if( prefix != NULL ){
                rc = asprintf(&line,"%s:\t%lfs\t\"%s\"\t|\t%s\t%s\t%s\t%s:%d\n",
                              prefix,ev->ts + hnp_offs + overhead,
                              ev->descr, node, job, ev->func, file_name, ev->line);
            } else {
                rc = asprintf(&line,"%lfs\t\"%s\"\t|\t%s\t%s\t%s\t%s:%d\n",
                              ev->ts + hnp_offs + overhead,
                              ev->descr, node, job, ev->func, file_name, ev->line);
            }
            if( rc < 0 ){
                // TODO: log mem allocation problems
                rc = OPAL_ERROR;
                goto err_exit;
            }
            rc = 0;

            size_t line_len = strlen(line);
            if( line_len > OPAL_TIMING_OUTBUF_SIZE ){
                // TODO: log buffer overflow
                free(line);
                rc = OPAL_ERROR;
                goto err_exit;
            }
            if( buf_size + line_len > OPAL_TIMING_OUTBUF_SIZE ){
                // flush buffer to the file
                opal_timing_report_flush(fp, buf);
                buf[0] = '\0';
                buf_size = 0;
            }
            /* append in place, terminator included; the size check above
             * guarantees that it fits */
            memcpy(buf + buf_size, line, line_len + 1);
            buf_size += line_len;
            free(line);
        }
    }

    if( buf_size > 0 ){
        // flush buffer to the file
        opal_timing_report_flush(fp, buf);
        buf[0] = '\0';
        buf_size = 0;
    }

err_exit:
    free(buf);
    if( fp != NULL ){
        fclose(fp);
    }
    return rc;
}
|
||||
|
||||
/*
 * Free all events recorded in `t` and its event list.
 *
 * Events are stored in buffers whose first entry carries fib != 0. That
 * entry is also the address of the whole buffer, so only those entries are
 * passed to free(). They are first collected on a temporary list, so that
 * no buffer is freed while its events are still linked in t->events.
 *
 * Calling this on a handle that has no event list (never initialised or
 * already released) does nothing. Afterwards the handle no longer refers
 * to any freed buffer.
 */
void opal_timing_release(opal_timing_t *t)
{
    if( t == NULL || t->events == NULL ){
        return;
    }

    size_t cnt = opal_list_get_size(t->events);

    if( cnt > 0 ){
        opal_list_t *tmp = OBJ_NEW(opal_list_t);
        size_t i;
        for(i=0; i<cnt; i++){
            opal_timing_event_t *ev = (opal_timing_event_t *)opal_list_remove_first(t->events);
            if( ev->fib ){
                opal_list_append(tmp,(opal_list_item_t*)ev);
            }
        }

        cnt = opal_list_get_size(tmp);
        for(i=0; i<cnt; i++){
            opal_timing_event_t *ev = (opal_timing_event_t *)opal_list_remove_first(tmp);
            free(ev);
        }
        OBJ_RELEASE(tmp);
    } else {
        // Error case. At least one event was inserted at initialization.
    }

    OBJ_RELEASE(t->events);
    t->events = NULL;

    /* all buffers are gone: forget the last one and force a fresh
     * allocation should the handle ever be used again */
    t->buffer = NULL;
    t->buffer_offset = t->buffer_size;
}
|
||||
|
||||
#endif
|
95
opal/util/timings.h
Обычный файл
95
opal/util/timings.h
Обычный файл
@ -0,0 +1,95 @@
|
||||
/*
 * ? Copyrights ?
 */

/*
 * Developer-level timing framework.
 *
 * Everything below is compiled only when OPAL_ENABLE_TIMING is non-zero;
 * otherwise the OPAL_TIMING_* macros expand to nothing.
 */

#ifndef OPAL_SYS_TIMING_H
#define OPAL_SYS_TIMING_H

#include "opal/class/opal_list.h"
#include "opal/runtime/opal_params.h"

#if OPAL_ENABLE_TIMING

/* Size in bytes of the description stored in every event */
#define OPAL_TIMING_DESCR_MAX 1024
/* Number of events held by one allocated event buffer */
#define OPAL_TIMING_BUFSIZE 32
/* Capacity in bytes of the text buffer used when printing a report */
#define OPAL_TIMING_OUTBUF_SIZE (10*1024)

typedef enum { TEVENT, TBEGIN, TEND } opal_event_type_t;

/* One recorded event; linked into opal_timing_t.events */
typedef struct {
    opal_list_item_t super;
    int fib;                  /* non-zero for the first event of an allocated buffer */
    opal_event_type_t type;
    const char *func;         /* function, file and line where the event was recorded */
    const char *file;
    int line;
    double ts, ts_ovh;        /* timestamp; time spent allocating the buffer (fib events) */
    char descr[OPAL_TIMING_DESCR_MAX];
    int id;
} opal_timing_event_t;

/* Timing handle: the list of recorded events plus the buffer new events are taken from */
typedef struct opal_timing_t
{
    int cur_id;
    opal_list_t *events;
    opal_timing_event_t *buffer;
    size_t buffer_offset, buffer_size;
} opal_timing_t;

/* Pair handed from opal_timing_prep_ev() to opal_timing_add_step() */
typedef struct {
    opal_timing_t *t;
    opal_timing_event_t *ev;
} opal_timing_prep_t;

/* Read this host's clock offset from the given file; returns 0 on success, -1 on failure */
int opal_timing_clksync_read(char *opal_clksync_file);
/* Store a copy of the job identifier printed in reports; returns 0 on success, -1 on failure */
int opal_timing_set_jobid(char *jid);

/* Prepare a timing handle for use */
void opal_timing_init(opal_timing_t *t);

/* Create a time-stamped event with a printf-style description */
opal_timing_prep_t opal_timing_prep_ev(opal_timing_t *t, const char *fmt, ...);
/* Attach the source location to a prepared event and add it to its handle */
void opal_timing_add_step(opal_timing_prep_t p,
                          const char *func, const char *file, int line);

/*
opal_timing_prep_t opal_timing_prep_end(opal_timing_t *t, int id, const char *fmt, ...);
int opal_timing_begin(opal_timing_t *t, char *file, int line);
void opal_timing_end(opal_timing_prep_t p, char *file, int line);
*/

/* Print the recorded events to fname, or through opal_output() when fname is NULL */
int opal_timing_report(opal_timing_t *t, bool account_overhead, const char *prefix, char *fname);
/* Free the recorded events and the event list of a handle */
void opal_timing_release(opal_timing_t *t);

#define OPAL_TIMING_DECLARE(t) opal_timing_t t;   // must have the semicolon here to avoid warnings when not enabled

#define OPAL_TIMING_DECLARE_EXT(x, t) x extern opal_timing_t t;  // must have the semicolon here to avoid warnings when not enabled

#define OPAL_TIMING_INIT(t) opal_timing_init(t)

/* x is a parenthesised argument list for opal_timing_prep_ev: ((handle, fmt, ...)) */
#define OPAL_TIMING_EVENT(x) opal_timing_add_step( opal_timing_prep_ev x, __FUNCTION__, __FILE__, __LINE__)

/* Wrapped in do/while(0) so the macro behaves as one statement, also inside if/else */
#define OPAL_TIMING_REPORT(enable, t, prefix) do { \
    if( (enable) ) { \
        opal_timing_report((t), opal_timing_account_overhead, (prefix), opal_timing_file); \
    } \
} while (0)

#define OPAL_TIMING_RELEASE(t) opal_timing_release(t)

#else

#define OPAL_TIMING_DECLARE(t)

#define OPAL_TIMING_DECLARE_EXT(x, t)

#define OPAL_TIMING_INIT(t)

#define OPAL_TIMING_EVENT(x)

#define OPAL_TIMING_REPORT(enable, t, prefix)

#define OPAL_TIMING_RELEASE(t)

#endif

#endif
|
@ -40,6 +40,7 @@
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/util/timings.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
@ -47,6 +48,8 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
OPAL_TIMING_DECLARE_EXT(ORTE_DECLSPEC, tm_oob)
|
||||
|
||||
/*
|
||||
* Convenience Typedef
|
||||
*/
|
||||
@ -58,6 +61,9 @@ typedef struct {
|
||||
int max_uri_length;
|
||||
opal_hash_table_t peers;
|
||||
bool use_module_threads;
|
||||
#if OPAL_ENABLE_TIMING
|
||||
bool timing;
|
||||
#endif
|
||||
} orte_oob_base_t;
|
||||
ORTE_DECLSPEC extern orte_oob_base_t orte_oob_base;
|
||||
|
||||
|
@ -48,6 +48,7 @@
|
||||
* Global variables
|
||||
*/
|
||||
orte_oob_base_t orte_oob_base;
|
||||
OPAL_TIMING_DECLARE(tm_oob)
|
||||
|
||||
static int orte_oob_base_register(mca_base_register_flag_t flags)
|
||||
{
|
||||
@ -58,6 +59,15 @@ static int orte_oob_base_register(mca_base_register_flag_t flags)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_oob_base.use_module_threads);
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
/* Detailed timing setup */
|
||||
orte_oob_base.timing = false;
|
||||
(void) mca_base_var_register ("orte", "oob", "base", "timing",
|
||||
"Enable OOB timings",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_oob_base.timing);
|
||||
#endif
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -95,6 +105,10 @@ static int orte_oob_base_close(void)
|
||||
|
||||
OBJ_DESTRUCT(&orte_oob_base.peers);
|
||||
|
||||
|
||||
OPAL_TIMING_EVENT((&tm_oob, "Finish"));
|
||||
OPAL_TIMING_REPORT(orte_oob_base.timing, &tm_oob, "COMM");
|
||||
|
||||
return mca_base_framework_components_close(&orte_oob_base_framework, NULL);
|
||||
}
|
||||
|
||||
@ -117,6 +131,8 @@ static int orte_oob_base_open(mca_base_open_flag_t flags)
|
||||
orte_state.add_job_state(ORTE_JOB_STATE_FT_RESTART, orte_oob_base_ft_event, ORTE_ERROR_PRI);
|
||||
#endif
|
||||
|
||||
OPAL_TIMING_INIT(&tm_oob);
|
||||
|
||||
/* Open up all available components */
|
||||
return mca_base_framework_components_open(&orte_oob_base_framework, flags);
|
||||
}
|
||||
|
@ -80,6 +80,9 @@ static int send_bytes(mca_oob_tcp_peer_t* peer)
|
||||
mca_oob_tcp_send_t* msg = peer->send_msg;
|
||||
int rc;
|
||||
|
||||
OPAL_TIMING_EVENT((&tm_oob, "to %s %d bytes",
|
||||
ORTE_NAME_PRINT(&(peer->name)), msg->sdbytes));
|
||||
|
||||
while (0 < msg->sdbytes) {
|
||||
rc = write(peer->sd, msg->sdptr, msg->sdbytes);
|
||||
if (rc < 0) {
|
||||
@ -313,6 +316,9 @@ void mca_oob_tcp_send_handler(int sd, short flags, void *cbdata)
|
||||
static int read_bytes(mca_oob_tcp_peer_t* peer)
|
||||
{
|
||||
int rc;
|
||||
#if OPAL_ENABLE_TIMING
|
||||
int to_read = peer->recv_msg->rdbytes;
|
||||
#endif
|
||||
|
||||
/* read until all bytes recvd or error */
|
||||
while (0 < peer->recv_msg->rdbytes) {
|
||||
@ -384,6 +390,9 @@ static int read_bytes(mca_oob_tcp_peer_t* peer)
|
||||
peer->recv_msg->rdptr += rc;
|
||||
}
|
||||
|
||||
OPAL_TIMING_EVENT((&tm_oob, "from %s %d bytes",
|
||||
ORTE_NAME_PRINT(&(peer->name)), to_read));
|
||||
|
||||
/* we read the full data block */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -398,6 +407,7 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata)
|
||||
mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t*)cbdata;
|
||||
int rc;
|
||||
orte_rml_send_t *snd;
|
||||
bool timing_same_as_hdr = false;
|
||||
|
||||
if (orte_abnormal_term_ordered) {
|
||||
return;
|
||||
@ -468,7 +478,13 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata)
|
||||
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:tcp:recv:handler read hdr",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
#if OPAL_ENABLE_TIMING
|
||||
int to_recv = peer->recv_msg->rdbytes;
|
||||
#endif
|
||||
if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
|
||||
timing_same_as_hdr = true;
|
||||
OPAL_TIMING_EVENT((&tm_oob, "from %s %d bytes [header]",
|
||||
ORTE_NAME_PRINT(&(peer->name)), to_recv));
|
||||
/* completed reading the header */
|
||||
peer->recv_msg->hdr_recvd = true;
|
||||
/* convert the header */
|
||||
@ -520,6 +536,12 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata)
|
||||
(int)peer->recv_msg->hdr.nbytes,
|
||||
ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst),
|
||||
peer->recv_msg->hdr.tag);
|
||||
|
||||
OPAL_TIMING_EVENT((&tm_oob, "from %s %d bytes [body:%s]",
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
(int)peer->recv_msg->hdr.nbytes,
|
||||
(timing_same_as_hdr) ? "same" : "next"));
|
||||
|
||||
/* am I the intended recipient (header was already converted back to host order)? */
|
||||
if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid &&
|
||||
peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
|
||||
|
@ -30,7 +30,6 @@ struct orte_plm_alps_component_t {
|
||||
orte_plm_base_component_t super;
|
||||
int priority;
|
||||
bool debug;
|
||||
bool timing;
|
||||
char *aprun_cmd;
|
||||
char *custom_args;
|
||||
};
|
||||
|
@ -128,8 +128,6 @@ static int plm_alps_register(void)
|
||||
|
||||
static int plm_alps_open(void)
|
||||
{
|
||||
mca_plm_alps_component.timing = orte_timing;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -41,6 +41,7 @@
|
||||
|
||||
#include "opal/dss/dss_types.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "opal/util/timings.h"
|
||||
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
@ -48,6 +49,7 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
OPAL_TIMING_DECLARE_EXT(ORTE_DECLSPEC, tm_rml)
|
||||
|
||||
/*
|
||||
* MCA Framework
|
||||
@ -81,6 +83,9 @@ ORTE_DECLSPEC void orte_rml_base_comm_stop(void);
|
||||
typedef struct {
|
||||
opal_list_t posted_recvs;
|
||||
opal_list_t unmatched_msgs;
|
||||
#if OPAL_ENABLE_TIMING
|
||||
bool timing;
|
||||
#endif
|
||||
} orte_rml_base_t;
|
||||
ORTE_DECLSPEC extern orte_rml_base_t orte_rml_base;
|
||||
|
||||
|
@ -37,6 +37,7 @@
|
||||
|
||||
orte_rml_module_t orte_rml;
|
||||
orte_rml_base_t orte_rml_base;
|
||||
OPAL_TIMING_DECLARE(tm_rml)
|
||||
|
||||
orte_rml_component_t *orte_rml_component = NULL;
|
||||
|
||||
@ -61,6 +62,15 @@ static int orte_rml_base_register(mca_base_register_flag_t flags)
|
||||
&orte_rml_base_wrapper);
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rml",NULL,"wrapper", 0);
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
orte_rml_base.timing = false;
|
||||
(void) mca_base_var_register ("orte", "rml", "base", "timing",
|
||||
"Enable RML timings",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_rml_base.timing);
|
||||
#endif
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -73,6 +83,8 @@ static int orte_rml_base_close(void)
|
||||
}
|
||||
OBJ_DESTRUCT(&orte_rml_base.posted_recvs);
|
||||
|
||||
OPAL_TIMING_REPORT(orte_rml_base.timing, &tm_rml, "RML");
|
||||
|
||||
return mca_base_framework_components_close(&orte_rml_base_framework, NULL);
|
||||
}
|
||||
|
||||
@ -81,7 +93,7 @@ static int orte_rml_base_open(mca_base_open_flag_t flags)
|
||||
/* Initialize globals */
|
||||
OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t);
|
||||
|
||||
OPAL_TIMING_INIT(&tm_rml);
|
||||
/* Open up all available components */
|
||||
return mca_base_framework_components_open(&orte_rml_base_framework, flags);
|
||||
}
|
||||
|
@ -36,6 +36,7 @@
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/timings.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
@ -164,6 +165,9 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata)
|
||||
ORTE_NAME_PRINT(&msg->sender),
|
||||
msg->tag));
|
||||
|
||||
OPAL_TIMING_EVENT((&tm_rml,"from %s %d bytes",
|
||||
ORTE_NAME_PRINT(&msg->sender), msg->iov.iov_len));
|
||||
|
||||
/* see if we have a waiting recv for this message */
|
||||
OPAL_LIST_FOREACH(post, &orte_rml_base.posted_recvs, orte_rml_posted_recv_t) {
|
||||
/* since names could include wildcards, must use
|
||||
|
@ -108,6 +108,7 @@ static void send_msg(int fd, short args, void *cbdata)
|
||||
"%s rml_send_msg to peer %s at tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag));
|
||||
OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer)));
|
||||
|
||||
/* if this is a message to myself, then just post the message
|
||||
* for receipt - no need to dive into the oob
|
||||
|
@ -59,9 +59,6 @@ opal_list_t orte_proc_states;
|
||||
int orte_clean_output = -1;
|
||||
|
||||
/* globals used by RTE */
|
||||
bool orte_timing;
|
||||
FILE *orte_timing_output = NULL;
|
||||
bool orte_timing_details;
|
||||
bool orte_debug_daemons_file_flag = false;
|
||||
bool orte_leave_session_attached;
|
||||
bool orte_do_not_launch = false;
|
||||
|
@ -435,9 +435,6 @@ ORTE_DECLSPEC orte_node_rank_t orte_get_proc_node_rank(orte_process_name_t *proc
|
||||
ORTE_DECLSPEC orte_vpid_t orte_get_lowest_vpid_alive(orte_jobid_t job);
|
||||
|
||||
/* global variables used by RTE - instanced in orte_globals.c */
|
||||
ORTE_DECLSPEC extern bool orte_timing;
|
||||
ORTE_DECLSPEC extern FILE *orte_timing_output;
|
||||
ORTE_DECLSPEC extern bool orte_timing_details;
|
||||
ORTE_DECLSPEC extern bool orte_debug_daemons_flag;
|
||||
ORTE_DECLSPEC extern bool orte_debug_daemons_file_flag;
|
||||
ORTE_DECLSPEC extern bool orte_leave_session_attached;
|
||||
|
@ -37,6 +37,7 @@
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/util/timings.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
#include "opal/threads/threads.h"
|
||||
|
||||
@ -230,7 +231,11 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
|
||||
error = "orte_ess_init";
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
||||
#if OPAL_ENABLE_TIMING
|
||||
opal_timing_set_jobid(ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
#endif
|
||||
|
||||
/* All done */
|
||||
return ORTE_SUCCESS;
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -45,7 +45,6 @@
|
||||
|
||||
static bool passed_thru = false;
|
||||
static int orte_progress_thread_debug_level = -1;
|
||||
static char *orte_timing_file = NULL;
|
||||
static char *orte_xml_file = NULL;
|
||||
static char *orte_fork_agent_string = NULL;
|
||||
static char *orte_tmpdir_base = NULL;
|
||||
@ -321,47 +320,6 @@ int orte_register_params(void)
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_startup_timeout);
|
||||
|
||||
/* check for timing requests */
|
||||
orte_timing_details = false;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "timing_details",
|
||||
"Request that detailed timing data by reported",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_timing_details);
|
||||
|
||||
/* ensure the timing flag is set too */
|
||||
orte_timing = orte_timing_details;
|
||||
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "timing",
|
||||
"Request that critical timing loops be measured",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_timing);
|
||||
|
||||
|
||||
if (ORTE_PROC_IS_HNP) {
|
||||
orte_timing_file = NULL;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "timing_file",
|
||||
"Name of the file where timing data is to be written (relative or absolute path)",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_timing_file);
|
||||
if (orte_timing && NULL == orte_timing_file) {
|
||||
/* send the timing output to stdout */
|
||||
orte_timing_output = stdout;
|
||||
} else if (NULL != orte_timing_file) {
|
||||
/* make sure the timing flag is set */
|
||||
orte_timing = true;
|
||||
/* send the output to the indicated file */
|
||||
orte_timing_output = fopen(orte_timing_file, "w");
|
||||
if (NULL == orte_timing_output) {
|
||||
/* couldn't be opened */
|
||||
opal_output(0, "File %s could not be opened", orte_timing_file);
|
||||
orte_timing_output = stderr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* User-level debugger info string */
|
||||
orte_base_user_debugger = "totalview @mpirun@ -a @mpirun_args@ : ddt -n @np@ -start @executable@ @executable_argv@ @single_app@ : fxp @mpirun@ -a @mpirun_args@";
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "base_user_debugger",
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user