Upgraded VT version to 5.14
This commit was SVN r27607.
Этот коммит содержится в:
родитель
aebd1ea432
Коммит
6320c4b022
@ -1,7 +1,53 @@
|
||||
5.13.1openmpi
|
||||
- updated version of internal OTF to 1.11.2openmpi
|
||||
5.14openmpi
|
||||
- updated version of internal OTF to 1.12.1openmpi
|
||||
(see extlib/otf/ChangeLog)
|
||||
- added support for filtering functions of specific call paths
|
||||
- introduced new environment variable VT_MPI_IGNORE_FILTER to
|
||||
enable/disable recording of MPI communication events although its
|
||||
corresponding functions are filtered
|
||||
- fixed undefined reference error for vt_get_mpi_f_in_place___ when
|
||||
linking the Fortran MPI wrapper library (libvt-fmpi) built for
|
||||
SGI-MPT (--with-sgimpt)
|
||||
- compiler wrappers:
|
||||
- added option '-vt:inst-exclude-file[-list]' to exclude source
|
||||
files from the automatic instrumentation by the compiler or
|
||||
PDT/TAU
|
||||
- added option '-vt:opari-exclude-file[-list]' to exclude source
|
||||
files from the instrumentation of OpenMP constructs by OPARI
|
||||
- consider preprocessed files (-vt:preprocess) for reuse
|
||||
(-vt:reusefiles)
|
||||
- do additionally scan for comments (pdbcomment) if performing
|
||||
PDT/TAU instrumentation to prevent instrumenting functions
|
||||
within comments
|
||||
- OPARI: (see tools/opari/ChangeLog:26,27)
|
||||
|
||||
5.13.2
|
||||
- added support for IBM BlueGene/Q
|
||||
- added support for PAPI 5 (aka PAPI-V)
|
||||
- fixed CUDA runtime API wrapper for CUDA 5
|
||||
- fixed "Cannot find communicator" error that occurred when completing
|
||||
a non-blocking MPI communication using an already freed communicator
|
||||
(e.g. MPI_Irecv(...,comm,...), MPI_Comm_free(comm), MPI_Wait())
|
||||
- fixed 'gnu' compiler instrumentation for functions defined within
|
||||
shared objects
|
||||
- vtunify:
|
||||
- disable OpenMP parallelization if PGI compiler version < 9
|
||||
is used (threadprivate not supported)
|
||||
- OPARI: (see tools/opari/ChangeLog:24,25)
|
||||
|
||||
5.13.1
|
||||
- updated version of internal OTF to 1.11.2goldfish
|
||||
(see extlib/otf/ChangeLog)
|
||||
- use high precision timer RTC on Cray XE/XK6 platforms using the
|
||||
Cray compiler
|
||||
- fixed parsing symbol list file given by VT_GNU_NMFILE if it is
|
||||
generated by nm with multiple input files
|
||||
- compiler wrappers:
|
||||
- added detection of Cray compiler's OpenMP flag (-h omp) to
|
||||
enable OPARI instrumentation
|
||||
- add OPARI option '-nosrc' when using the Cray compiler
|
||||
(at least the Fortran compiler does not understand
|
||||
#line constructs)
|
||||
- vtnvcc:
|
||||
- add path to cuda.h to the PDT parser command
|
||||
- exclude *.cu source files from instrumenting with
|
||||
@ -13,7 +59,7 @@
|
||||
- enhanced precision of timestamp conversion from local
|
||||
to global
|
||||
|
||||
5.13openmpi
|
||||
5.13
|
||||
- updated version of internal OTF to 1.11.1goldfish
|
||||
(see extlib/otf/ChangeLog)
|
||||
- added support for highly parallel trace writing using the
|
||||
|
@ -225,6 +225,10 @@ How to install and configure VampirTrace
|
||||
give the command for PDT Fortran source code parser,
|
||||
default: f95parse, f90parse, or gfparse
|
||||
|
||||
--with-pdt-comment=PDTCOMMENT
|
||||
give the command for PDT comment parser,
|
||||
default: pdbcomment
|
||||
|
||||
--with-papi-dir=PAPIDIR
|
||||
give the path for PAPI, default: /usr
|
||||
|
||||
@ -345,7 +349,7 @@ How to install and configure VampirTrace
|
||||
(e.g. RANLIB, AR, MPICC, CXXFLAGS).
|
||||
|
||||
Examples:
|
||||
BlueGene/P:
|
||||
BlueGene/P and BlueGene/Q:
|
||||
% ./configure --host=powerpc64-ibm-linux-gnu
|
||||
|
||||
Cray XK6:
|
||||
|
@ -22,6 +22,7 @@ EXTRA_DIST = \
|
||||
VERSION \
|
||||
config/defaults/bgl \
|
||||
config/defaults/bgp \
|
||||
config/defaults/bgq \
|
||||
config/defaults/crayxt \
|
||||
config/defaults/crayxe \
|
||||
config/defaults/ibm \
|
||||
|
@ -1 +1 @@
|
||||
5.13.1openmpi
|
||||
5.14openmpi
|
||||
|
20
ompi/contrib/vt/vt/config/defaults/bgq
Обычный файл
20
ompi/contrib/vt/vt/config/defaults/bgq
Обычный файл
@ -0,0 +1,20 @@
|
||||
CC="bgxlc_r"
|
||||
CXX="bgxlC_r"
|
||||
FC="bgxlf95_r"
|
||||
MPICC="mpixlc_r"
|
||||
MPICXX="mpixlcxx_r"
|
||||
CFLAGS="-O3 -qstrict"
|
||||
CXXFLAGS="-O3 -qstrict -qminimaltoc"
|
||||
CC_FOR_BUILD="xlc_r"
|
||||
CXX_FOR_BUILD="xlC_r"
|
||||
CFLAGS_FOR_BUILD="-O3 -qstrict"
|
||||
CXXFLAGS_FOR_BUILD="-O3 -qstrict"
|
||||
enable_shared="no"
|
||||
with_cross_prefix="bg"
|
||||
with_mpibgq="yes"
|
||||
with_cxxrtlib="-L/opt/ibmcmp/vacpp/bg/12.1/bglib64 -libmc++ -lstdc++"
|
||||
with_shlibc="/lib64/libc.so.6"
|
||||
|
||||
# Disable compiler optimization for the OTF library to work around a
|
||||
# not yet evaluated segmentation fault that occurs when flushing the trace buffer.
|
||||
with_otf_flags="CFLAGS=-O0"
|
@ -10,6 +10,8 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_CC_TAUINST_OPTS=
|
||||
VT_WRAPPER_CC_TAUINST_PARSE_BIN=
|
||||
VT_WRAPPER_CC_TAUINST_PARSE_OPTS=
|
||||
VT_WRAPPER_CC_TAUINST_COMMENT_BIN=
|
||||
VT_WRAPPER_CC_TAUINST_COMMENT_OPTS=
|
||||
VT_WRAPPER_CC_COMPINST_COMPILER_FLAGS=
|
||||
VT_WRAPPER_CC_DEFAULT_PARTYPE="seq"
|
||||
|
||||
@ -23,6 +25,8 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_CXX_TAUINST_OPTS=
|
||||
VT_WRAPPER_CXX_TAUINST_PARSE_BIN=
|
||||
VT_WRAPPER_CXX_TAUINST_PARSE_OPTS=
|
||||
VT_WRAPPER_CXX_TAUINST_COMMENT_BIN=
|
||||
VT_WRAPPER_CXX_TAUINST_COMMENT_OPTS=
|
||||
VT_WRAPPER_CXX_COMPINST_COMPILER_FLAGS=
|
||||
VT_WRAPPER_CXX_DEFAULT_PARTYPE="seq"
|
||||
|
||||
@ -36,6 +40,8 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_FC_TAUINST_OPTS=
|
||||
VT_WRAPPER_FC_TAUINST_PARSE_BIN=
|
||||
VT_WRAPPER_FC_TAUINST_PARSE_OPTS=
|
||||
VT_WRAPPER_FC_TAUINST_COMMENT_BIN=
|
||||
VT_WRAPPER_FC_TAUINST_COMMENT_OPTS=
|
||||
VT_WRAPPER_FC_COMPINST_COMPILER_FLAGS=
|
||||
VT_WRAPPER_FC_DEFAULT_PARTYPE="seq"
|
||||
|
||||
@ -45,11 +51,12 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_NVCC_EXTRA_LIBS=
|
||||
VT_WRAPPER_NVCC_CPP=$CPP
|
||||
VT_WRAPPER_NVCC_EXTRA_CPPFLAGS=
|
||||
|
||||
VT_WRAPPER_NVCC_DYNINST_COMPILER_FLAGS=
|
||||
VT_WRAPPER_NVCC_TAUINST_OPTS=
|
||||
VT_WRAPPER_NVCC_TAUINST_PARSE_BIN=
|
||||
VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS=
|
||||
VT_WRAPPER_NVCC_TAUINST_COMMENT_BIN=
|
||||
VT_WRAPPER_NVCC_TAUINST_COMMENT_OPTS=
|
||||
VT_WRAPPER_NVCC_COMPINST_COMPILER_FLAGS=
|
||||
VT_WRAPPER_NVCC_AVAIL_INST="manual"
|
||||
VT_WRAPPER_NVCC_DEFAULT_INST="manual"
|
||||
@ -257,12 +264,16 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS -Wl,-force_flat_namespace"
|
||||
])
|
||||
|
||||
AS_IF([test "$PLATFORM" = "bgp" -a x"$enable_shared" = "xyes"],
|
||||
AS_IF([test x"$enable_shared" = "xyes"],
|
||||
[
|
||||
VT_WRAPPER_CC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_CC_EXTRA_LINKER_FLAGS -Wl,-dy"
|
||||
VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS="$VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS -Wl,-dy"
|
||||
VT_WRAPPER_FC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_FC_EXTRA_LINKER_FLAGS -Wl,-dy"
|
||||
VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS -Wl,-dy"
|
||||
case $PLATFORM in
|
||||
bgp | bgq)
|
||||
VT_WRAPPER_CC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_CC_EXTRA_LINKER_FLAGS -Wl,-dy"
|
||||
VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS="$VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS -Wl,-dy"
|
||||
VT_WRAPPER_FC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_FC_EXTRA_LINKER_FLAGS -Wl,-dy"
|
||||
VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS="$VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS -Wl,-dy"
|
||||
;;
|
||||
esac
|
||||
])
|
||||
|
||||
AS_IF([test x"$compinst_type" = "xpgi9" -o x"$compinst_type" = "xcraycce"],
|
||||
@ -283,6 +294,10 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_OPARI_OPTS="-nodecl"
|
||||
break
|
||||
;;
|
||||
*Cray\ C*)
|
||||
VT_WRAPPER_OPARI_OPTS="-nosrc"
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
@ -342,15 +357,19 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_CC_TAUINST_OPTS="-c -spec \${datadir}/TAUINST.SPEC"
|
||||
VT_WRAPPER_CC_TAUINST_PARSE_BIN="$tauinst_cparse_cmd"
|
||||
VT_WRAPPER_CC_TAUINST_PARSE_OPTS="$mpiincdir"
|
||||
VT_WRAPPER_CC_TAUINST_COMMENT_BIN="$tauinst_comment_cmd"
|
||||
VT_WRAPPER_CXX_TAUINST_OPTS="-c++ -spec \${datadir}/TAUINST.SPEC"
|
||||
VT_WRAPPER_CXX_TAUINST_PARSE_BIN="$tauinst_cxxparse_cmd"
|
||||
VT_WRAPPER_CXX_TAUINST_PARSE_OPTS="$VT_WRAPPER_CC_TAUINST_PARSE_OPTS"
|
||||
VT_WRAPPER_CXX_TAUINST_COMMENT_BIN="$tauinst_comment_cmd"
|
||||
VT_WRAPPER_FC_TAUINST_OPTS="-fortran -spec \${datadir}/TAUINST.SPEC"
|
||||
VT_WRAPPER_FC_TAUINST_PARSE_BIN="$tauinst_fparse_cmd"
|
||||
VT_WRAPPER_FC_TAUINST_PARSE_OPTS="$fmpiincdir"
|
||||
VT_WRAPPER_FC_TAUINST_COMMENT_BIN="$tauinst_comment_cmd"
|
||||
VT_WRAPPER_NVCC_TAUINST_OPTS="$VT_WRAPPER_CC_TAUINST_OPTS"
|
||||
VT_WRAPPER_NVCC_TAUINST_PARSE_BIN="$VT_WRAPPER_CC_TAUINST_PARSE_BIN"
|
||||
VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS="$VT_WRAPPER_CC_TAUINST_PARSE_OPTS $CUDATKINCDIR"
|
||||
VT_WRAPPER_NVCC_TAUINST_COMMENT_BIN="$tauinst_comment_cmd"
|
||||
VT_WRAPPER_NVCC_AVAIL_INST="$VT_WRAPPER_NVCC_AVAIL_INST tauinst"
|
||||
VT_WRAPPER_AVAIL_INST="$VT_WRAPPER_AVAIL_INST tauinst"
|
||||
])
|
||||
@ -365,6 +384,8 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
AC_SUBST(VT_WRAPPER_CC_TAUINST_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_CC_TAUINST_PARSE_BIN)
|
||||
AC_SUBST(VT_WRAPPER_CC_TAUINST_PARSE_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_CC_TAUINST_COMMENT_BIN)
|
||||
AC_SUBST(VT_WRAPPER_CC_TAUINST_COMMENT_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_CC_COMPINST_COMPILER_FLAGS)
|
||||
AC_SUBST(VT_WRAPPER_CC_DEFAULT_PARTYPE)
|
||||
|
||||
@ -378,6 +399,8 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
AC_SUBST(VT_WRAPPER_CXX_TAUINST_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_CXX_TAUINST_PARSE_BIN)
|
||||
AC_SUBST(VT_WRAPPER_CXX_TAUINST_PARSE_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_CXX_TAUINST_COMMENT_BIN)
|
||||
AC_SUBST(VT_WRAPPER_CXX_TAUINST_COMMENT_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_CXX_COMPINST_COMPILER_FLAGS)
|
||||
AC_SUBST(VT_WRAPPER_CXX_DEFAULT_PARTYPE)
|
||||
|
||||
@ -391,6 +414,8 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
AC_SUBST(VT_WRAPPER_FC_TAUINST_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_FC_TAUINST_PARSE_BIN)
|
||||
AC_SUBST(VT_WRAPPER_FC_TAUINST_PARSE_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_FC_TAUINST_COMMENT_BIN)
|
||||
AC_SUBST(VT_WRAPPER_FC_TAUINST_COMMENT_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_FC_COMPINST_COMPILER_FLAGS)
|
||||
AC_SUBST(VT_WRAPPER_FC_DEFAULT_PARTYPE)
|
||||
|
||||
@ -404,6 +429,8 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
AC_SUBST(VT_WRAPPER_NVCC_TAUINST_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_TAUINST_PARSE_BIN)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_TAUINST_COMMENT_BIN)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_TAUINST_COMMENT_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_COMPINST_COMPILER_FLAGS)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_AVAIL_INST)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_DEFAULT_INST)
|
||||
|
@ -127,9 +127,16 @@ See \`config.log' for more details.])
|
||||
AS_IF([test x"$cuda_error" = "xno"],
|
||||
[
|
||||
have_cuda="yes"
|
||||
],
|
||||
[
|
||||
dnl if no CUDA found, remove content of CUDATKLIBDIR to prevent adding them
|
||||
dnl to the linker flags when using the VT compiler wrappers
|
||||
CUDATKLIBDIR=
|
||||
])
|
||||
|
||||
AC_SUBST(CUDATKDIR)
|
||||
AC_SUBST(CUDATKINCDIR)
|
||||
AC_SUBST(CUDATKLIBDIR)
|
||||
AC_SUBST(CUDALIB)
|
||||
AC_SUBST(CUDARTLIB)
|
||||
])
|
||||
|
@ -67,4 +67,13 @@ AC_DEFUN([ACVT_CUDAWRAP],
|
||||
have_cudartwrap="yes"
|
||||
])
|
||||
])
|
||||
|
||||
dnl if CUPTI found, CUPTILIB already contains CUDATKLIBDIR and CUDARTLIB;
|
||||
dnl remove content of CUDATKLIBDIR and CUDARTLIB to prevent double linking when
|
||||
dnl using the VT compiler wrappers
|
||||
AS_IF([test x"$have_cupti" = "xyes"],
|
||||
[
|
||||
CUDATKLIBDIR=
|
||||
CUDARTLIB=
|
||||
])
|
||||
])
|
||||
|
@ -56,10 +56,10 @@ AC_DEFUN([ACVT_CUPTI],
|
||||
AS_IF([test x"$CUPTILIB" = x -a x"$cupti_error" = "xno"],
|
||||
[
|
||||
sav_LIBS=$LIBS
|
||||
LIBS="$LIBS $CUPTILIBDIR -lcupti $CUDATKLIBDIR $CUDALIB"
|
||||
LIBS="$LIBS $CUPTILIBDIR -lcupti $CUDATKLIBDIR $CUDALIB $CUDARTLIB"
|
||||
AC_MSG_CHECKING([whether linking with -lcupti works])
|
||||
AC_TRY_LINK([],[],
|
||||
[AC_MSG_RESULT([yes]); CUPTILIB="-lcupti $CUDATKLIBDIR $CUDALIB"],[AC_MSG_RESULT([no])])
|
||||
[AC_MSG_RESULT([yes]); CUPTILIB="-lcupti $CUDATKLIBDIR $CUDALIB $CUDARTLIB"],[AC_MSG_RESULT([no])])
|
||||
LIBS=$sav_LIBS
|
||||
])
|
||||
|
||||
@ -108,8 +108,8 @@ AC_DEFUN([ACVT_CUPTI],
|
||||
])
|
||||
])
|
||||
|
||||
dnl if no CUPTI found, remove content of CUPTILIBDIR to prevent adding the
|
||||
dnl '-LCUPTILIBDIR' linker flag by the VT compiler wrappers
|
||||
dnl if no CUPTI found, remove content of CUPTILIBDIR to prevent adding them
|
||||
dnl to the linker flags when using the VT compiler wrappers
|
||||
AS_IF([test x"$have_cupti" = "xno"],
|
||||
[CUPTILIBDIR=])
|
||||
|
||||
|
@ -38,9 +38,9 @@ AC_DEFUN([ACVT_DL],
|
||||
AC_MSG_NOTICE([error: dynamic linking library (libdl) isn't suitable on this platform])
|
||||
dl_error="yes"
|
||||
])
|
||||
AS_IF([test "$PLATFORM" = "bgp"],
|
||||
AS_IF([test "$PLATFORM" = "bgp" -o "$PLATFORM" = "bgq"],
|
||||
[
|
||||
dnl RTLD_NEXT available but not working on BG/P platforms
|
||||
dnl RTLD_NEXT available but not working on BG/P (and BG/Q?) platforms
|
||||
ac_cv_have_decl_RTLD_NEXT="no"
|
||||
])
|
||||
AS_IF([test "$PLATFORM" = "crayxt" -o "$PLATFORM" = "crayxe"],
|
||||
|
@ -19,7 +19,10 @@ AC_DEFUN([ACVT_GETCPU],
|
||||
|
||||
AS_IF([test x"$getcpu_error" = "xno"],
|
||||
[
|
||||
sav_CPPFLAGS=$CPPFLAGS
|
||||
CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
|
||||
AC_CHECK_FUNC([sched_getcpu], [], [getcpu_error="yes"])
|
||||
CPPFLAGS=$sav_CPPFLAGS
|
||||
])
|
||||
|
||||
AS_IF([test x"$getcpu_error" = "xno" -a x"$cross_compiling" = "xno"],
|
||||
@ -27,6 +30,7 @@ AC_DEFUN([ACVT_GETCPU],
|
||||
AC_MSG_CHECKING([whether sched_getcpu works])
|
||||
AC_TRY_RUN(
|
||||
[
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
int main() { return (sched_getcpu() != -1) ? 0 : 1; }
|
||||
],
|
||||
|
@ -16,7 +16,7 @@ AC_DEFUN([ACVT_JAVA],
|
||||
|
||||
AS_IF([test x"$check_java" = "xyes"],
|
||||
[
|
||||
AS_IF([test "$PLATFORM" = "bgp"],
|
||||
AS_IF([test "$PLATFORM" = "bgp" -o "$PLATFORM" = "bgq"],
|
||||
[
|
||||
AC_MSG_NOTICE([error: Java tracing not supported on this platform])
|
||||
java_error="yes"
|
||||
|
@ -59,10 +59,14 @@ AC_DEFUN([ACVT_LIBWRAP],
|
||||
|
||||
AS_IF([test x"$check_libwrap" != "xno"],
|
||||
[
|
||||
AS_IF([test "$PLATFORM" = "bgp" -a x"$enable_shared" = "xno"],
|
||||
AS_IF([test x"$enable_shared" = "xno"],
|
||||
[
|
||||
AC_MSG_NOTICE([error: library tracing requires building of shared libraries on this platform; re-configure with \`--enable-shared'])
|
||||
libwrap_error="yes"
|
||||
case $PLATFORM in
|
||||
bgp | bgq)
|
||||
AC_MSG_NOTICE([error: library tracing requires building of shared libraries on this platform; re-configure with \`--enable-shared'])
|
||||
libwrap_error="yes"
|
||||
;;
|
||||
esac
|
||||
])
|
||||
|
||||
AS_IF([test x"$libwrap_error" = "xno"],
|
||||
|
@ -26,7 +26,10 @@ AC_DEFUN([ACVT_MEMHOOKS],
|
||||
[])])])
|
||||
])
|
||||
|
||||
AS_IF([test x"$memhooks_error" = "xno"], [have_memhooks="yes"])
|
||||
AS_IF([test x"$memhooks_error" = "xno"],
|
||||
[
|
||||
have_memhooks="yes"
|
||||
])
|
||||
])
|
||||
])
|
||||
|
||||
|
@ -183,6 +183,23 @@ AC_DEFUN([ACVT_MPI],
|
||||
])
|
||||
])
|
||||
|
||||
AC_ARG_WITH(mpibgq,
|
||||
AC_HELP_STRING([--with-mpibgq], [set MPI-libs for IBM BG/Q]),
|
||||
[
|
||||
AS_IF([test x"$withval" = "xyes" -a x"$inside_openmpi" = "xno"],
|
||||
[
|
||||
MPILIB="-lmpich"
|
||||
PMPILIB="-lmpich"
|
||||
FMPILIB="-lfmpich"
|
||||
MPICFLAGS="$MPICFLAGS -DMPICH_IGNORE_CXX_SEEK"
|
||||
check_mpi2_thread="no"; have_mpi2_thread="yes"
|
||||
check_mpi2_1sided="no"; have_mpi2_1sided="yes"
|
||||
check_mpi2_extcoll="no"; have_mpi2_extcoll="yes"
|
||||
ac_cv_have_decl_MPI_IN_PLACE="yes"
|
||||
ac_cv_have_decl_MPI_ROOT="yes"
|
||||
])
|
||||
])
|
||||
|
||||
AC_ARG_WITH(mpich,
|
||||
AC_HELP_STRING([--with-mpich], [set MPI-libs for MPICH]),
|
||||
[
|
||||
@ -311,6 +328,7 @@ AC_DEFUN([ACVT_MPI],
|
||||
ac_cv_func_MPI_Type_create_f90_integer="yes"
|
||||
ac_cv_func_MPI_Type_create_f90_real="yes"
|
||||
ac_cv_func_MPI_Type_create_struct="yes"
|
||||
ac_cv_func_MPI_Type_dup="yes"
|
||||
ac_cv_func_MPI_Type_match_size="yes"
|
||||
ac_cv_func_PMPI_Win_test="yes"
|
||||
ac_cv_func_PMPI_Win_lock="yes"
|
||||
@ -754,6 +772,7 @@ dnl check for MPI-2 functions
|
||||
MPI_Type_create_f90_integer \
|
||||
MPI_Type_create_f90_real \
|
||||
MPI_Type_create_struct \
|
||||
MPI_Type_dup \
|
||||
MPI_Type_match_size])
|
||||
|
||||
dnl check for MPI-2 Thread support
|
||||
@ -965,76 +984,69 @@ EOF
|
||||
fmpiwraplib_error="yes"
|
||||
])
|
||||
|
||||
AS_IF([test x"$check_fc_conv" = "xyes" -a x"$fmpiwraplib_error" = "xno"],
|
||||
AS_IF([test x"$fmpiwraplib_error" = "xno"],
|
||||
[
|
||||
sav_CC=$CC
|
||||
sav_CPPFLAGS=$CPPFLAGS
|
||||
sav_LIBS=$LIBS
|
||||
CC=$MPICC
|
||||
CPPFLAGS="$CPPFLAGS $MPICFLAGS $MPIINCDIR"
|
||||
LIBS="$LIBS $MPILIBDIR $MPILIB"
|
||||
|
||||
dnl check for handle conversion: MPI_Comm
|
||||
AC_CHECK_DECL([MPI_Comm_f2c],
|
||||
[AC_CHECK_DECL([MPI_Comm_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_COMM=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
AS_IF([test x"$check_fc_conv" = "xyes"],
|
||||
[
|
||||
dnl check for MPI handle conversion functions
|
||||
|
||||
dnl check for handle conversion: MPI_Errhandler
|
||||
AC_CHECK_DECL([MPI_Errhandler_f2c],
|
||||
[AC_CHECK_DECL([MPI_Errhandler_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_ERRH=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
AC_CHECK_DECL([MPI_Comm_f2c],
|
||||
[AC_CHECK_DECL([MPI_Comm_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_COMM=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
|
||||
dnl check for handle conversion: MPI_File
|
||||
AC_CHECK_DECL([MPI_File_f2c],
|
||||
[AC_CHECK_DECL([MPI_File_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_FILE=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
AC_CHECK_DECL([MPI_Errhandler_f2c],
|
||||
[AC_CHECK_DECL([MPI_Errhandler_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_ERRH=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
|
||||
dnl check for handle conversion: MPI_Group
|
||||
AC_CHECK_DECL([MPI_Group_f2c],
|
||||
[AC_CHECK_DECL([MPI_Group_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_GROUP=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
AC_CHECK_DECL([MPI_File_f2c],
|
||||
[AC_CHECK_DECL([MPI_File_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_FILE=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
|
||||
dnl check for handle conversion: MPI_Info
|
||||
AC_CHECK_DECL([MPI_Info_f2c],
|
||||
[AC_CHECK_DECL([MPI_Info_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_INFO=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
AC_CHECK_DECL([MPI_Group_f2c],
|
||||
[AC_CHECK_DECL([MPI_Group_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_GROUP=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
|
||||
dnl check for handle conversion: MPI_Op
|
||||
AC_CHECK_DECL([MPI_Op_f2c],
|
||||
[AC_CHECK_DECL([MPI_Op_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_OP=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
AC_CHECK_DECL([MPI_Info_f2c],
|
||||
[AC_CHECK_DECL([MPI_Info_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_INFO=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
|
||||
dnl check for handle conversion: MPI_Request
|
||||
AC_CHECK_DECL([MPI_Request_f2c],
|
||||
[AC_CHECK_DECL([MPI_Request_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_REQUEST=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
AC_CHECK_DECL([MPI_Op_f2c],
|
||||
[AC_CHECK_DECL([MPI_Op_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_OP=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
|
||||
dnl check for handle conversion: MPI_Status
|
||||
AC_CHECK_DECL([MPI_Status_f2c],
|
||||
[AC_CHECK_DECL([MPI_Status_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_STATUS=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
AC_CHECK_DECL([MPI_Request_f2c],
|
||||
[AC_CHECK_DECL([MPI_Request_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_REQUEST=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
|
||||
dnl check for handle conversion: MPI_Datatype
|
||||
AC_CHECK_DECL([MPI_Type_f2c],
|
||||
[AC_CHECK_DECL([MPI_Type_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_TYPE=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
AC_CHECK_DECL([MPI_Status_f2c],
|
||||
[AC_CHECK_DECL([MPI_Status_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_STATUS=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
|
||||
dnl check for handle conversion: MPI_Win
|
||||
AC_CHECK_DECL([MPI_Win_f2c],
|
||||
[AC_CHECK_DECL([MPI_Win_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_WIN=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
AC_CHECK_DECL([MPI_Type_f2c],
|
||||
[AC_CHECK_DECL([MPI_Type_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_TYPE=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
|
||||
dnl check for MPI-2 constants
|
||||
AC_CHECK_DECL([MPI_Win_f2c],
|
||||
[AC_CHECK_DECL([MPI_Win_c2f],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_WIN=1], [], [#include "mpi.h"])],
|
||||
[], [#include "mpi.h"])
|
||||
])
|
||||
|
||||
dnl check for MPI-2 constants to convert
|
||||
|
||||
AC_CHECK_DECLS([MPI_IN_PLACE],
|
||||
[VT_MPIGEN_HAVE_FC_CONV_MPI2CONST=1; have_mpi2_const="yes"], [], [#include "mpi.h"])
|
||||
@ -1044,7 +1056,6 @@ dnl check for MPI_STATUS_SIZE
|
||||
|
||||
CC=$sav_CC
|
||||
CPPFLAGS=$sav_CPPFLAGS
|
||||
LIBS=$sav_LIBS
|
||||
])
|
||||
|
||||
AC_SUBST(VT_MPIGEN_HAVE_FC_CONV_COMM)
|
||||
|
@ -9,11 +9,11 @@ AC_DEFUN([ACVT_PLATFORM],
|
||||
|
||||
AC_ARG_WITH(platform,
|
||||
AC_HELP_STRING([--with-platform=PLATFORM],
|
||||
[configure for given platform (altix,bgl,bgp,crayt3e,crayx1,crayxt,crayxe,ibm,linux,macos,necsx,origin,sicortex,sun,generic), default: automatically by configure]),
|
||||
[configure for given platform (altix,bgl,bgp,bgq,crayt3e,crayx1,crayxt,crayxe,ibm,linux,macos,necsx,origin,sicortex,sun,generic), default: automatically by configure]),
|
||||
[
|
||||
AC_MSG_RESULT([skipped (--with-platform=$withval)])
|
||||
|
||||
pform_list="altix bgl bgp crayt3e crayx1 crayxt crayxe ibm linux macos necsx origin sicortex sun generic"
|
||||
pform_list="altix bgl bgp bgq crayt3e crayx1 crayxt crayxe ibm linux macos necsx origin sicortex sun generic"
|
||||
pform_found="no"
|
||||
for p in $pform_list
|
||||
do
|
||||
@ -32,15 +32,17 @@ AC_DEFUN([ACVT_PLATFORM],
|
||||
[PLATFORM=altix],
|
||||
[AS_IF([test "$host_cpu" = "powerpc64" -a "$host" != "$build" -a -d /bgl/BlueLight],
|
||||
[PLATFORM=bgl],
|
||||
[AS_IF([test "$host_cpu" = "powerpc64" -a "$host" != "$build" -a -d /bgsys],
|
||||
[PLATFORM=bgp],
|
||||
[AS_IF([test "$host_cpu" = "x86_64" -a "x`uname -r | grep -q cray_gem && echo TRUE`" = "xTRUE"],
|
||||
[PLATFORM=crayxe],
|
||||
[AS_IF([test "$host_cpu" = "x86_64" -a -d /opt/xt-boot],
|
||||
[PLATFORM=crayxt],
|
||||
[AS_IF([test "$host_cpu" = "mips64" -a -d /opt/sicortex],
|
||||
[PLATFORM=sicortex],
|
||||
[PLATFORM=linux])])])])])])
|
||||
[AS_IF([test "$host_cpu" = "powerpc64" -a "$host" != "$build" -a -d /bgsys/drivers/ppcfloor/hwi],
|
||||
[PLATFORM=bgq],
|
||||
[AS_IF([test "$host_cpu" = "powerpc64" -a "$host" != "$build" -a -d /bgsys],
|
||||
[PLATFORM=bgp],
|
||||
[AS_IF([test "$host_cpu" = "x86_64" -a "x`uname -r | grep -q cray_gem && echo TRUE`" = "xTRUE"],
|
||||
[PLATFORM=crayxe],
|
||||
[AS_IF([test "$host_cpu" = "x86_64" -a -d /opt/xt-boot],
|
||||
[PLATFORM=crayxt],
|
||||
[AS_IF([test "$host_cpu" = "mips64" -a -d /opt/sicortex],
|
||||
[PLATFORM=sicortex],
|
||||
[PLATFORM=linux])])])])])])])
|
||||
;;
|
||||
sunos* | solaris*)
|
||||
PLATFORM=sun
|
||||
@ -81,11 +83,14 @@ AC_DEFUN([ACVT_PLATFORM],
|
||||
BITMODE=$withval
|
||||
])
|
||||
|
||||
|
||||
AS_IF([test "$PLATFORM" = "bgp"],
|
||||
[
|
||||
CPPFLAGS="$CPPFLAGS -I/bgsys/drivers/ppcfloor/arch/include"
|
||||
])
|
||||
case $PLATFORM in
|
||||
bgp)
|
||||
CPPFLAGS="$CPPFLAGS -I/bgsys/drivers/ppcfloor/arch/include"
|
||||
;;
|
||||
bgq)
|
||||
CPPFLAGS="$CPPFLAGS -I/bgsys/drivers/ppcfloor"
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_SUBST(PLATFORM)
|
||||
AC_SUBST(BITMODE)
|
||||
|
@ -13,10 +13,14 @@ AC_DEFUN([ACVT_PLUGINCNTR],
|
||||
|
||||
AS_IF([test x"$check_plugin_cntr" = "xyes"],
|
||||
[
|
||||
AS_IF([test "$PLATFORM" = "bgp" -a x"$enable_shared" = "xno"],
|
||||
AS_IF([test x"$enable_shared" = "xno"],
|
||||
[
|
||||
AC_MSG_NOTICE([error: the plugin counter support requires building of shared libraries on this platform; re-configure with \`--enable-shared'])
|
||||
plugin_cntr_error="yes"
|
||||
case $PLATFORM in
|
||||
bgp | bgq)
|
||||
AC_MSG_NOTICE([error: the plugin counter support requires building of shared libraries on this platform; re-configure with \`--enable-shared'])
|
||||
plugin_cntr_error="yes"
|
||||
;;
|
||||
esac
|
||||
])
|
||||
|
||||
AS_IF([test x"$plugin_cntr_error" = "xno"],
|
||||
|
@ -9,6 +9,7 @@ AC_DEFUN([ACVT_TAUINST],
|
||||
tauinst_cparse_cmd=
|
||||
tauinst_cxxparse_cmd=
|
||||
tauinst_fparse_cmd=
|
||||
tauinst_comment_cmd=
|
||||
|
||||
AC_ARG_ENABLE(tauinst,
|
||||
AC_HELP_STRING([--enable-tauinst],
|
||||
@ -51,6 +52,15 @@ AC_DEFUN([ACVT_TAUINST],
|
||||
tauinst_fparse_cmd=$withval
|
||||
])
|
||||
|
||||
AC_ARG_WITH(pdt-comment,
|
||||
AC_HELP_STRING([--with-pdt-comment=PDTCOMMENT],
|
||||
[give the command for PDT comment parser, default: pdbcomment]),
|
||||
[
|
||||
AS_IF([test x"$withval" = "xyes" -o x"$withval" = "xno"],
|
||||
[AC_MSG_ERROR([value of '--with-pdt-comment' not properly set!])])
|
||||
tauinst_comment_cmd=$withval
|
||||
])
|
||||
|
||||
AS_IF([test "$check_tauinst" = "yes"],
|
||||
[
|
||||
AC_CHECK_PROG(tauinst_cmd, tau_instrumentor, tau_instrumentor)
|
||||
@ -77,6 +87,9 @@ AC_DEFUN([ACVT_TAUINST],
|
||||
[
|
||||
tauinst_fparse_cmd=
|
||||
])
|
||||
AC_CHECK_PROG(tauinst_comment_cmd, pdbcomment, pdbcomment)
|
||||
AS_IF([test x"$tauinst_comment_cmd" = x],
|
||||
[AC_MSG_WARN([no pdbcomment found; You might experience compile-time problems with comments if using TAU instrumentation])])
|
||||
|
||||
AS_IF([test x"$tauinst_cparse_cmd$tauinst_cxxparse_cmd$tauinst_fparse_cmd" = x],
|
||||
[
|
||||
|
@ -58,9 +58,9 @@ AC_DEFUN([ACVT_TIMER],
|
||||
AC_DEFINE([TIMER_RTS_GET_TIMEBASE], [1], [Use `rts_get_timebase' function])
|
||||
timer=TIMER_RTS_GET_TIMEBASE
|
||||
;;
|
||||
bgp)
|
||||
AC_DEFINE([TIMER_BGP_GET_TIMEBASE], [1], [Use `_bgp_GetTimeBase' function])
|
||||
timer=TIMER_BGP_GET_TIMEBASE
|
||||
bgp | bgq)
|
||||
AC_DEFINE([TIMER_GET_TIMEBASE], [1], [Use `GetTimeBase' function])
|
||||
timer=TIMER_GET_TIMEBASE
|
||||
;;
|
||||
ibm)
|
||||
AC_DEFINE([TIMER_POWER_REALTIME], [1], [IBM Power family Real-Time-Clock])
|
||||
@ -90,10 +90,6 @@ AC_DEFUN([ACVT_TIMER],
|
||||
AC_DEFINE([TIMER_GETTIMEOFDAY], [3], [Use `gettimeofday' function])
|
||||
timer=TIMER_CYCLE_COUNTER
|
||||
|
||||
case `$CC -V 2>&1` in
|
||||
*Cray*) timer=TIMER_GETTIMEOFDAY ;;
|
||||
esac
|
||||
|
||||
AS_IF([test $PLATFORM = "crayxt"],
|
||||
[
|
||||
AC_TRY_COMPILE([],
|
||||
@ -128,7 +124,7 @@ AC_DEFUN([ACVT_TIMER],
|
||||
AC_MSG_NOTICE([selected timer: $timer])
|
||||
|
||||
case $timer in
|
||||
TIMER_RTS_GET_TIMEBASE | TIMER_BGP_GET_TIMEBASE | TIMER_SYSSX_HGTIME | TIMER_GETTIMEOFDAY)
|
||||
TIMER_RTS_GET_TIMEBASE | TIMER_GET_TIMEBASE | TIMER_SYSSX_HGTIME | TIMER_GETTIMEOFDAY)
|
||||
timer_is_global=yes
|
||||
timer_is_global_def=1
|
||||
;;
|
||||
|
@ -30,7 +30,7 @@ AC_DEFUN([ACVT_RUN],
|
||||
|
||||
AS_IF([test x"$check_vtrun" = "xyes"],
|
||||
[
|
||||
AS_IF([test "$PLATFORM" = "bgl" -o "$PLATFORM" = "bgp"],
|
||||
AS_IF([test "$PLATFORM" = "bgl" -o "$PLATFORM" = "bgp" -o "$PLATFORM" = "bgq"],
|
||||
[
|
||||
AC_MSG_NOTICE([error: application execution wrapper not supported on this platform])
|
||||
vtrun_error="yes"
|
||||
|
@ -99,6 +99,8 @@ AC_PROG_CPP
|
||||
AC_PROG_CXXCPP
|
||||
AM_PROG_AS
|
||||
|
||||
AC_C_BIGENDIAN
|
||||
|
||||
# Do we want to support Fortran
|
||||
check_fortran="yes"
|
||||
force_fortran="no"
|
||||
@ -203,11 +205,17 @@ AC_PROG_LIBTOOL
|
||||
SHREXT=$shrext_cmds
|
||||
AC_SUBST(SHREXT)
|
||||
|
||||
# If building of shared libraries is desired on BlueGene/P, add '-Wl,-dy'
|
||||
# to linker flags on BlueGene/P platforms to lead libtool to believe that
|
||||
# dynamic linking is the default behaviour of the linker.
|
||||
AS_IF([test "$PLATFORM" = "bgp" -a x"$enable_shared" = "xyes"],
|
||||
[export LDFLAGS="$LDFLAGS $lt_prog_compiler_wl-dy"])
|
||||
# If building of shared libraries is desired on BlueGene/P/Q, add '-Wl,-dy'
|
||||
# to linker flags to lead libtool to believe that dynamic linking is the
|
||||
# default behaviour of the linker.
|
||||
AS_IF([test x"$enable_shared" = "xyes"],
|
||||
[
|
||||
case $PLATFORM in
|
||||
bgp | bgq)
|
||||
export LDFLAGS="$LDFLAGS $lt_prog_compiler_wl-dy"
|
||||
;;
|
||||
esac
|
||||
])
|
||||
|
||||
# Check for BSD compatible symbol lister command
|
||||
#AC_PROG_NM # already been checked by AC_PROG_LIBTOOL
|
||||
|
@ -2,26 +2,33 @@
|
||||
#
|
||||
# Syntax: <functions> -- <limit> [S:<[min-]max-stack-level>] [R]
|
||||
# or: <groups> -- <limit> [S:<[min-]max-stack-level>] [R] G
|
||||
# or: <function-call-path> -- <limit> C
|
||||
#
|
||||
# functions, groups Semicolon-separated list of functions/groups.
|
||||
# (can contain wildcards)
|
||||
#
|
||||
# function-call-path Semicolon-separated list of functions in a call path.
|
||||
# (MUST NOT contain wildcards)
|
||||
#
|
||||
# limit call limit
|
||||
# Stop recording of function/group when the specified call
|
||||
# Stop recording of functions/groups when the specified call
|
||||
# limit is reached.
|
||||
# (0 = don't record function/group, -1 = record unlimited)
|
||||
# (0 = don't record functions/groups, -1 = record unlimited)
|
||||
#
|
||||
# S:<[min-]max-stack-level>
|
||||
# minimum/maximum call stack level
|
||||
# Don't record function/group called beyond the specified
|
||||
# Don't record functions/groups called beyond the specified
|
||||
# stack level boundaries.
|
||||
# (values must be > 0, only valid if call limit is != 0)
|
||||
#
|
||||
# R Attribute for recursive filtering.
|
||||
# Don't record callees of filtered function/group.
|
||||
# Don't record callees of filtered functions/groups.
|
||||
#
|
||||
# G Attribute for filtering function groups.
|
||||
#
|
||||
# C Attribute for filtering a call path.
|
||||
# (implies recursive filtering 'R')
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# add;sub;mul;div -- 1000
|
||||
@ -34,6 +41,25 @@
|
||||
# when they are called between call stack level 5 and 10 but at most 3000000
|
||||
# times.
|
||||
#
|
||||
#
|
||||
# Call Path Specific Filtering:
|
||||
#
|
||||
# The 'C' attribute indicates that the listed functions specify a call path
|
||||
# - a specific sequence of function calls. Recording of the last function in the
|
||||
# list will be stopped if the specified call limit is reached.
|
||||
# The call path must begin with the root function, typically main, and MUST NOT
|
||||
# contain wildcards.
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# main;foo;bar -- 0 C
|
||||
#
|
||||
# This filter directive ensures that the function "bar" called from "foo" which
|
||||
# prior was called from "main" will never be recorded. Since call path filtering
|
||||
# implies recursiveness (see attribute 'R'), all callee functions of this call
|
||||
# path will be excluded from recording as well.
|
||||
#
|
||||
#
|
||||
# Rank Specific Filtering:
|
||||
#
|
||||
# Use the '@' clauses to restrict all the following filter directives to the
|
||||
|
@ -67,7 +67,7 @@ BODY { font-family: sans-serif; }
|
||||
<P>
|
||||
|
||||
<P>
|
||||
<B><BIG CLASS="XHUGE">VampirTrace 5.13 User Manual</BIG></B>
|
||||
<B><BIG CLASS="XHUGE">VampirTrace 5.14 User Manual</BIG></B>
|
||||
<BR>
|
||||
<BR>
|
||||
<BR>
|
||||
@ -98,226 +98,226 @@ Contents</A>
|
||||
<!--Table of Contents-->
|
||||
|
||||
<UL CLASS="TofC">
|
||||
<LI><A NAME="tex2html127"
|
||||
HREF="#SECTION00200000000000000000">Introduction</A>
|
||||
<LI><A NAME="tex2html128"
|
||||
HREF="#SECTION00200000000000000000">Introduction</A>
|
||||
<LI><A NAME="tex2html129"
|
||||
HREF="#SECTION00300000000000000000">Instrumentation</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html129"
|
||||
HREF="#SECTION00310000000000000000">Compiler Wrappers</A>
|
||||
<LI><A NAME="tex2html130"
|
||||
HREF="#SECTION00320000000000000000">Instrumentation Types</A>
|
||||
HREF="#SECTION00310000000000000000">Compiler Wrappers</A>
|
||||
<LI><A NAME="tex2html131"
|
||||
HREF="#SECTION00320000000000000000">Instrumentation Types</A>
|
||||
<LI><A NAME="tex2html132"
|
||||
HREF="#SECTION00330000000000000000">Automatic Instrumentation</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html132"
|
||||
HREF="#SECTION00331000000000000000">Supported Compilers</A>
|
||||
<LI><A NAME="tex2html133"
|
||||
HREF="#SECTION00332000000000000000">Notes for Using the GNU, Intel, PathScale, or Open64 Compiler</A>
|
||||
HREF="#SECTION00331000000000000000">Supported Compilers</A>
|
||||
<LI><A NAME="tex2html134"
|
||||
HREF="#SECTION00333000000000000000">Notes on Instrumentation of Inline Functions</A>
|
||||
HREF="#SECTION00332000000000000000">Notes for Using the GNU, Intel, PathScale, or Open64 Compiler</A>
|
||||
<LI><A NAME="tex2html135"
|
||||
HREF="#SECTION00333000000000000000">Notes on Instrumentation of Inline Functions</A>
|
||||
<LI><A NAME="tex2html136"
|
||||
HREF="#SECTION00334000000000000000">Instrumentation of Loops with OpenUH Compiler</A>
|
||||
</UL>
|
||||
<LI><A NAME="tex2html136"
|
||||
<LI><A NAME="tex2html137"
|
||||
HREF="#SECTION00340000000000000000">Manual Instrumentation</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html137"
|
||||
HREF="#SECTION00341000000000000000">Using the VampirTrace API</A>
|
||||
<LI><A NAME="tex2html138"
|
||||
HREF="#SECTION00341000000000000000">Using the VampirTrace API</A>
|
||||
<LI><A NAME="tex2html139"
|
||||
HREF="#SECTION00342000000000000000">Measurement Controls</A>
|
||||
</UL>
|
||||
<LI><A NAME="tex2html139"
|
||||
HREF="#SECTION00350000000000000000">Source Instrumentation Using PDT/TAU</A>
|
||||
<LI><A NAME="tex2html140"
|
||||
HREF="#SECTION00350000000000000000">Source Instrumentation Using PDT/TAU</A>
|
||||
<LI><A NAME="tex2html141"
|
||||
HREF="#SECTION00360000000000000000">Binary Instrumentation Using Dyninst</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html141"
|
||||
<LI><A NAME="tex2html142"
|
||||
HREF="#SECTION00361000000000000000">Static Binary Instrumentation</A>
|
||||
</UL>
|
||||
<LI><A NAME="tex2html142"
|
||||
HREF="#SECTION00370000000000000000">Runtime Instrumentation Using VTRun</A>
|
||||
<LI><A NAME="tex2html143"
|
||||
HREF="#SECTION00380000000000000000">Tracing Java Applications Using JVMTI</A>
|
||||
HREF="#SECTION00370000000000000000">Runtime Instrumentation Using VTRun</A>
|
||||
<LI><A NAME="tex2html144"
|
||||
HREF="#SECTION00380000000000000000">Tracing Java Applications Using JVMTI</A>
|
||||
<LI><A NAME="tex2html145"
|
||||
HREF="#SECTION00390000000000000000">Tracing Calls to 3rd-Party Libraries</A>
|
||||
</UL>
|
||||
<BR>
|
||||
<LI><A NAME="tex2html145"
|
||||
<LI><A NAME="tex2html146"
|
||||
HREF="#SECTION00400000000000000000">Runtime Measurement</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html146"
|
||||
HREF="#SECTION00410000000000000000">Trace File Name and Location</A>
|
||||
<LI><A NAME="tex2html147"
|
||||
HREF="#SECTION00420000000000000000">Environment Variables</A>
|
||||
HREF="#SECTION00410000000000000000">Trace File Name and Location</A>
|
||||
<LI><A NAME="tex2html148"
|
||||
HREF="#SECTION00430000000000000000">Influencing Trace Buffer Size</A>
|
||||
HREF="#SECTION00420000000000000000">Environment Variables</A>
|
||||
<LI><A NAME="tex2html149"
|
||||
HREF="#SECTION00440000000000000000">Profiling an Application</A>
|
||||
HREF="#SECTION00430000000000000000">Influencing Trace Buffer Size</A>
|
||||
<LI><A NAME="tex2html150"
|
||||
HREF="#SECTION00450000000000000000">Unification of Local Traces</A>
|
||||
HREF="#SECTION00440000000000000000">Profiling an Application</A>
|
||||
<LI><A NAME="tex2html151"
|
||||
HREF="#SECTION00460000000000000000">Synchronized Buffer Flush</A>
|
||||
HREF="#SECTION00450000000000000000">Unification of Local Traces</A>
|
||||
<LI><A NAME="tex2html152"
|
||||
HREF="#SECTION00470000000000000000">Enhanced Timer Synchronization</A>
|
||||
HREF="#SECTION00460000000000000000">Synchronized Buffer Flush</A>
|
||||
<LI><A NAME="tex2html153"
|
||||
HREF="#SECTION00470000000000000000">Enhanced Timer Synchronization</A>
|
||||
<LI><A NAME="tex2html154"
|
||||
HREF="#SECTION00480000000000000000">Environment Configuration Using VTSetup</A>
|
||||
</UL>
|
||||
<BR>
|
||||
<LI><A NAME="tex2html154"
|
||||
<LI><A NAME="tex2html155"
|
||||
HREF="#SECTION00500000000000000000">Recording Additional Events and Counters</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html155"
|
||||
HREF="#SECTION00510000000000000000">Hardware Performance Counters</A>
|
||||
<LI><A NAME="tex2html156"
|
||||
HREF="#SECTION00520000000000000000">Resource Usage Counters</A>
|
||||
HREF="#SECTION00510000000000000000">Hardware Performance Counters</A>
|
||||
<LI><A NAME="tex2html157"
|
||||
HREF="#SECTION00530000000000000000">Memory Allocation Counter</A>
|
||||
HREF="#SECTION00520000000000000000">Resource Usage Counters</A>
|
||||
<LI><A NAME="tex2html158"
|
||||
HREF="#SECTION00540000000000000000">CPU ID Counter</A>
|
||||
HREF="#SECTION00530000000000000000">Memory Allocation Counter</A>
|
||||
<LI><A NAME="tex2html159"
|
||||
HREF="#SECTION00550000000000000000">NVIDIA CUDA</A>
|
||||
HREF="#SECTION00540000000000000000">CPU ID Counter</A>
|
||||
<LI><A NAME="tex2html160"
|
||||
HREF="#SECTION00560000000000000000">Pthread API Calls</A>
|
||||
HREF="#SECTION00550000000000000000">NVIDIA CUDA</A>
|
||||
<LI><A NAME="tex2html161"
|
||||
HREF="#SECTION00570000000000000000">Plugin Counter Metrics</A>
|
||||
HREF="#SECTION00560000000000000000">Pthread API Calls</A>
|
||||
<LI><A NAME="tex2html162"
|
||||
HREF="#SECTION00580000000000000000">I/O Calls</A>
|
||||
HREF="#SECTION00570000000000000000">Plugin Counter Metrics</A>
|
||||
<LI><A NAME="tex2html163"
|
||||
HREF="#SECTION00590000000000000000">fork/system/exec Calls</A>
|
||||
HREF="#SECTION00580000000000000000">I/O Calls</A>
|
||||
<LI><A NAME="tex2html164"
|
||||
HREF="#SECTION005100000000000000000">MPI Correctness Checking Using UniMCI</A>
|
||||
HREF="#SECTION00590000000000000000">fork/system/exec Calls</A>
|
||||
<LI><A NAME="tex2html165"
|
||||
HREF="#SECTION005110000000000000000">User-defined Counters</A>
|
||||
HREF="#SECTION005100000000000000000">MPI Correctness Checking Using UniMCI</A>
|
||||
<LI><A NAME="tex2html166"
|
||||
HREF="#SECTION005120000000000000000">User-defined Markers</A>
|
||||
HREF="#SECTION005110000000000000000">User-defined Counters</A>
|
||||
<LI><A NAME="tex2html167"
|
||||
HREF="#SECTION005120000000000000000">User-defined Markers</A>
|
||||
<LI><A NAME="tex2html168"
|
||||
HREF="#SECTION005130000000000000000">User-defined Communication</A>
|
||||
</UL>
|
||||
<BR>
|
||||
<LI><A NAME="tex2html168"
|
||||
<LI><A NAME="tex2html169"
|
||||
HREF="#SECTION00600000000000000000">Filtering & Grouping</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html169"
|
||||
HREF="#SECTION00610000000000000000">Function Filtering</A>
|
||||
<LI><A NAME="tex2html170"
|
||||
HREF="#SECTION00620000000000000000">Java Specific Filtering</A>
|
||||
HREF="#SECTION00610000000000000000">Function Filtering</A>
|
||||
<LI><A NAME="tex2html171"
|
||||
HREF="#SECTION00620000000000000000">Java Specific Filtering</A>
|
||||
<LI><A NAME="tex2html172"
|
||||
HREF="#SECTION00630000000000000000">Function Grouping</A>
|
||||
</UL>
|
||||
<BR>
|
||||
<LI><A NAME="tex2html172"
|
||||
<LI><A NAME="tex2html173"
|
||||
HREF="#SECTION00700000000000000000">VampirTrace Installation</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html173"
|
||||
HREF="#SECTION00710000000000000000">Basics</A>
|
||||
<LI><A NAME="tex2html174"
|
||||
HREF="#SECTION00720000000000000000">Configure Options</A>
|
||||
HREF="#SECTION00710000000000000000">Basics</A>
|
||||
<LI><A NAME="tex2html175"
|
||||
HREF="#SECTION00730000000000000000">Cross Compilation</A>
|
||||
HREF="#SECTION00720000000000000000">Configure Options</A>
|
||||
<LI><A NAME="tex2html176"
|
||||
HREF="#SECTION00740000000000000000">Environment Set-Up</A>
|
||||
HREF="#SECTION00730000000000000000">Cross Compilation</A>
|
||||
<LI><A NAME="tex2html177"
|
||||
HREF="#SECTION00740000000000000000">Environment Set-Up</A>
|
||||
<LI><A NAME="tex2html178"
|
||||
HREF="#SECTION00750000000000000000">Notes for Developers</A>
|
||||
</UL>
|
||||
<BR>
|
||||
<LI><A NAME="tex2html178"
|
||||
<LI><A NAME="tex2html179"
|
||||
HREF="#SECTION00800000000000000000">Command Reference</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html179"
|
||||
HREF="#SECTION00810000000000000000">Compiler Wrappers (vtcc,vtcxx,vtfort)</A>
|
||||
<LI><A NAME="tex2html180"
|
||||
HREF="#SECTION00820000000000000000">Local Trace Unifier (vtunify)</A>
|
||||
HREF="#SECTION00810000000000000000">Compiler Wrappers (vtcc,vtcxx,vtfort)</A>
|
||||
<LI><A NAME="tex2html181"
|
||||
HREF="#SECTION00830000000000000000">Binary Instrumentor (vtdyn)</A>
|
||||
HREF="#SECTION00820000000000000000">Local Trace Unifier (vtunify)</A>
|
||||
<LI><A NAME="tex2html182"
|
||||
HREF="#SECTION00840000000000000000">Trace Filter Tool (vtfilter)</A>
|
||||
HREF="#SECTION00830000000000000000">Binary Instrumentor (vtdyn)</A>
|
||||
<LI><A NAME="tex2html183"
|
||||
HREF="#SECTION00850000000000000000">Library Wrapper Generator (vtlibwrapgen)</A>
|
||||
HREF="#SECTION00840000000000000000">Trace Filter Tool (vtfilter)</A>
|
||||
<LI><A NAME="tex2html184"
|
||||
HREF="#SECTION00860000000000000000">Application Execution Wrapper (vtrun)</A>
|
||||
HREF="#SECTION00850000000000000000">Library Wrapper Generator (vtlibwrapgen)</A>
|
||||
<LI><A NAME="tex2html185"
|
||||
HREF="#SECTION00870000000000000000">IOFSL server startup script (vtiofsl-start)</A>
|
||||
HREF="#SECTION00860000000000000000">Application Execution Wrapper (vtrun)</A>
|
||||
<LI><A NAME="tex2html186"
|
||||
HREF="#SECTION00870000000000000000">IOFSL server startup script (vtiofsl-start)</A>
|
||||
<LI><A NAME="tex2html187"
|
||||
HREF="#SECTION00880000000000000000">IOFSL server shutdown script (vtiofsl-stop)</A>
|
||||
</UL>
|
||||
<BR>
|
||||
<LI><A NAME="tex2html187"
|
||||
<LI><A NAME="tex2html188"
|
||||
HREF="#SECTION00900000000000000000">Counter Specifications</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html188"
|
||||
HREF="#SECTION00910000000000000000">PAPI</A>
|
||||
<LI><A NAME="tex2html189"
|
||||
HREF="#SECTION00920000000000000000">CPC</A>
|
||||
HREF="#SECTION00910000000000000000">PAPI</A>
|
||||
<LI><A NAME="tex2html190"
|
||||
HREF="#SECTION00930000000000000000">NEC SX Hardware Performance Counter</A>
|
||||
HREF="#SECTION00920000000000000000">CPC</A>
|
||||
<LI><A NAME="tex2html191"
|
||||
HREF="#SECTION00930000000000000000">NEC SX Hardware Performance Counter</A>
|
||||
<LI><A NAME="tex2html192"
|
||||
HREF="#SECTION00940000000000000000">Resource Usage</A>
|
||||
</UL>
|
||||
<BR>
|
||||
<LI><A NAME="tex2html192"
|
||||
<LI><A NAME="tex2html193"
|
||||
HREF="#SECTION001000000000000000000">Using VampirTrace with IOFSL</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html193"
|
||||
HREF="#SECTION001010000000000000000">Introduction</A>
|
||||
<LI><A NAME="tex2html194"
|
||||
HREF="#SECTION001010000000000000000">Introduction</A>
|
||||
<LI><A NAME="tex2html195"
|
||||
HREF="#SECTION001020000000000000000">Overview</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html195"
|
||||
HREF="#SECTION001021000000000000000">File handling in OTF</A>
|
||||
<LI><A NAME="tex2html196"
|
||||
HREF="#SECTION001022000000000000000">I/O Forwarding Scalability Layer</A>
|
||||
HREF="#SECTION001021000000000000000">File handling in OTF</A>
|
||||
<LI><A NAME="tex2html197"
|
||||
HREF="#SECTION001022000000000000000">I/O Forwarding Scalability Layer</A>
|
||||
<LI><A NAME="tex2html198"
|
||||
HREF="#SECTION001023000000000000000">Architecture</A>
|
||||
</UL>
|
||||
<LI><A NAME="tex2html198"
|
||||
<LI><A NAME="tex2html199"
|
||||
HREF="#SECTION001030000000000000000">Installation</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html199"
|
||||
HREF="#SECTION001031000000000000000">Support Libraries</A>
|
||||
<LI><A NAME="tex2html200"
|
||||
HREF="#SECTION001032000000000000000">Building IOFSL</A>
|
||||
HREF="#SECTION001031000000000000000">Support Libraries</A>
|
||||
<LI><A NAME="tex2html201"
|
||||
HREF="#SECTION001032000000000000000">Building IOFSL</A>
|
||||
<LI><A NAME="tex2html202"
|
||||
HREF="#SECTION001033000000000000000">Building VampirTrace & OTF</A>
|
||||
</UL>
|
||||
<LI><A NAME="tex2html202"
|
||||
<LI><A NAME="tex2html203"
|
||||
HREF="#SECTION001040000000000000000">Usage Examples</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html203"
|
||||
HREF="#SECTION001041000000000000000">Using VampirTrace with IOFSL on Cray XK6 / with PBS</A>
|
||||
<LI><A NAME="tex2html204"
|
||||
HREF="#SECTION001041000000000000000">Using VampirTrace with IOFSL on Cray XK6 / with PBS</A>
|
||||
<LI><A NAME="tex2html205"
|
||||
HREF="#SECTION001042000000000000000">Manual Usage</A>
|
||||
</UL>
|
||||
</UL>
|
||||
<BR>
|
||||
<LI><A NAME="tex2html205"
|
||||
<LI><A NAME="tex2html206"
|
||||
HREF="#SECTION001100000000000000000">FAQ</A>
|
||||
<UL>
|
||||
<LI><A NAME="tex2html206"
|
||||
HREF="#SECTION001110000000000000000">Can I use different compilers for VampirTrace and my application?</A>
|
||||
<LI><A NAME="tex2html207"
|
||||
HREF="#SECTION001120000000000000000">Why does my application need such a long time for starting?</A>
|
||||
HREF="#SECTION001110000000000000000">Can I use different compilers for VampirTrace and my application?</A>
|
||||
<LI><A NAME="tex2html208"
|
||||
HREF="#SECTION001130000000000000000">How can I limit compiler instrumentation?</A>
|
||||
HREF="#SECTION001120000000000000000">Why does my application need such a long time for starting?</A>
|
||||
<LI><A NAME="tex2html209"
|
||||
HREF="#SECTION001130000000000000000">How can I limit compiler instrumentation?</A>
|
||||
<LI><A NAME="tex2html210"
|
||||
HREF="#SECTION001140000000000000000">Why do I see multiple
|
||||
I/O operations for a single (un)formatted file read/write from my Fortran
|
||||
application?</A>
|
||||
<LI><A NAME="tex2html210"
|
||||
HREF="#SECTION001150000000000000000">The application has run to completion, but there is no *.otf file. What can I do?</A>
|
||||
<LI><A NAME="tex2html211"
|
||||
HREF="#SECTION001160000000000000000">What limitations are associated with "on/off" and buffer rewind?</A>
|
||||
HREF="#SECTION001150000000000000000">The application has run to completion, but there is no *.otf file. What can I do?</A>
|
||||
<LI><A NAME="tex2html212"
|
||||
HREF="#SECTION001170000000000000000">VampirTrace warns that it ``cannot lock file a.lock'', what's wrong?</A>
|
||||
HREF="#SECTION001160000000000000000">What limitations are associated with "on/off" and buffer rewind?</A>
|
||||
<LI><A NAME="tex2html213"
|
||||
HREF="#SECTION001180000000000000000">Can I relocate my VampirTrace installation without rebuilding from source?</A>
|
||||
HREF="#SECTION001170000000000000000">VampirTrace warns that it ``cannot lock file a.lock'', what's wrong?</A>
|
||||
<LI><A NAME="tex2html214"
|
||||
HREF="#SECTION001190000000000000000">What are the byte counts in collective communication records?</A>
|
||||
HREF="#SECTION001180000000000000000">Can I relocate my VampirTrace installation without rebuilding from source?</A>
|
||||
<LI><A NAME="tex2html215"
|
||||
HREF="#SECTION0011100000000000000000">I get ``error: unknown asm constraint letter''</A>
|
||||
HREF="#SECTION001190000000000000000">What are the byte counts in collective communication records?</A>
|
||||
<LI><A NAME="tex2html216"
|
||||
HREF="#SECTION0011110000000000000000">I have a question that is not answered in this document!</A>
|
||||
HREF="#SECTION0011100000000000000000">I get ``error: unknown asm constraint letter''</A>
|
||||
<LI><A NAME="tex2html217"
|
||||
HREF="#SECTION0011110000000000000000">I have a question that is not answered in this document!</A>
|
||||
<LI><A NAME="tex2html218"
|
||||
HREF="#SECTION0011120000000000000000">I need support for additional features so I can trace application xyz.</A>
|
||||
</UL></UL>
|
||||
<!--End of Table of Contents-->
|
||||
@ -355,20 +355,20 @@ OpenMP events, and performance counters.
|
||||
<P>
|
||||
After a successful tracing run, VampirTrace writes all collected data to a
|
||||
trace file in the Open Trace Format (OTF)<A NAME="tex2html1"
|
||||
HREF="#foot1549"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1569"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
As a result, the information is available for post-mortem analysis and
|
||||
visualization by various tools.
|
||||
Most notably, VampirTrace provides the input data for the Vampir analysis
|
||||
and visualization tool<A NAME="tex2html2"
|
||||
HREF="#foot1550"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1570"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
|
||||
<P>
|
||||
VampirTrace is included in OpenMPI 1.3 and later versions.
|
||||
If not disabled explicitly, VampirTrace is built automatically when installing
|
||||
OpenMPI<A NAME="tex2html3"
|
||||
HREF="#foot1551"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1571"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
|
||||
<P>
|
||||
@ -1185,7 +1185,7 @@ in a single file, that
|
||||
The names in between may contain wildcards as ``?'', ``*'', and ``#'', each entry gets a new line.
|
||||
The lists end with <TT>END[_FILE]_<INCLUDE|EXCLUDE>_LIST</TT>. For further information on selective
|
||||
profiling have a look at the TAU documentation<A NAME="tex2html4"
|
||||
HREF="#foot1575"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1595"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
|
||||
<PRE>
|
||||
@ -1202,7 +1202,7 @@ Binary Instrumentation Using Dyninst
|
||||
The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to
|
||||
instrument the application during runtime (binary instrumentation), by using
|
||||
Dyninst<A NAME="tex2html5"
|
||||
HREF="#foot1576"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1596"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
Recompiling is not necessary for this kind of instrumentation,
|
||||
but relinking:
|
||||
@ -1353,7 +1353,7 @@ Tracing Calls to 3rd-Party Libraries
|
||||
VampirTrace is also capable of tracing calls to third party libraries, which come with
|
||||
at least one C header file even without the library's source code. If VampirTrace was
|
||||
built with support for library tracing (the CTool library<A NAME="tex2html6"
|
||||
HREF="#foot1577"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1597"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is required), the tool <TT>vtlibwrapgen</TT> can be used to
|
||||
generate a wrapper library to intercept each call to the actual library functions.
|
||||
This wrapper library can be linked to the application or used in combination with the
|
||||
@ -1670,6 +1670,10 @@ of a VampirTrace instrumented executable:
|
||||
<TD ALIGN="LEFT">Enable tracing of MPI events?</TD>
|
||||
<TD ALIGN="LEFT">yes</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_MPI_IGNORE_FILTER"></A><TT>VT_MPI_IGNORE_FILTER</TT></TD>
|
||||
<TD ALIGN="LEFT">Enable tracing of MPI communication events although its corresponding functions are filtered?</TD>
|
||||
<TD ALIGN="LEFT">no</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_OMPTRACE"></A><TT>VT_OMPTRACE</TT></TD>
|
||||
<TD ALIGN="LEFT">Enable tracing of OpenMP events instrumented by OPARI?</TD>
|
||||
<TD ALIGN="LEFT">yes</TD>
|
||||
@ -2025,7 +2029,7 @@ for the enhanced timer synchronization:
|
||||
|
||||
<UL>
|
||||
<LI>CLAPACK <A NAME="tex2html7"
|
||||
HREF="#foot1587"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1607"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
|
||||
</LI>
|
||||
<LI>AMD ACML
|
||||
@ -3244,6 +3248,9 @@ By default, all calls of instrumented functions will be traced, so that the
|
||||
or
|
||||
<BR> <TT><groups> - <limit> [S:<[min-]max-stack-level>] [R] G</TT>
|
||||
<BR>
|
||||
or
|
||||
<BR> <TT><function-call-path> - <limit> C</TT>
|
||||
<BR>
|
||||
<P>
|
||||
<BR>
|
||||
<TABLE CELLPADDING=3>
|
||||
@ -3259,17 +3266,29 @@ or
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT"> </TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><TT>function-call-path</TT></TD>
|
||||
<TD ALIGN="LEFT">Semicolon-separated list of</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT">functions in a call path.</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT">(MUST NOT contain wildcards)</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT"> </TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><TT>limit</TT></TD>
|
||||
<TD ALIGN="LEFT">call limit</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT">Stop recording of function/group when</TD>
|
||||
<TD ALIGN="LEFT">Stop recording of functions/groups when</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT">the specified call limit is reached.</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT">(0 = don't record function/group,</TD>
|
||||
<TD ALIGN="LEFT">(0 = don't record functions/groups,</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT">-1 record unlimited)</TD>
|
||||
@ -3277,6 +3296,12 @@ or
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT"> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
<BR>
|
||||
|
||||
<P>
|
||||
<BR>
|
||||
<TABLE CELLPADDING=3>
|
||||
<TR><TD ALIGN="LEFT"><TT>S:<[min-]max-stack-level></TT></TD>
|
||||
<TD ALIGN="LEFT"> </TD>
|
||||
</TR>
|
||||
@ -3284,7 +3309,7 @@ or
|
||||
<TD ALIGN="LEFT">minimum/maximum call stack level</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT">Don't record function/group called</TD>
|
||||
<TD ALIGN="LEFT">Don't record functions/groups called</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT">beyond the specified stack level</TD>
|
||||
@ -3310,9 +3335,24 @@ or
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT">function/group.</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT"> </TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><TT>G</TT></TD>
|
||||
<TD ALIGN="LEFT">Attribute for filtering function groups.</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT"> </TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><TT>C</TT></TD>
|
||||
<TD ALIGN="LEFT">Attribute for filtering a call path.</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT">(implies recursive filtering <TT>R</TT>)</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"> </TD>
|
||||
<TD ALIGN="LEFT"> </TD>
|
||||
</TR>
|
||||
</TABLE>
|
||||
<BR>
|
||||
|
||||
@ -3338,6 +3378,33 @@ Besides creating filter files manually, you can also use the <TT>vtfilter</TT>
|
||||
tool to generate them automatically. This tool reads a provided trace
|
||||
and decides whether a function should be filtered or not, based on the evaluation of
|
||||
<H2><A NAME="SECTION00611000000000000000">
|
||||
Call Path Specific Filtering</A>
|
||||
</H2>
|
||||
|
||||
<P>
|
||||
The 'C' attribute indicates that the listed functions specify a call path
|
||||
- a specific sequence of function calls. Recording of the last function in the
|
||||
list will be stopped if the specified call limit is reached.
|
||||
The call path must begin with the root function, typically main, and MUST NOT
|
||||
contain wildcards.
|
||||
<BR>
|
||||
<P>
|
||||
Example:
|
||||
|
||||
<P>
|
||||
<PRE>
|
||||
main;foo;bar -- 0 C
|
||||
</PRE>
|
||||
|
||||
<P>
|
||||
This filter directive causes that the function <TT>bar</TT> called from <TT>foo</TT> which
|
||||
prior was called from <TT>main</TT> will never be recorded. Since call path filtering
|
||||
implies recursiveness (see attribute <TT>R</TT>) all callee functions of this call
|
||||
path will be excluded from recording as well.
|
||||
|
||||
<P>
|
||||
|
||||
<H2><A NAME="SECTION00612000000000000000">
|
||||
Rank Specific Filtering</A>
|
||||
</H2>
|
||||
|
||||
@ -3364,7 +3431,7 @@ The example defines two limits for the ranks 4 - 10, 20 - 29, and 34. The first
|
||||
|
||||
<P>
|
||||
|
||||
<H4><A NAME="SECTION00611010000000000000">
|
||||
<H4><A NAME="SECTION00612010000000000000">
|
||||
Attention:</A>
|
||||
</H4>
|
||||
The rank specific rules are activated later than usual at MPI_Init, because
|
||||
@ -3611,7 +3678,7 @@ default: automatically by configure.
|
||||
enable support for Dyninst instrumentation,
|
||||
default: enable if found by configure.
|
||||
<SPAN CLASS="textbf">Note:</SPAN> Requires Dyninst<A NAME="tex2html8"
|
||||
HREF="#foot1609"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1629"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> version 6.1 or higher!
|
||||
|
||||
<P>
|
||||
@ -3633,9 +3700,9 @@ enable support for automatic source code
|
||||
instrumentation by using TAU, default: enable if
|
||||
found by configure.
|
||||
<SPAN CLASS="textbf">Note:</SPAN> Requires PDToolkit<A NAME="tex2html9"
|
||||
HREF="#foot1610"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1630"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> or TAU<A NAME="tex2html10"
|
||||
HREF="#foot1611"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1631"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>!
|
||||
|
||||
<P>
|
||||
@ -3937,6 +4004,13 @@ give the command for PDT C++ source code parser, default: <TT>cxxparse</TT>
|
||||
<BR>
|
||||
give the command for PDT Fortran source code parser, default: <TT>f95parse, f90parse, or gfparse</TT>
|
||||
|
||||
<P>
|
||||
</DD>
|
||||
<DT><STRONG><TT>-with-pdt-comment=PDTCOMMENT</TT></STRONG></DT>
|
||||
<DD>
|
||||
<BR>
|
||||
give the command for PDT comment parser, default: <TT>pdbcomment</TT>
|
||||
|
||||
<P>
|
||||
</DD>
|
||||
<DT><STRONG><TT>-with-papi-dir=PAPIDIR</TT></STRONG></DT>
|
||||
@ -4204,7 +4278,7 @@ give the path for JVMTI-include files, default:
|
||||
|
||||
<P>
|
||||
To enable support for generating wrappers for 3rd-party libraries, the C code parser CTool<A NAME="tex2html11"
|
||||
HREF="#foot1612"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1632"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is needed:
|
||||
|
||||
<P>
|
||||
@ -4338,7 +4412,7 @@ are shown below:
|
||||
Examples:
|
||||
|
||||
<P>
|
||||
BlueGene/P:
|
||||
BlueGene/P and BlueGene/Q:
|
||||
|
||||
<P>
|
||||
<PRE>
|
||||
@ -4446,6 +4520,25 @@ options:
|
||||
tauinst automatic source code instrumentation by
|
||||
using PDT/TAU
|
||||
|
||||
-vt:inst-exclude-file-list <file>[,file,...]
|
||||
Set list of source files to be excluded
|
||||
from the automatic instrumentation by the
|
||||
compiler or PDT/TAU.
|
||||
(file names can contain wildcards)
|
||||
|
||||
-vt:inst-exclude-file <file>
|
||||
Set pathname of file containing a list of
|
||||
source files to be excluded from the
|
||||
automatic instrumentation by the compiler
|
||||
or PDT/TAU.
|
||||
(file names can contain wildcards, one file
|
||||
name per line)
|
||||
|
||||
Note when using an exclusion list for automatic compiler
|
||||
instrumentation:
|
||||
If a source file from the exclusion list is involved in a
|
||||
compile step, the instrumentation is disabled for this step.
|
||||
|
||||
-vt:opari <!args> Set options for OPARI command. (see
|
||||
share/vampirtrace/doc/opari/Readme.html)
|
||||
|
||||
@ -4457,6 +4550,19 @@ options:
|
||||
Set pathname of the OPARI runtime table file.
|
||||
(default: opari.tab.c)
|
||||
|
||||
-vt:opari-exclude-file-list <file>[,file,...]
|
||||
Set list of source files to be excluded from
|
||||
the instrumentation of OpenMP constructs by
|
||||
OPARI.
|
||||
(file names can contain wildcards)
|
||||
|
||||
-vt:opari-exclude-file <file>
|
||||
Set pathname of file containing a list of
|
||||
source files to be excluded from the
|
||||
instrumentation of OpenMP constructs by OPARI.
|
||||
(file names can contain wildcards, one file name
|
||||
per line)
|
||||
|
||||
-vt:noopari Disable instrumentation of OpenMP constructs
|
||||
by OPARI.
|
||||
|
||||
@ -5334,7 +5440,7 @@ Since IOFSL servers can handle multiple clients, an N:M mapping of clients to se
|
||||
|
||||
<P>
|
||||
When using the IOFSL integration, all write requests in OTF are issued using the zoidfs API<A NAME="tex2html12"
|
||||
HREF="#foot3199"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot3235"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>. Those writes are handled by the IOFSL forwarding servers and aggregated into a single file using the atomic append feature. The offset in the multifile is returned to OTF and stored in a second file, the so called index file, in order to maintain the mapping between written blocks and streams. For any block of a stream written into the multifile, the index file contains the ID of the stream, the start of the block, and its length. This allows for an efficient reading of blocks since only the index file has to be scanned for entries for a given stream ID. Additionally, a large number of logical files (streams) can be stored using only two physical files.
|
||||
|
||||
<P>
|
||||
@ -5346,7 +5452,7 @@ Installation</A>
|
||||
<P>
|
||||
In order to use this setup, IOFSL and VampirTrace have to be compiled in order.
|
||||
In the following sections, the directory <TT><install_dir></TT> should be replaced with a - possibly user-local - directory used for installation, e.g. <TT>$HOME/local</TT><A NAME="tex2html13"
|
||||
HREF="#foot3203"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot3239"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
The installation procedure for IOFSL is described at https://trac.mcs.anl.gov/projects/iofsl/wiki/Building.
|
||||
Currently the <TT>iofsl_vampir</TT> git branch is required.
|
||||
@ -5525,7 +5631,7 @@ They will be launched on dedicated compute nodes that are part of the batch Job
|
||||
PBS Options</A>
|
||||
</H4>
|
||||
It is important to reserve a sufficient number of processor cores. The number of cores requested must be large enough to contain the number of application cores plus the number of cores required for the IOFSL server instances. Each IOFSL server will run on a dedicated node<A NAME="tex2html14"
|
||||
HREF="#foot3247"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot3283"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>. Thus N_allocated ≥ ((N_IOFSL * 16) + N_Application) must hold.
|
||||
<P>
|
||||
Example using 64 server instances:
|
||||
@ -5622,10 +5728,10 @@ Configuring the Server</A>
|
||||
The server is configured using a configuration file.
|
||||
At server start-up, this file is provided using the <TT>-config</TT> argument.
|
||||
The Cray XK6 configuration file is provided in the package<A NAME="tex2html15"
|
||||
HREF="#foot3387"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot3423"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
For more information about the options available please refer to the IOFSL documentation<A NAME="tex2html16"
|
||||
HREF="#foot3388"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot3424"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
The most important option is the <TT>serverlist</TT> entry in the <TT>bmi</TT> section which takes a list of server addresses, e.g.:
|
||||
<PRE>
|
||||
@ -6031,99 +6137,99 @@ If you provide us with your additions afterwards we will consider merging them
|
||||
into the official VampirTrace package.
|
||||
<BR><HR><H4>Footnotes</H4>
|
||||
<DL>
|
||||
<DT><A NAME="foot1549">... (OTF)</A><A
|
||||
<DT><A NAME="foot1569">... (OTF)</A><A
|
||||
HREF="#tex2html1"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.tu-dresden.de/zih/otf
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1550">... tool </A><A
|
||||
<DT><A NAME="foot1570">... tool </A><A
|
||||
HREF="#tex2html2"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.vampir.eu
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1551">...
|
||||
<DT><A NAME="foot1571">...
|
||||
Open MPI </A><A
|
||||
HREF="#tex2html3"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.open-mpi.org/faq/?category=vampirtrace
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1575">... documentation </A><A
|
||||
<DT><A NAME="foot1595">... documentation </A><A
|
||||
HREF="#tex2html4"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.cs.uoregon.edu/Research/tau/docs/newguide/bk05ch02.html#d0e3770
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1576">...
|
||||
<DT><A NAME="foot1596">...
|
||||
Dyninst </A><A
|
||||
HREF="#tex2html5"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.dyninst.org
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1577">... library </A><A
|
||||
<DT><A NAME="foot1597">... library </A><A
|
||||
HREF="#tex2html6"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://sourceforge.net/projects/ctool
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1587">... CLAPACK</A><A
|
||||
<DT><A NAME="foot1607">... CLAPACK</A><A
|
||||
HREF="#tex2html7"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>www.netlib.org/clapack
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1609">... Dyninst </A><A
|
||||
<DT><A NAME="foot1629">... Dyninst </A><A
|
||||
HREF="#tex2html8"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.dyninst.org
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1610">... PDToolkit </A><A
|
||||
<DT><A NAME="foot1630">... PDToolkit </A><A
|
||||
HREF="#tex2html9"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.cs.uoregon.edu/research/pdt/home.php
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1611">... TAU </A><A
|
||||
<DT><A NAME="foot1631">... TAU </A><A
|
||||
HREF="#tex2html10"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://tau.uoregon.edu
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1612">... CTool </A><A
|
||||
<DT><A NAME="foot1632">... CTool </A><A
|
||||
HREF="#tex2html11"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://sourceforge.net/projects/ctool
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot3199">... API</A><A
|
||||
<DT><A NAME="foot3235">... API</A><A
|
||||
HREF="#tex2html12"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>The OTF master control file is written using POSIX I/O in any case.
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot3203">...$HOME/local</A><A
|
||||
<DT><A NAME="foot3239">...$HOME/local</A><A
|
||||
HREF="#tex2html13"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>The software packages can be installed in different directories.
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot3247">... node</A><A
|
||||
<DT><A NAME="foot3283">... node</A><A
|
||||
HREF="#tex2html14"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>The server makes use of all of the node's resources by multithreading and allocating large I/O buffers.
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot3387">... package</A><A
|
||||
<DT><A NAME="foot3423">... package</A><A
|
||||
HREF="#tex2html15"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD><TT>tools/vtiofsl/platform/crayxk6-iofwd.cf</TT>
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot3388">... documentation</A><A
|
||||
<DT><A NAME="foot3424">... documentation</A><A
|
||||
HREF="#tex2html16"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>https://trac.mcs.anl.gov/projects/iofsl/wiki/ConfigurationFile
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -6,6 +6,7 @@ Bert Wesarg <bert.wesarg AT tu-dresden.de>
|
||||
Robert Dietrich <robert.dietrich AT zih.tu-dresden.de>
|
||||
Jens Doleschal <jens.doleschal AT tu-dresden.de>
|
||||
Thomas Ilsche <thomas.ilsche AT tu-dresden.de>
|
||||
Mathias Korepkat <mathias.korepkat AT tu-dresden.de>
|
||||
Andre Groetzsch <andre.groetzsch AT tu-dresden.de>
|
||||
Michael Heyde <michael.heyde AT tu-dresden.de>
|
||||
Michael Kluge <michael.kluge AT tu-dresden.de>
|
||||
|
@ -1,12 +1,27 @@
|
||||
1.11.3openmpi
|
||||
- otfaux: fixed build errors on Solaris and NetBSD
|
||||
1.12.1openmpi
|
||||
- implemented workaround to avoid setting otf_errno when a false
|
||||
error happens during OTF_RBuffer_Jump to a bogus zlib sync point
|
||||
|
||||
1.11.2openmpi
|
||||
1.12salmon
|
||||
- OTF library:
|
||||
- fixed potential segmentation fault when appending a new
|
||||
key-value pair to a list after removing another one
|
||||
- OTFAUX library:
|
||||
- 'OTFAUX_ThumbnailReader_read()' now correctly returns success
|
||||
after reading the thumbnail
|
||||
- otfprofile:
|
||||
- create VampirTrace filter file from irregularity analysis
|
||||
- otfaux:
|
||||
- fixed build errors on Solaris and NetBSD
|
||||
- fixed "time not increasing" error when generating inline
|
||||
snapshots
|
||||
|
||||
1.11.2goldfish
|
||||
- lib OTFAUX:
|
||||
- speed-up messages matching, if no snapshots should
|
||||
be generated
|
||||
|
||||
1.11.1openmpi
|
||||
1.11.1goldfish
|
||||
- new 'OTF_MasterControl_clone()' function to simplify making a copy
|
||||
of a master control object
|
||||
- otfaux:
|
||||
|
@ -6,8 +6,8 @@
|
||||
# <major>.<minor>.<sub>. If sub is zero, then it is omitted.
|
||||
|
||||
major=1
|
||||
minor=11
|
||||
sub=3
|
||||
minor=12
|
||||
sub=1
|
||||
|
||||
# string is used for alpha, beta, or release tags. If it is non-empty, it will
|
||||
# be appended to the version number.
|
||||
@ -24,7 +24,8 @@ sub=3
|
||||
# 1.8.* sturgeon
|
||||
# 1.9.* sawfish
|
||||
# 1.10.* coelacanth
|
||||
# 1.11.* goldfish
|
||||
# 1.11.* goldfish
|
||||
# 1.12.* salmon
|
||||
#
|
||||
|
||||
string=openmpi
|
||||
@ -49,5 +50,5 @@ string=openmpi
|
||||
# release, age must be incremented. Otherwise, reset age
|
||||
# to '0'.
|
||||
|
||||
library=6:2:5
|
||||
library=6:3:5
|
||||
|
||||
|
Двоичные данные
ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf
Двоичные данные
ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf
Двоичный файл не отображается.
Двоичные данные
ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf
Двоичные данные
ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf
Двоичный файл не отображается.
@ -390,12 +390,12 @@ OTFAUX_Process_writeThumbnail( OTFAUX_Process* process,
|
||||
|
||||
int
|
||||
OTFAUX_Process_enqueueRecv( OTFAUX_Process* process,
|
||||
uint64_t eventTime,
|
||||
uint32_t receiverProcessId,
|
||||
uint32_t comm,
|
||||
uint32_t tag,
|
||||
uint32_t length,
|
||||
uint32_t scl )
|
||||
uint64_t eventTime,
|
||||
uint32_t receiverProcessId,
|
||||
uint32_t comm,
|
||||
uint32_t tag,
|
||||
uint32_t length,
|
||||
uint32_t scl )
|
||||
{
|
||||
OTFAUX_ReciveQueue* queue;
|
||||
OTFAUX_Message* recv;
|
||||
@ -403,8 +403,6 @@ OTFAUX_Process_enqueueRecv( OTFAUX_Process* process,
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
cleanup_pending_sends( process, eventTime );
|
||||
|
||||
queue = get_queue( process, receiverProcessId, comm, tag, 1 );
|
||||
if ( !queue )
|
||||
return 0;
|
||||
@ -429,18 +427,16 @@ OTFAUX_Process_enqueueRecv( OTFAUX_Process* process,
|
||||
|
||||
int
|
||||
OTFAUX_Process_enterFunction( OTFAUX_Process* process,
|
||||
uint64_t eventTime,
|
||||
uint32_t function,
|
||||
uint32_t scl,
|
||||
void* eventData )
|
||||
uint64_t eventTime,
|
||||
uint32_t function,
|
||||
uint32_t scl,
|
||||
void* eventData )
|
||||
{
|
||||
OTFAUX_FunctionCall* call;
|
||||
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
cleanup_pending_sends( process, eventTime );
|
||||
|
||||
if ( !stack_empty( &process->sharedState->functionCalls ) )
|
||||
{
|
||||
/* take it out of the object pool */
|
||||
@ -477,8 +473,6 @@ OTFAUX_Process_leaveFunction( OTFAUX_Process* process,
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
cleanup_pending_sends( process, eventTime );
|
||||
|
||||
if ( stack_empty( &process->functionStack ) )
|
||||
return 0;
|
||||
|
||||
@ -493,16 +487,16 @@ OTFAUX_Process_leaveFunction( OTFAUX_Process* process,
|
||||
|
||||
int
|
||||
OTFAUX_Process_sendMessage( OTFAUX_Process* process,
|
||||
uint64_t eventTime,
|
||||
uint32_t receiverProcessId,
|
||||
uint32_t comm,
|
||||
uint32_t tag,
|
||||
uint32_t length,
|
||||
uint32_t scl,
|
||||
uint64_t* recvTime,
|
||||
uint32_t* recvLength,
|
||||
uint32_t* recvScl,
|
||||
void* eventData )
|
||||
uint64_t eventTime,
|
||||
uint32_t receiverProcessId,
|
||||
uint32_t comm,
|
||||
uint32_t tag,
|
||||
uint32_t length,
|
||||
uint32_t scl,
|
||||
uint64_t* recvTime,
|
||||
uint32_t* recvLength,
|
||||
uint32_t* recvScl,
|
||||
void* eventData )
|
||||
{
|
||||
OTFAUX_ReciveQueue* queue;
|
||||
OTFAUX_Message* msg;
|
||||
@ -510,8 +504,6 @@ OTFAUX_Process_sendMessage( OTFAUX_Process* process,
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
cleanup_pending_sends( process, eventTime );
|
||||
|
||||
/* MsgMatching */
|
||||
queue = get_queue( process, receiverProcessId, comm, tag, 0 );
|
||||
if ( !queue )
|
||||
@ -529,7 +521,16 @@ OTFAUX_Process_sendMessage( OTFAUX_Process* process,
|
||||
*recvScl = msg->recvScl;
|
||||
msg->eventData = eventData;
|
||||
|
||||
stack_add( &process->pendingSends, &msg->e );
|
||||
/* only maintain the pending messages, if we want to write snapshots */
|
||||
if ( process->sharedState->writeSendSnapshot )
|
||||
{
|
||||
stack_add( &process->pendingSends, &msg->e );
|
||||
}
|
||||
else
|
||||
{
|
||||
release_event_data( process, msg->eventData );
|
||||
free( msg );
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -592,8 +593,6 @@ OTFAUX_Process_beginCollOp( OTFAUX_Process* process,
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
cleanup_pending_sends( process, eventTime );
|
||||
|
||||
entry = stack_next( &process->pendingCollOps );
|
||||
while ( entry != &process->pendingCollOps )
|
||||
{
|
||||
@ -647,8 +646,6 @@ OTFAUX_Process_endCollOp( OTFAUX_Process* process,
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
cleanup_pending_sends( process, eventTime );
|
||||
|
||||
entry = stack_next( &process->pendingCollOps );
|
||||
while ( entry != &process->pendingCollOps )
|
||||
{
|
||||
@ -671,11 +668,11 @@ OTFAUX_Process_endCollOp( OTFAUX_Process* process,
|
||||
|
||||
int
|
||||
OTFAUX_Process_openFile( OTFAUX_Process* process,
|
||||
uint64_t eventTime,
|
||||
uint32_t fileId,
|
||||
uint64_t handleId,
|
||||
uint32_t scl,
|
||||
void* eventData )
|
||||
uint64_t eventTime,
|
||||
uint32_t fileId,
|
||||
uint64_t handleId,
|
||||
uint32_t scl,
|
||||
void* eventData )
|
||||
{
|
||||
OTFAUX_File* file;
|
||||
Stack *entry;
|
||||
@ -683,8 +680,6 @@ OTFAUX_Process_openFile( OTFAUX_Process* process,
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
cleanup_pending_sends( process, eventTime );
|
||||
|
||||
entry = stack_next( &process->openFiles );
|
||||
while ( entry != &process->openFiles )
|
||||
{
|
||||
@ -735,8 +730,6 @@ OTFAUX_Process_closeFile( OTFAUX_Process* process,
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
cleanup_pending_sends( process, eventTime );
|
||||
|
||||
entry = stack_next( &process->openFiles );
|
||||
while ( entry != &process->openFiles )
|
||||
{
|
||||
@ -770,8 +763,6 @@ OTFAUX_Process_beginFileOp( OTFAUX_Process* process,
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
cleanup_pending_sends( process, eventTime );
|
||||
|
||||
entry = stack_next( &process->pendingFileOps );
|
||||
while ( entry != &process->pendingFileOps )
|
||||
{
|
||||
@ -821,8 +812,6 @@ OTFAUX_Process_endFileOp( OTFAUX_Process* process,
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
cleanup_pending_sends( process, eventTime );
|
||||
|
||||
entry = stack_next( &process->pendingFileOps );
|
||||
while ( entry != &process->pendingFileOps )
|
||||
{
|
||||
@ -931,14 +920,14 @@ OTFAUX_Process_writeSends( OTFAUX_Process* process,
|
||||
int ret = 1;
|
||||
Stack* entry;
|
||||
|
||||
cleanup_pending_sends( process, snapshotTime );
|
||||
|
||||
if ( !process )
|
||||
return 0;
|
||||
|
||||
if ( !process->sharedState->writeSendSnapshot )
|
||||
return 1;
|
||||
|
||||
cleanup_pending_sends( process, snapshotTime );
|
||||
|
||||
entry = stack_next( &process->pendingSends );
|
||||
while ( ret && entry != &process->pendingSends )
|
||||
{
|
||||
|
@ -673,14 +673,14 @@ OTFAUX_State_writeSnapshot( OTFAUX_State* auxState,
|
||||
snapshotTime,
|
||||
userData );
|
||||
ret = ret && OTFAUX_Process_writeSends( process,
|
||||
snapshotTime,
|
||||
userData );
|
||||
snapshotTime,
|
||||
userData );
|
||||
ret = ret && OTFAUX_Process_writeOpenFiles( process,
|
||||
snapshotTime,
|
||||
userData );
|
||||
snapshotTime,
|
||||
userData );
|
||||
ret = ret && OTFAUX_Process_writeCollOps( process,
|
||||
snapshotTime,
|
||||
userData );
|
||||
snapshotTime,
|
||||
userData );
|
||||
ret = ret && OTFAUX_Process_writeFileOps( process,
|
||||
snapshotTime,
|
||||
userData );
|
||||
|
@ -235,13 +235,14 @@ OTFAUX_ThumbnailReader_read( OTFAUX_ThumbnailReader* tn_reader,
|
||||
status = 1;
|
||||
for (i = 0; i < tn_reader->nprocs; i++)
|
||||
{
|
||||
char comma;
|
||||
status = fscanf( tn_reader->file, "%llx:", &process );
|
||||
if (1 != status)
|
||||
goto out;
|
||||
for (j = 0; j < tn_reader->width; ++j)
|
||||
{
|
||||
status = fscanf( tn_reader->file, "%x,", &functions[j] );
|
||||
if (1 != status)
|
||||
status = fscanf( tn_reader->file, "%x%c", &functions[j], &comma );
|
||||
if ( 2 != status || comma != ',' )
|
||||
goto out;
|
||||
}
|
||||
if (handler)
|
||||
@ -249,12 +250,17 @@ OTFAUX_ThumbnailReader_read( OTFAUX_ThumbnailReader* tn_reader,
|
||||
handler( data, process, functions );
|
||||
}
|
||||
|
||||
if ( fgetc( tn_reader->file ) != '\n' && !feof( tn_reader->file ) )
|
||||
if ( fgetc( tn_reader->file ) != '\n' )
|
||||
{
|
||||
break;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if ( fgetc( tn_reader->file ) != EOF )
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
out:
|
||||
free( functions );
|
||||
|
||||
|
@ -230,7 +230,7 @@ uint8_t OTF_KeyValueList_appendPair(OTF_KeyValueList* list, OTF_KeyValuePair pai
|
||||
|
||||
p->kvPair = pair;
|
||||
list->kvCurrent = p->kvNext;
|
||||
|
||||
|
||||
list->count++;
|
||||
|
||||
return 0;
|
||||
@ -819,7 +819,13 @@ uint8_t OTF_KeyValueList_removeKey(OTF_KeyValueList *list, uint32_t key) {
|
||||
if ( p->kvNext ) {
|
||||
p->kvNext->kvPrev = p->kvPrev;
|
||||
}
|
||||
free(p);
|
||||
|
||||
/* move the deleted element after the end of the list */
|
||||
p->kvPrev = list->kvEnd;
|
||||
p->kvNext = NULL;
|
||||
list->kvEnd->kvNext=p;
|
||||
list->kvEnd= p;
|
||||
|
||||
list->count--;
|
||||
return 0;
|
||||
}
|
||||
|
@ -231,9 +231,9 @@ struct OTF_KeyValueList_struct {
|
||||
uint32_t key_count; /* number of different keys in list --> user-relevant */
|
||||
uint32_t count; /* total number of entries in list (treat byte arrays particular) --> internal use only */
|
||||
uint32_t size; /* number of allocated entries --> internal */
|
||||
OTF_KeyValuePairList *kvBegin;
|
||||
OTF_KeyValuePairList *kvEnd;
|
||||
OTF_KeyValuePairList *kvCurrent;
|
||||
OTF_KeyValuePairList *kvBegin; /* first element of the list */
|
||||
OTF_KeyValuePairList *kvEnd; /* last allocated element of the list, may be used or not */
|
||||
OTF_KeyValuePairList *kvCurrent; /* first unused element in the list, insert new ones here */
|
||||
};
|
||||
|
||||
/** @endcond */
|
||||
|
@ -660,6 +660,9 @@ int OTF_RBuffer_jump( OTF_RBuffer* rbuffer, uint64_t filepos ) {
|
||||
|
||||
|
||||
int ret;
|
||||
#ifdef HAVE_ZLIB
|
||||
int otf_errno_backup;
|
||||
#endif
|
||||
size_t read;
|
||||
/* uint64_t currentPos; */
|
||||
uint32_t i;
|
||||
@ -676,7 +679,43 @@ int OTF_RBuffer_jump( OTF_RBuffer* rbuffer, uint64_t filepos ) {
|
||||
}
|
||||
|
||||
rbuffer->pos= 0;
|
||||
#ifdef HAVE_ZLIB
|
||||
/*
|
||||
* ooooooooooooo .oooooo. oooooooooo. .oooooo.
|
||||
* 8' 888 `8 d8P' `Y8b `888' `Y8b d8P' `Y8b
|
||||
* 888 888 888 888 888 888 888
|
||||
* 888 888 888 888 888 888 888
|
||||
* 888 888 888 888 888 888 888
|
||||
* 888 `88b d88' 888 d88' `88b d88'
|
||||
* o888o `Y8bood8P' o888bood8P' `Y8bood8P'
|
||||
*
|
||||
* BIG TODO / FIXME --- this is a temporary workaround, waiting to be
|
||||
* replaced by a better workaround.
|
||||
* When seeking in a zlib compressed file it is possible to find a sync
|
||||
* point marker that is not actually a sync point. Then the inflate will
|
||||
* fail with an error. Usually this happens in
|
||||
* OTF_RBuffer_getFileProperties, where it will just retry, so this is not
|
||||
* too bad. I have no idea what happens if this happens in
|
||||
* OTF_RBuffer_searchTime (a.k.a. partial loading)
|
||||
* Well, in any case - if the error code is set, vtunify will notice that
|
||||
* sooner or later and die thinking that something went wrong. We don't
|
||||
* want that to happen while there was no real error, so we reset the error
|
||||
* code.
|
||||
*
|
||||
* Also this is not threadsafe )-;
|
||||
*
|
||||
* [tilsche/juenz, 12.11.2012]
|
||||
*/
|
||||
otf_errno_backup= otf_errno;
|
||||
#endif
|
||||
read= OTF_File_read( rbuffer->file, rbuffer->buffer, rbuffer->jumpsize );
|
||||
#ifdef HAVE_ZLIB
|
||||
if ( otf_errno != otf_errno_backup ) {
|
||||
|
||||
otf_errno= otf_errno_backup;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
rbuffer->end= (uint32_t) read;
|
||||
|
||||
|
@ -17,7 +17,7 @@
|
||||
|
||||
|
||||
#define OTF_VERSION_MAJOR 1
|
||||
#define OTF_VERSION_MINOR 11
|
||||
#define OTF_VERSION_MINOR 12
|
||||
#define OTF_VERSION_SUB 1
|
||||
#define OTF_VERSION_STRING "openmpi"
|
||||
|
||||
|
@ -81,6 +81,11 @@ int OTF_WBuffer_close( OTF_WBuffer* wbuffer ) {
|
||||
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Write a timestamp at the very end of a trace to avoid traces with a huge tail
|
||||
* of timestamp-less events (e.g. fake-KV-counters) that require
|
||||
* very inefficient (n^2) backwards search for searching the last timestamp.
|
||||
*/
|
||||
if( (uint32_t) -1 != wbuffer->process ) {
|
||||
|
||||
OTF_WBuffer_writeUint64( wbuffer, wbuffer->time );
|
||||
|
@ -561,3 +561,146 @@ int handleEndFileOperation( void *firsthandlerarg, uint64_t time, uint32_t proce
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
|
||||
int handleNoOp( void *firsthandlerarg, uint64_t time, uint32_t process,
|
||||
OTF_KeyValueList* list ) {
|
||||
|
||||
|
||||
Control* control= (Control*) firsthandlerarg;
|
||||
|
||||
while ( control->checkTime( time ) )
|
||||
;
|
||||
|
||||
if ( control->copyEvents )
|
||||
return ( 0 == OTF_Writer_writeNoOpKV( control->writer, time,
|
||||
process, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
|
||||
int handleEventComment( void *firsthandlerarg, uint64_t time, uint32_t process,
|
||||
const char* comment, OTF_KeyValueList* list ) {
|
||||
|
||||
|
||||
Control* control= (Control*) firsthandlerarg;
|
||||
|
||||
while ( control->checkTime( time ) )
|
||||
;
|
||||
|
||||
if ( control->copyEvents )
|
||||
return ( 0 == OTF_Writer_writeEventCommentKV( control->writer, time,
|
||||
process, comment, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
|
||||
int handleBeginProcess( void *firsthandlerarg, uint64_t time,
|
||||
uint32_t process, OTF_KeyValueList* list ) {
|
||||
|
||||
|
||||
Control* control= (Control*) firsthandlerarg;
|
||||
|
||||
while ( control->checkTime( time ) )
|
||||
;
|
||||
|
||||
if ( control->copyEvents )
|
||||
return ( 0 == OTF_Writer_writeBeginProcessKV( control->writer, time,
|
||||
process, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
|
||||
int handleEndProcess( void *firsthandlerarg, uint64_t time,
|
||||
uint32_t process, OTF_KeyValueList* list ) {
|
||||
|
||||
|
||||
Control* control= (Control*) firsthandlerarg;
|
||||
|
||||
while ( control->checkTime( time ) )
|
||||
;
|
||||
|
||||
if ( control->copyEvents )
|
||||
return ( 0 == OTF_Writer_writeEndProcessKV( control->writer, time,
|
||||
process, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
|
||||
int handleRMAPut( void *firsthandlerarg, uint64_t time, uint32_t process,
|
||||
uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag,
|
||||
uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list ) {
|
||||
|
||||
|
||||
Control* control= (Control*) firsthandlerarg;
|
||||
|
||||
while ( control->checkTime( time ) )
|
||||
;
|
||||
|
||||
if ( control->copyEvents )
|
||||
return ( 0 == OTF_Writer_writeRMAPutKV( control->writer, time,
|
||||
process, origin, target, communicator, tag, bytes, scltoken, list )
|
||||
) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
|
||||
int handleRMAPutRemoteEnd( void *firsthandlerarg, uint64_t time,
|
||||
uint32_t process, uint32_t origin, uint32_t target, uint32_t communicator,
|
||||
uint32_t tag, uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list ) {
|
||||
|
||||
|
||||
Control* control= (Control*) firsthandlerarg;
|
||||
|
||||
while ( control->checkTime( time ) )
|
||||
;
|
||||
|
||||
if ( control->copyEvents )
|
||||
return ( 0 == OTF_Writer_writeRMAPutRemoteEndKV( control->writer,
|
||||
time, process, origin, target, communicator, tag, bytes, scltoken, list )
|
||||
) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
|
||||
int handleRMAGet( void *firsthandlerarg, uint64_t time, uint32_t process,
|
||||
uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag,
|
||||
uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list ) {
|
||||
|
||||
|
||||
Control* control= (Control*) firsthandlerarg;
|
||||
|
||||
while ( control->checkTime( time ) )
|
||||
;
|
||||
|
||||
if ( control->copyEvents )
|
||||
return ( 0 == OTF_Writer_writeRMAGetKV( control->writer, time,
|
||||
process, origin, target, communicator, tag, bytes, scltoken, list )
|
||||
) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
|
||||
int handleRMAEnd( void *firsthandlerarg, uint64_t time, uint32_t process, uint32_t remote,
|
||||
uint32_t communicator, uint32_t tag, uint32_t scltoken, OTF_KeyValueList* list ) {
|
||||
|
||||
|
||||
Control* control= (Control*) firsthandlerarg;
|
||||
|
||||
while ( control->checkTime( time ) )
|
||||
;
|
||||
|
||||
if ( control->copyEvents )
|
||||
return ( 0 == OTF_Writer_writeRMAEndKV( control->writer, time,
|
||||
process, remote, communicator, tag, scltoken, list )
|
||||
) ? OTF_RETURN_ABORT : OTF_RETURN_OK;
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
@ -91,4 +91,39 @@ int handleEndFileOperation( void *firsthandlerarg, uint64_t time, uint32_t proce
|
||||
uint32_t fileid, uint64_t matchingId, uint64_t handleId, uint32_t operation,
|
||||
uint64_t bytes, uint32_t scltoken, OTF_KeyValueList *kvlist );
|
||||
|
||||
int handleNoOp( void *firsthandlerarg, uint64_t time, uint32_t process,
|
||||
OTF_KeyValueList* list );
|
||||
|
||||
|
||||
int handleEventComment( void *firsthandlerarg, uint64_t time, uint32_t process,
|
||||
const char* comment, OTF_KeyValueList* list );
|
||||
|
||||
|
||||
int handleBeginProcess( void *firsthandlerarg, uint64_t time,
|
||||
uint32_t process, OTF_KeyValueList* list );
|
||||
|
||||
|
||||
int handleEndProcess( void *firsthandlerarg, uint64_t time,
|
||||
uint32_t process, OTF_KeyValueList* list );
|
||||
|
||||
|
||||
int handleRMAPut( void *firsthandlerarg, uint64_t time, uint32_t process,
|
||||
uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag,
|
||||
uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list );
|
||||
|
||||
|
||||
int handleRMAPutRemoteEnd( void *firsthandlerarg, uint64_t time,
|
||||
uint32_t process, uint32_t origin, uint32_t target, uint32_t communicator,
|
||||
uint32_t tag, uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list );
|
||||
|
||||
|
||||
int handleRMAGet( void *firsthandlerarg, uint64_t time, uint32_t process,
|
||||
uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag,
|
||||
uint64_t bytes, uint32_t scltoken, OTF_KeyValueList* list );
|
||||
|
||||
|
||||
int handleRMAEnd( void *firsthandlerarg, uint64_t time, uint32_t process, uint32_t remote,
|
||||
uint32_t communicator, uint32_t tag, uint32_t scltoken, OTF_KeyValueList* list );
|
||||
|
||||
|
||||
#endif /* OTFTOVTF3_HANDLER_H */
|
||||
|
@ -582,6 +582,11 @@ int main ( int argc, const char** argv ) {
|
||||
def_wstream= OTF_Writer_getStream( writer, 0 );
|
||||
}
|
||||
|
||||
/* increase buffer size for writing definitions (and markers), if necessary */
|
||||
if ( 10240 > buffersize ) {
|
||||
OTF_WStream_setBufferSizes( def_wstream, 10240 );
|
||||
}
|
||||
|
||||
OTF_HandlerArray_getCopyHandler_stream( handlers, def_wstream );
|
||||
|
||||
Control* control= new Control( writer, def_wstream, verbose,
|
||||
@ -811,6 +816,64 @@ int main ( int argc, const char** argv ) {
|
||||
OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control,
|
||||
OTF_ENDFILEOP_RECORD );
|
||||
|
||||
OTF_HandlerArray_setHandler( handlers,
|
||||
(OTF_FunctionPointer*) handleNoOp,
|
||||
OTF_NOOP_RECORD );
|
||||
OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control,
|
||||
OTF_NOOP_RECORD );
|
||||
|
||||
|
||||
OTF_HandlerArray_setHandler( handlers,
|
||||
(OTF_FunctionPointer*) handleEventComment,
|
||||
OTF_EVENTCOMMENT_RECORD );
|
||||
OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control,
|
||||
OTF_EVENTCOMMENT_RECORD );
|
||||
|
||||
|
||||
OTF_HandlerArray_setHandler( handlers,
|
||||
(OTF_FunctionPointer*) handleBeginProcess,
|
||||
OTF_BEGINPROCESS_RECORD );
|
||||
OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control,
|
||||
OTF_BEGINPROCESS_RECORD );
|
||||
|
||||
|
||||
OTF_HandlerArray_setHandler( handlers,
|
||||
(OTF_FunctionPointer*) handleEndProcess,
|
||||
OTF_ENDPROCESS_RECORD );
|
||||
OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control,
|
||||
OTF_ENDPROCESS_RECORD );
|
||||
|
||||
|
||||
OTF_HandlerArray_setHandler( handlers,
|
||||
(OTF_FunctionPointer*) handleRMAPut,
|
||||
OTF_RMAPUT_RECORD );
|
||||
OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control,
|
||||
OTF_RMAPUT_RECORD );
|
||||
|
||||
|
||||
|
||||
OTF_HandlerArray_setHandler( handlers,
|
||||
(OTF_FunctionPointer*) handleRMAPutRemoteEnd,
|
||||
OTF_RMAPUTRE_RECORD );
|
||||
OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control,
|
||||
OTF_RMAPUTRE_RECORD );
|
||||
|
||||
|
||||
|
||||
OTF_HandlerArray_setHandler( handlers,
|
||||
(OTF_FunctionPointer*) handleRMAGet,
|
||||
OTF_RMAGET_RECORD );
|
||||
OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control,
|
||||
OTF_RMAGET_RECORD );
|
||||
|
||||
|
||||
|
||||
OTF_HandlerArray_setHandler( handlers,
|
||||
(OTF_FunctionPointer*) handleRMAEnd,
|
||||
OTF_RMAEND_RECORD );
|
||||
OTF_HandlerArray_setFirstHandlerArg( handlers, (void*) control,
|
||||
OTF_RMAEND_RECORD );
|
||||
|
||||
|
||||
if ( doThumbnail ) {
|
||||
uint32_t i;
|
||||
|
@ -227,11 +227,11 @@ int handleDefProcessGroup( void* userData, uint32_t stream,
|
||||
fprintf( c->outfile, "(#%llu) \tDefProcessGroup: stream %u, group %u, name \"%s\", procs ",
|
||||
(long long unsigned) c->num, stream, group, name );
|
||||
|
||||
for( i= 0; i < (numberOfProcs - 1); ++i ) {
|
||||
fprintf( c->outfile, "%u, ", procs[i] );
|
||||
const char* sep= "";
|
||||
for( i= 0; i < numberOfProcs; ++i ) {
|
||||
fprintf( c->outfile, "%s%u", sep, procs[i] );
|
||||
sep= ", ";
|
||||
}
|
||||
|
||||
fprintf( c->outfile, "%u", procs[i] );
|
||||
|
||||
printKeyValueList(c, kvlist);
|
||||
}
|
||||
|
@ -14,6 +14,7 @@ COMMONSOURCES = \
|
||||
$(OTFPROFILESRCDIR)/create_csv.h \
|
||||
$(OTFPROFILESRCDIR)/create_marker.h \
|
||||
$(OTFPROFILESRCDIR)/create_latex.h \
|
||||
$(OTFPROFILESRCDIR)/create_filter.h \
|
||||
$(OTFPROFILESRCDIR)/datastructs.h \
|
||||
$(OTFPROFILESRCDIR)/otfprofile.h \
|
||||
$(OTFPROFILESRCDIR)/process_dispersion.h \
|
||||
@ -26,6 +27,7 @@ COMMONSOURCES = \
|
||||
$(OTFPROFILESRCDIR)/create_csv.cpp \
|
||||
$(OTFPROFILESRCDIR)/create_marker.cpp \
|
||||
$(OTFPROFILESRCDIR)/create_latex.cpp \
|
||||
$(OTFPROFILESRCDIR)/create_filter.cpp \
|
||||
$(OTFPROFILESRCDIR)/otfprofile.cpp \
|
||||
$(OTFPROFILESRCDIR)/process_dispersion.cpp \
|
||||
$(OTFPROFILESRCDIR)/summarize_data.cpp \
|
||||
|
@ -123,7 +123,7 @@ bool ProcessClustering( AllData& alldata ) {
|
||||
char cmd[1024];
|
||||
|
||||
snprintf( cmd, sizeof( cmd ) - 1,
|
||||
"otfshrink -i %s -o %s -f %s",
|
||||
"otfshrink -i %s -o %s -f %s -k",
|
||||
alldata.params.input_file_prefix.c_str(),
|
||||
alldata.params.clustering.shrink_output_prefix.c_str(),
|
||||
alldata.params.clustering.map_file_name.c_str() );
|
||||
|
@ -18,7 +18,8 @@
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
/*store current callpath for each process */
|
||||
map<uint32_t,string> callpathMap;
|
||||
|
||||
static void prepare_progress( AllData& alldata, uint64_t max_bytes ) {
|
||||
|
||||
@ -442,6 +443,17 @@ static int handle_enter( void* fha, uint64_t time, uint32_t function,
|
||||
list<StackType>& stack= alldata->stackPerProcess[ process ];
|
||||
stack.push_back( StackType( function, time ) );
|
||||
|
||||
if (alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
/* store current callpath */
|
||||
std::ostringstream os;
|
||||
os << " "<<function;
|
||||
callpathMap[process] += os.str();
|
||||
/* save maximum length, for buffer allocation in reduce_data.cpp*/
|
||||
if(alldata->maxCallpathLength < callpathMap[process].length())
|
||||
alldata->maxCallpathLength = callpathMap[process].length();
|
||||
}
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
@ -500,12 +512,17 @@ static int handle_leave( void* fha, uint64_t time, uint32_t function,
|
||||
|
||||
stack.pop_back();
|
||||
|
||||
/*
|
||||
cerr << " func " << func << " @ process " << process << ": " <<
|
||||
"excl " << excl << " ticks, incl " << incl << " ticks" << endl;
|
||||
*/
|
||||
alldata->functionMapPerRank[ Pair( process, func ) ].add( 1, excl, incl );
|
||||
|
||||
if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
/* store function by process, callpath and functionId*/
|
||||
alldata->functionCallpathMapPerRank[ TripleCallpath( process, callpathMap[process],func ) ].add( 1, excl, incl );
|
||||
alldata->functionCallpathMapPerRank[ TripleCallpath( process, callpathMap[process],func ) ].callpath = callpathMap[process];
|
||||
/* reduce callpath step at leave */
|
||||
callpathMap[process] = callpathMap[process].substr (0,callpathMap[process].find_last_of(" "));
|
||||
}
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
@ -1481,7 +1498,7 @@ static bool read_statistics( AllData& alldata, OTF_Reader* reader ) {
|
||||
|
||||
|
||||
bool CollectData( AllData& alldata ) {
|
||||
|
||||
alldata.maxCallpathLength = 0;
|
||||
bool error= false;
|
||||
|
||||
/* start runtime measurement for collecting data */
|
||||
|
@ -23,9 +23,10 @@ using namespace std;
|
||||
/* fence between statistics parts within the buffer for consistency checking */
|
||||
enum { FENCE= 0xDEADBEEF };
|
||||
|
||||
/*store current callpath for each process */
|
||||
map<uint32_t,string> callpath;
|
||||
|
||||
static void prepare_progress( AllData& alldata, uint64_t max_bytes ) {
|
||||
|
||||
Progress& progress= alldata.progress;
|
||||
|
||||
progress.cur_bytes= 0;
|
||||
@ -280,6 +281,15 @@ static int handle_enter( void* fha, uint64_t time, uint32_t function,
|
||||
list<StackType>& stack= alldata->stackPerProcess[ process ];
|
||||
stack.push_back( StackType( function, time ) );
|
||||
|
||||
if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
/* create callpath */
|
||||
/* add callpath step on enter event */
|
||||
std::ostringstream os;
|
||||
os << " " << function;
|
||||
callpath[process] += os.str();
|
||||
}
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
@ -297,43 +307,68 @@ static int handle_leave( void* fha, uint64_t time, uint32_t function,
|
||||
list<StackType>::reverse_iterator parent_it= ++stack.rbegin();
|
||||
|
||||
uint64_t func= top.fid;
|
||||
uint64_t incl= time - top.timestamp;
|
||||
uint64_t excl= incl - top.childDuration;
|
||||
uint64_t incl_time= time - top.timestamp;
|
||||
uint64_t excl_time= incl_time - top.childDuration;
|
||||
|
||||
map< uint64_t, FunctionData>::const_iterator it= alldata->functionMapGlobal.find( func );
|
||||
assert ( alldata->functionMapGlobal.end() != it );
|
||||
FunctionData functionData= it->second;
|
||||
|
||||
double time_excl_min = functionData.excl_time.min;
|
||||
double time_excl_max = functionData.excl_time.max;
|
||||
double time_excl= excl;
|
||||
|
||||
double time_min = functionData.DISPERSION_OPTION.min;
|
||||
double time_max = functionData.DISPERSION_OPTION.max;
|
||||
double time_max_c = 0;
|
||||
double time_min_c = 0;
|
||||
double time_a= DISPERSION_OPTION;
|
||||
|
||||
if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
/* get currentfunction from functionCallpathMapGlobal */
|
||||
map< PairCallpath, FunctionData,ltPairCallpath>::const_iterator itc= alldata->functionCallpathMapGlobal.find( PairCallpath(func,callpath[process]) );
|
||||
|
||||
assert ( alldata->functionCallpathMapGlobal.end() != itc );
|
||||
FunctionData functionCallpathData= itc->second;
|
||||
|
||||
time_min_c = functionCallpathData.DISPERSION_OPTION.min;
|
||||
time_max_c = functionCallpathData.DISPERSION_OPTION.max;
|
||||
}
|
||||
|
||||
if ( parent_it != stack.rend() ) {
|
||||
|
||||
parent_it->childDuration += incl;
|
||||
parent_it->childDuration += incl_time;
|
||||
|
||||
}
|
||||
|
||||
stack.pop_back();
|
||||
|
||||
if ( time_excl_max > time_excl_min) {
|
||||
if ( time_max > time_min) {
|
||||
|
||||
uint64_t bin = (uint64_t) ( ( log(time_excl) - log(time_excl_min) ) /
|
||||
( log(time_excl_max) - log(time_excl_min) )
|
||||
uint64_t bin = (uint64_t) ( ( log(time_a) - log(time_min) ) /
|
||||
( log(time_max) - log(time_min) )
|
||||
* 100 );
|
||||
/* cerr << " func " << func << " @process " << process << " : " << " bin " << bin << " , excl " << excl << " ticks "<< endl;
|
||||
*/
|
||||
alldata->functionDurationSectionMapPerRank[ Triple(process, func, bin ) ].add( 1, excl, incl );
|
||||
|
||||
uint64_t bin_c = 0;
|
||||
if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
bin_c = (uint64_t) ( ( log(time_a) - log(time_min_c) ) /
|
||||
( log(time_max_c) - log(time_min_c) )
|
||||
* 100 );
|
||||
alldata->functionDurationSectionMapPerRank[ Triple(process, func, bin )]
|
||||
.add( 1, excl_time, incl_time );
|
||||
if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
alldata->functionDurationSectionCallpathMapPerRank[ Quadruple(process, func,callpath[process], bin_c ) ].add( 1, excl_time,callpath[process], incl_time );
|
||||
}
|
||||
|
||||
if ( time_excl_max == time_excl || time_excl_min == time_excl ) {
|
||||
alldata->functionMinMaxLocationMap [ func ].add( excl, process, (time-incl) );
|
||||
/*
|
||||
cerr << " func " << func << " @process " << process << " : " << " time " << (time-incl) << " excl " << excl << endl;
|
||||
*/
|
||||
if ( time_max == time_a || time_min == time_a ) {
|
||||
alldata->functionMinMaxLocationMap [ func ].add( excl_time, process, (time-incl_time) );
|
||||
}
|
||||
|
||||
|
||||
if(alldata->params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
if ( time_max == time_a || time_min == time_a ) {
|
||||
alldata->functionMinMaxLocationCallpathMap [ callpath[process] ].add( excl_time, process, (time-incl_time) );
|
||||
}
|
||||
/* go one step back on callpath, because of this leave */
|
||||
callpath[process] = callpath[process].substr (0,callpath[process].find_last_of(" "));
|
||||
}
|
||||
|
||||
return OTF_RETURN_OK;
|
||||
}
|
||||
|
||||
@ -357,9 +392,16 @@ static void share_profiledata( AllData& alldata ) {
|
||||
|
||||
int s1, s2;
|
||||
|
||||
size= alldata.functionMapGlobal.size(); /* map< uint64_t, FunctionData > functionMapGlobal; */
|
||||
num_fences++;
|
||||
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
size= alldata.functionCallpathMapGlobal.size(); /* map< PairCallpath, FunctionData, ltPairCallpath > functionCallpathMapGlobal; */
|
||||
num_fences++;
|
||||
}
|
||||
else
|
||||
{
|
||||
size= alldata.functionMapGlobal.size(); /* map< uint64_t, FunctionData > functionMapGlobal; */
|
||||
}
|
||||
/* get bytesize multiplying all pieces */
|
||||
|
||||
MPI_Pack_size( num_fences, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 );
|
||||
@ -368,9 +410,24 @@ static void share_profiledata( AllData& alldata ) {
|
||||
MPI_Pack_size( 1 + size * 7, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 );
|
||||
MPI_Pack_size( size * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 );
|
||||
buffer_size += s1 + s2;
|
||||
|
||||
|
||||
/* get bytesize multiplying all pieces */
|
||||
MPI_Pack_size( 1 + size * 8, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 );
|
||||
MPI_Pack_size( size * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 );
|
||||
buffer_size += s1 + s2;
|
||||
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator it= alldata.functionCallpathMapGlobal.begin();
|
||||
map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator itend= alldata.functionCallpathMapGlobal.end();
|
||||
for ( ; it != itend; ++it ) {
|
||||
MPI_Pack_size( it->second.callpath.length(), MPI_CHAR, MPI_COMM_WORLD, &s1 );
|
||||
buffer_size += s1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* broadcast buffer size */
|
||||
MPI_Bcast( &buffer_size, 1, MPI_INT, 0, MPI_COMM_WORLD );
|
||||
|
||||
@ -378,6 +435,10 @@ static void share_profiledata( AllData& alldata ) {
|
||||
buffer= new char[ buffer_size ];
|
||||
assert( buffer );
|
||||
|
||||
uint64_t callpath_length=0;
|
||||
MPI_Allreduce(&(alldata.maxCallpathLength),&callpath_length,1,MPI_UNSIGNED_LONG_LONG,MPI_MAX,MPI_COMM_WORLD);
|
||||
char* callpath = new char[callpath_length];
|
||||
|
||||
if ( 0 == alldata.myRank ) {
|
||||
/* pack parts */
|
||||
|
||||
@ -410,17 +471,50 @@ static void share_profiledata( AllData& alldata ) {
|
||||
MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
}
|
||||
|
||||
|
||||
/* extra check that doesn't cost too much */
|
||||
MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
/* pack size of functionCallpathMapGlobal */
|
||||
func_map_global_size= alldata.functionCallpathMapGlobal.size();
|
||||
MPI_Pack( &func_map_global_size, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
|
||||
/* pack functionCallpathMapGlobal */
|
||||
{
|
||||
map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator it= alldata.functionCallpathMapGlobal.begin();
|
||||
map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator itend= alldata.functionCallpathMapGlobal.end();
|
||||
uint64_t len;
|
||||
for ( ; it != itend; ++it ) {
|
||||
len = it->first.b.length();
|
||||
MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &len, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.excl_time.min, 1, MPI_DOUBLE, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.max, 1, MPI_DOUBLE, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.sum, 1, MPI_DOUBLE, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.incl_time.min, 1, MPI_DOUBLE, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.max, 1, MPI_DOUBLE, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) it->first.b.c_str(), len, MPI_CHAR, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
}
|
||||
}
|
||||
/* extra check that doesn't cost too much */
|
||||
MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* broadcast definitions buffer */
|
||||
MPI_Bcast( buffer, buffer_size, MPI_PACKED, 0, MPI_COMM_WORLD );
|
||||
|
||||
/* unpack definitions from buffer */
|
||||
|
||||
if ( 0 != alldata.myRank ) {
|
||||
|
||||
/* unpack parts */
|
||||
@ -433,7 +527,7 @@ static void share_profiledata( AllData& alldata ) {
|
||||
/* unpack size of functionMapGlobal */
|
||||
uint64_t func_map_global_size= 0;
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &func_map_global_size, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
|
||||
/* unpack functionMapGlobal */
|
||||
for ( uint64_t i= 0; i < func_map_global_size; i++ ) {
|
||||
|
||||
@ -459,14 +553,56 @@ static void share_profiledata( AllData& alldata ) {
|
||||
|
||||
alldata.functionMapGlobal[ func ].add( tmp );
|
||||
}
|
||||
|
||||
/* extra check that doesn't cost too much */
|
||||
fence= 0;
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
assert( FENCE == fence );
|
||||
}
|
||||
|
||||
/* unpack size of functionMapGlobal */
|
||||
func_map_global_size= 0;
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &func_map_global_size, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
/* unpack functionMapCallpathGlobal */
|
||||
for ( uint64_t i= 0; i < func_map_global_size; i++ ) {
|
||||
|
||||
uint64_t func;
|
||||
FunctionData tmp;
|
||||
uint64_t len;
|
||||
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &func, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &len, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.count.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.count.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.count.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.count.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.excl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.excl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.excl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.excl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.incl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.incl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.incl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &tmp.incl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, callpath, len, MPI_CHAR, MPI_COMM_WORLD );
|
||||
|
||||
tmp.callpath = callpath;
|
||||
tmp.callpath = tmp.callpath.substr (0,len);
|
||||
alldata.functionCallpathMapGlobal[ PairCallpath(func,tmp.callpath) ].add( tmp );
|
||||
}
|
||||
|
||||
/* extra check that doesn't cost too much */
|
||||
fence= 0;
|
||||
MPI_Unpack( buffer, buffer_size, &buffer_pos, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
assert( FENCE == fence );
|
||||
}
|
||||
}
|
||||
delete[] buffer;
|
||||
if(callpath_length > 0)
|
||||
delete[] callpath;
|
||||
}
|
||||
#endif /* OTFPROFILE_MPI */
|
||||
|
||||
@ -588,20 +724,16 @@ static bool read_events( AllData& alldata, OTF_Reader* reader ) {
|
||||
bool CollectDispersion( AllData& alldata ) {
|
||||
|
||||
bool error= false;
|
||||
|
||||
/* start runtime measurement for collecting dispersion information */
|
||||
StartMeasurement( alldata, 1, true, "collect dispersion information" );
|
||||
|
||||
/* open OTF file manager and reader */
|
||||
|
||||
OTF_FileManager* manager=
|
||||
OTF_FileManager_open( alldata.params.max_file_handles );
|
||||
assert( manager );
|
||||
|
||||
OTF_Reader* reader=
|
||||
OTF_Reader_open( alldata.params.input_file_prefix.c_str(), manager );
|
||||
assert( reader );
|
||||
|
||||
do {
|
||||
|
||||
#ifdef OTFPROFILE_MPI
|
||||
@ -609,14 +741,12 @@ bool CollectDispersion( AllData& alldata ) {
|
||||
/* share definitions needed for reading events to workers */
|
||||
|
||||
if ( 1 < alldata.numRanks ) {
|
||||
|
||||
share_profiledata( alldata );
|
||||
|
||||
}
|
||||
#endif /* OTFPROFILE_MPI */
|
||||
|
||||
/* read data from events */
|
||||
|
||||
if ( !alldata.params.read_from_stats ) {
|
||||
|
||||
VerbosePrint( alldata, 1, true, "reading events for dispersion\n" );
|
||||
@ -638,18 +768,15 @@ bool CollectDispersion( AllData& alldata ) {
|
||||
#endif /* OTFPROFILE_MPI */
|
||||
|
||||
} while( false );
|
||||
|
||||
/* close OTF file manager and reader */
|
||||
|
||||
OTF_Reader_close( reader );
|
||||
OTF_FileManager_close( manager );
|
||||
|
||||
if ( !error ) {
|
||||
|
||||
/* stop runtime measurement for collecting data */
|
||||
StopMeasurement( alldata, true, "collect dispersion information" );
|
||||
|
||||
}
|
||||
|
||||
return !error;
|
||||
}
|
||||
|
340
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_filter.cpp
Обычный файл
340
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_filter.cpp
Обычный файл
@ -0,0 +1,340 @@
|
||||
/*
|
||||
This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2012.
|
||||
Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <functional>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "otfprofile.h"
|
||||
#include "create_filter.h"
|
||||
#include "OTF_inttypes.h"
|
||||
#include "OTF_Definitions.h"
|
||||
#include "OTF_Platform.h"
|
||||
|
||||
/* cursor into callpath_tree: parsePath() resets it to NULL before walking a
   path, addToTree() moves it to the node it found or created */
CTree<string> *cur_pos;
|
||||
/* root of the call-path tree; (re)initialised by CreateFilter(), first path
   elements are attached as its direct children by addToTree() */
CTree<string> callpath_tree;
|
||||
|
||||
map<uint32_t, CTree<string>*> filtered;
|
||||
/* maxCount: sum of the 'count' values of all call paths handled by
   CreateFilter(); filterCount: number of rules written by postOrder() */
uint64_t maxCount, filterCount;
|
||||
/* next free tree node id; post-incremented whenever a node gets its id */
uint32_t idCount;
|
||||
map<Pair_int, int, ltPair_int> edgesEdg;
|
||||
map<Pair_int, uint64_t, ltPair_int> edgesEdg_sec;
|
||||
map<string, CTree<string>*> CallpathNodes;
|
||||
|
||||
#define STARTTEXT "# VampirTrace dispersion callpath filter specification"
|
||||
#define STARTTEXT2 "# generated with otfprofile"
|
||||
#define STARTTEXT3 "# previous filter content"
|
||||
|
||||
bool CreateFilter(AllData& alldata) {
|
||||
maxCount = idCount = filterCount = 0;
|
||||
callpath_tree.parent = NULL;
|
||||
callpath_tree.id = idCount++;
|
||||
callpath_tree.item = " ";
|
||||
callpath_tree.str_hash = 0;
|
||||
callpath_tree.n = 0;
|
||||
callpath_tree.rule = FILTERNOT;
|
||||
|
||||
bool error = false;
|
||||
cur_pos = NULL;
|
||||
|
||||
// Use this function only if marker is set and dispersion data
|
||||
// is collected
|
||||
if ((alldata.params.dispersion.options & DISPERSION_OPT_FILTER) == 0 )
|
||||
return error;
|
||||
|
||||
/*create string streams to save callpath tree information and
|
||||
filter information*/
|
||||
std::ostringstream filter_os;
|
||||
addOldToTree(alldata, filter_os);
|
||||
// setup file writer
|
||||
string filter_file_name = alldata.params.output_file_prefix + ".filter";
|
||||
fstream filter_file;
|
||||
filter_file.open(filter_file_name.c_str(), ios::out | ios::trunc);
|
||||
if (!filter_file.good()) {
|
||||
cerr << "ERROR: Unable to open file '" << filter_file_name
|
||||
<< "' for writing." << endl;
|
||||
return error;
|
||||
}
|
||||
|
||||
time_t t = time(0);
|
||||
char* ts = ctime(&t);
|
||||
ts[strlen(ts)-1] = '\0';
|
||||
|
||||
filter_file << STARTTEXT << endl << STARTTEXT2 << " on "<< ts << endl;
|
||||
|
||||
map<TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath>
|
||||
::const_iterator itc = alldata.functionDispersionCallpathMap.begin();
|
||||
map<TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath>
|
||||
::const_iterator itcend = alldata.functionDispersionCallpathMap.end();
|
||||
|
||||
list<string> callpathes;
|
||||
string tmp;
|
||||
string word;
|
||||
|
||||
while (itc != itcend) {
|
||||
|
||||
if (itc->first.b == "") {
|
||||
itc++;
|
||||
continue;
|
||||
}
|
||||
|
||||
tmp = "";
|
||||
maxCount += itc->second.count;
|
||||
parsePath(alldata, itc->first.b, itc->second.filterRule,
|
||||
itc->second.count, (itc->second.excl_time_95_percent
|
||||
/ alldata.timerResolution));
|
||||
|
||||
itc++;
|
||||
}
|
||||
|
||||
filter_file << endl << endl;
|
||||
postOrder(&callpath_tree, filter_file);
|
||||
|
||||
filter_file << endl << endl << STARTTEXT3 << endl << filter_os.str()
|
||||
<< endl;
|
||||
filter_file.close();
|
||||
|
||||
while (!callpath_tree.children.empty()) {
|
||||
CTree<string>* tmp = callpath_tree.children.back();
|
||||
callpath_tree.children.pop_back();
|
||||
if (tmp != NULL) {
|
||||
delete (tmp);
|
||||
tmp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return !error;
|
||||
}
|
||||
|
||||
CTree<string>* addToTree(string parent, uint32_t pid, string child,
|
||||
uint32_t cid, int rule, uint64_t n, double timeFilt) {
|
||||
if (cur_pos == NULL) {
|
||||
for (uint32_t i = 0; i < callpath_tree.children.size(); i++)
|
||||
if (callpath_tree.children.at(i)->item == parent) {
|
||||
cur_pos = callpath_tree.children.at(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (cur_pos == NULL) {
|
||||
|
||||
CTree<string> *children = new CTree<string> ();
|
||||
children = children;
|
||||
children->item = parent;
|
||||
children->str_hash = pid;
|
||||
|
||||
children->parent = &callpath_tree;
|
||||
if (child == "") {
|
||||
children->n = n;
|
||||
children->rule = rule;
|
||||
} else {
|
||||
children->n = 1;
|
||||
children->rule = FILTERNOT;
|
||||
}
|
||||
|
||||
children->id = idCount++;
|
||||
callpath_tree.children.push_back(children);
|
||||
cur_pos = children;
|
||||
}
|
||||
|
||||
CTree<string> *tmp;
|
||||
tmp = cur_pos;
|
||||
while (parent != cur_pos->item) {
|
||||
if (cur_pos->parent == NULL)
|
||||
break;
|
||||
cur_pos = cur_pos->parent;
|
||||
}
|
||||
|
||||
bool exists = false;
|
||||
for (uint32_t i = 0; i < cur_pos->children.size(); i++) {
|
||||
if ((cur_pos->children.at(i))->item == child) {
|
||||
exists = true;
|
||||
cur_pos = (cur_pos->children.at(i));
|
||||
cur_pos->n += n;
|
||||
if (timeFilt > 0)
|
||||
cur_pos->timeFilt = timeFilt;
|
||||
if (rule != -1 && cur_pos->rule != PREVFILTER) {
|
||||
if (cur_pos->rule == -1)
|
||||
cur_pos->rule = rule;
|
||||
else if (cur_pos->rule != FILTERNOT)
|
||||
cur_pos->rule = rule;
|
||||
|
||||
tmp = cur_pos;
|
||||
if (rule == FILTERNOT)
|
||||
while (tmp->parent != NULL && tmp->parent->rule
|
||||
== FILTERREC) {
|
||||
tmp->parent->rule = FILTEROUT;
|
||||
tmp = tmp->parent;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
if (child == "")
|
||||
exists = true;
|
||||
|
||||
if (!exists) {
|
||||
|
||||
CTree<string> *children = new CTree<string> ();
|
||||
children->item = child;
|
||||
children->rule = rule;
|
||||
children->n = n;
|
||||
children->id = idCount++;
|
||||
children->str_hash = cid;
|
||||
children->parent = cur_pos;
|
||||
if (timeFilt > 0)
|
||||
children->timeFilt = timeFilt;
|
||||
cur_pos->children.push_back(children);
|
||||
cur_pos = children;
|
||||
|
||||
tmp = cur_pos;
|
||||
if (rule == FILTERNOT)
|
||||
while (tmp->parent != NULL && tmp->parent->rule == FILTERREC) {
|
||||
tmp->parent->rule = FILTEROUT;
|
||||
tmp = tmp->parent;
|
||||
}
|
||||
|
||||
return cur_pos;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Remove leading and trailing blanks (' ') from str in place.
 * A string that is empty or consists of blanks only becomes empty.
 */
void trimString(std::string& str) {

    const std::string::size_type first = str.find_first_not_of(' ');
    if (first == std::string::npos) {
        /* nothing but blanks (or nothing at all) */
        str.clear();
        return;
    }

    /* a non-blank exists, so 'last' is valid and >= first */
    const std::string::size_type last = str.find_last_not_of(' ');
    str = str.substr(first, last - first + 1);
}
|
||||
|
||||
/*
 * Load the filter file named by alldata.params.dispersion.filter_file_name
 * (if any) and merge it into the current run:
 *  - rule lines ("<path> -- <tail>") whose tail contains "C" and " 0 " are
 *    added to the call-path tree with rule FILTEROUT,
 *  - every other line, except the generated header lines STARTTEXT,
 *    STARTTEXT2 and STARTTEXT3, is appended unchanged to old_filter.
 * Nothing happens when no file name is set or the file cannot be read.
 */
void addOldToTree(AllData& alldata, std::ostringstream& old_filter) {

    if (alldata.params.dispersion.filter_file_name.empty())
        return;

    const string filter_file_name = alldata.params.dispersion.filter_file_name;

    /* rule lines of the old file, i.e. non-comment lines containing "--" */
    vector<string> pathes;

    ifstream b_file(filter_file_name.c_str());
    if (b_file.good()) {
        string line;
        while (std::getline(b_file, line)) {
            trimString(line);
            /* look for the separator "--" itself, a single '-' (e.g. in a
               function name) does not make a line a rule */
            if (line.substr(0, 1) != "#" && line.find("--") != string::npos) {
                pathes.push_back(line);
            } else if (line != STARTTEXT && line != STARTTEXT2
                    && line != STARTTEXT3) {
                old_filter << line << endl;
            }
        }
        b_file.close();
    }

    /* rules are handled last-to-first */
    while (!pathes.empty()) {

        const string rule_line = pathes.back();
        pathes.pop_back();

        /* split at the last "--"; every entry of 'pathes' contains one */
        const string::size_type pos = rule_line.rfind("--");
        string path = rule_line.substr(0, pos);
        string tail = rule_line.substr(pos);
        trimString(path);
        trimString(tail);

        if (tail.find("C") != tail.npos && tail.find(" 0 ") != tail.npos) {
            parsePath(alldata, path, FILTEROUT);
        } else {
            old_filter << rule_line << endl;
        }
    }
}
|
||||
|
||||
/*
 * Write the filter rules of the subtree below 'node' to filter_file,
 * children first. Children of a FILTERREC node are not visited.
 *
 * A node produces a line unless its rule is FILTERNOT or -1. The line is
 * "<path> -- 0 C", where <path> is the node's item prefixed, separated by
 * ';', with the items of those ancestors whose rule is FILTERNOT or -1
 * (the root itself is never part of the path). Every written line
 * increments filterCount.
 */
void postOrder(CTree<string>* node, fstream& filter_file) {

    if (node->rule != FILTERREC) {
        for (size_t i = 0; i < node->children.size(); i++)
            postOrder(node->children[i], filter_file);
    }

    /* nodes that are not filtered produce no rule */
    if (node->rule == FILTERNOT || node->rule == -1)
        return;

    string path = node->item + " -- 0 C";

    /* walk up to, but not including, the root; also safe for a node that
       has no parent at all */
    for (CTree<string>* anc = node->parent;
            anc != NULL && anc->parent != NULL; anc = anc->parent) {
        if (anc->rule == FILTERNOT || anc->rule == -1)
            path = anc->item + ";" + path;
    }

    filter_file << path << endl;
    filterCount++;
}
|
||||
|
||||
/*
 * Add a call path given as blank-separated function ids (e.g. "3 17 42")
 * to the call-path tree. Ids are translated to names through
 * alldata.functionIdNameMap. Each pair of neighbouring elements is passed
 * to addToTree(); only the last pair carries rule, n and timeB, all other
 * pairs are added with rule -1.
 *
 * timeB is only kept when rule == 1, otherwise -1 is passed on.
 *
 * NOTE(review): for a path with a single element the element is passed as
 * parent and as child (the "rest" of the path is the element itself);
 * this is the behaviour of the previous implementation and is kept as is.
 */
void parsePath(AllData& alldata, string path, int rule, uint64_t n,
        double timeB) {

    trimString(path);
    if (rule != 1) {
        timeB = -1;
    }

    cur_pos = NULL;
    uint32_t cid = 0;
    string child;

    while (path != "") {

        /* first element of the remaining path is the parent */
        const string::size_type sep = path.find_first_of(" ");
        uint32_t pid = atoi(path.substr(0, sep).c_str());
        string parent = alldata.functionIdNameMap[pid];

        /* drop the parent; without a separator the path stays unchanged */
        if (sep != string::npos)
            path = path.substr(sep + 1);

        if (path == "") {
            if (cur_pos != NULL)
                break;
            cid = 5;
            child = "";
        } else {
            cid = atoi(path.substr(0, path.find_first_of(" ")).c_str());
            child = alldata.functionIdNameMap[cid];
        }

        if (path.find_first_of(" ") == path.npos) {
            /* last pair: attach the actual rule and counters */
            addToTree(parent, pid, child, cid, rule, n, timeB);
            break;
        }

        addToTree(parent, pid, child, cid, -1, 0, -1);
    }
}
|
||||
|
||||
/*
 * Add a call path given as ';'-separated function names (the format used
 * in filter files, e.g. "main;foo;bar") to the call-path tree. Each pair of
 * neighbouring elements is passed to addToTree(); only the last pair
 * carries 'rule', all other pairs are added with rule -1. Count 0 and
 * time -1 are used throughout, and 5 is stored as str_hash of every node.
 *
 * alldata is not used by this overload.
 *
 * NOTE(review): for a path with a single element the element is passed as
 * parent and as child (the "rest" of the path is the element itself);
 * this is the behaviour of the previous implementation and is kept as is.
 */
void parsePath(AllData& alldata, string path, int rule) {

    /* value stored as str_hash for nodes read from a filter file */
    const uint32_t id = 5;

    cur_pos = NULL;

    while (path != "") {

        /* first element of the remaining path is the parent */
        const string::size_type sep = path.find_first_of(";");
        const string parent = path.substr(0, sep);

        /* drop the parent; without a separator the path stays unchanged */
        if (sep != string::npos)
            path = path.substr(sep + 1);

        string child;
        if (path == "") {
            /* trailing ';': nothing follows the parent */
            if (cur_pos != NULL)
                break;
        } else {
            child = path.substr(0, path.find_first_of(";"));
        }

        if (path.find_first_of(";") == path.npos) {
            /* last pair: attach the actual rule */
            addToTree(parent, id, child, id, rule, 0, -1);
            break;
        }

        addToTree(parent, id, child, id, -1, 0, -1);
    }
}
|
113
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_filter.h
Обычный файл
113
ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_filter.h
Обычный файл
@ -0,0 +1,113 @@
|
||||
/*
|
||||
This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2012.
|
||||
Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
|
||||
*/
|
||||
|
||||
#ifndef CREATE_FILTER_H
|
||||
#define CREATE_FILTER_H
|
||||
|
||||
#include "datastructs.h"
|
||||
#include <vector>
|
||||
|
||||
/**
|
||||
* Enumeration to set filter method:
|
||||
* - FILTERREC: recursive filter
|
||||
* - FILTERNOT: don't filter this path
|
||||
* - FILTEROUT: filter this path, but not recursively
|
||||
* - TIMEFILTER: filter this path to a specific point of time
|
||||
* - PREVFILTER: filter that was set by another filter file
|
||||
*/
|
||||
/* filter rules of a call-path tree node, one enumerator per line */
enum filterRule {
    FILTERREC = 0,
    FILTERNOT = 1,
    FILTEROUT = 2,
    TIMEFILTER = 3,
    PREVFILTER = 4
};
|
||||
|
||||
/* pair of two 32 bit unsigned values, used as a map key (see ltPair_int) */
struct Pair_int {

    uint32_t a;
    uint32_t b;

    /* both values are zero by default */
    Pair_int() :
        a(0), b(0) {
    }

    /* parameters have the type of the members, so no hidden narrowing
       happens inside the constructor */
    Pair_int(uint32_t aa, uint32_t bb) :
        a(aa), b(bb) {
    }
};
|
||||
|
||||
struct ltPair_int {
|
||||
|
||||
bool operator()(const Pair_int& p1, const Pair_int& p2) const {
|
||||
|
||||
/* a is the major number for comparison, this gives a better
|
||||
order when reducing the entries over the first argument */
|
||||
|
||||
if (p1.a == p2.a) {
|
||||
|
||||
return p1.b < p2.b;
|
||||
|
||||
} else {
|
||||
|
||||
return p1.a < p2.a;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* CTree is a Tree to save Path information. It Points to the Parent node and
|
||||
* contains child nodes, as well as
|
||||
* filter rule, name, id and hash_str.
|
||||
*/
|
||||
/*
 * Node of a call-path tree. A node owns its children: destroying a node
 * deletes all nodes below it. The parent pointer is not owned.
 * NOTE(review): nodes must not be copied, a copy would delete the same
 * children a second time.
 */
template<class T> class CTree {
public:
    T item;          /* payload, e.g. the name of the path element */
    int rule;        /* filter rule, see enum filterRule; -1 is also used */
    uint64_t n;      /* counter */
    uint32_t id;     /* node id */
    size_t str_hash;

    CTree<T> *parent;                 /* NULL for a node without parent */
    std::vector<CTree<T>*> children;  /* owned child nodes */
    double timeFilt;

    /* rule defaults to 1, everything else to zero / NULL */
    CTree() :
        rule(1), n(0), id(0), str_hash(0), parent(NULL), timeFilt(0) {
    }

    /* delete all children, last one first */
    ~CTree() {

        while (!children.empty()) {
            CTree<T>* last = children.back();
            children.pop_back();
            delete last;
        }
    }
};
|
||||
|
||||
/* create filter */
|
||||
bool CreateFilter(AllData& alldata);
|
||||
|
||||
/* add path element to path tree */
|
||||
CTree<string>* addToTree(string parent, uint32_t pid, string child,
|
||||
uint32_t cid, int rule, uint64_t n, string timeFilt);
|
||||
|
||||
/* load filter information from an existing filter file (result.filter)*/
|
||||
void addOldToTree(AllData& alldata, std::ostringstream& old_filter);
|
||||
|
||||
/* iteration step through the callpath tree to write filter file */
|
||||
void postOrder(CTree<string>* node, fstream& filter_file);
|
||||
|
||||
/* parse callpath and add pairs of parent and childs to the tree */
|
||||
void parsePath(AllData& alldata, string path, int rule, uint64_t n,
|
||||
double timeB);
|
||||
|
||||
/* parse callpath and add pairs of parent and childs to the tree */
|
||||
void parsePath(AllData& alldata, string path, int rule);
|
||||
|
||||
#endif /* CREATE_FILTER_H */
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -228,6 +228,94 @@ static bool write_markerDispersion( AllData& alldata, OTF_WStream* writer ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Write an "irregularity" marker for every entry of
 * alldata.functionDispersionCallpathMap that has a non-zero call count, a
 * non-empty call path (first.b) and a dispersion weight (first.a) above
 * alldata.dispersionMarkerBorder. The marker is placed at the time and
 * process of the maximum stored in alldata.functionMinMaxLocationCallpathMap
 * for that call path; entries without location information are skipped.
 *
 * A failing OTF_WStream_writeMarker() call is reported on stdout but does not
 * stop the loop; the function always returns true.
 */
static bool write_markerDispersion_callpath( AllData& alldata, OTF_WStream* writer ) {

    /* the comparator has to be the one the map is declared with in AllData,
       otherwise the iterator types are formally unrelated */
    typedef map< TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath > DispersionCallpathMap;
    typedef map< string, FunctionMinMaxLocationData > LocationCallpathMap;

    const uint64_t timerResolution= alldata.timerResolution;

    DispersionCallpathMap::const_iterator it= alldata.functionDispersionCallpathMap.begin();
    const DispersionCallpathMap::const_iterator itend= alldata.functionDispersionCallpathMap.end();

    for ( ; itend != it; ++it ) {

        /* only call paths that were called at all and exceed the border */
        if ( !it->second.count || it->first.b == "" ||
             !( it->first.a > alldata.dispersionMarkerBorder ) ) {

            continue;
        }

        const LocationCallpathMap::const_iterator it_loc=
            alldata.functionMinMaxLocationCallpathMap.find( it->first.b );
        if ( alldata.functionMinMaxLocationCallpathMap.end() == it_loc ) {

            continue;
        }

        const uint64_t time= it_loc->second.location.time_max;
        const uint64_t process= it_loc->second.location.loc_max;

        /* an unknown function id yields an empty name instead of
           dereferencing the end iterator */
        string name;
        if ( alldata.functionIdNameMap.find( it->first.c ) !=
             alldata.functionIdNameMap.end() ) {

            name= alldata.functionIdNameMap.find( it->first.c )->second;
        }

        /* shorten long names by collapsing everything between the first and
           the last comma; names with fewer than two commas stay untouched
           (the npos arithmetic would otherwise wipe or truncate them) */
        if ( name.length() > 50 ) {

            const string::size_type first_comma= name.find_first_of( "," );
            const string::size_type last_comma= name.find_last_of( "," );

            if ( string::npos != first_comma && first_comma < last_comma ) {

                name.replace( first_comma + 1, last_comma - first_comma - 1, " ... " );
            }
        }

        stringstream oss;
        oss << "Irregularity weight: " << (double) it->first.a / timerResolution
            << " Function Name: " << name << " Dispersion values: "
            << " [ " << it->second.excl_time_minimum / timerResolution
            << " , " << it->second.excl_time_low_quartile / timerResolution
            << " , " << it->second.excl_time_median / timerResolution
            << " , " << it->second.excl_time_top_quartile / timerResolution
            << " , " << (it->second.excl_time_sum / it->second.count) / timerResolution
            << " , " << it->second.excl_time_maximum / timerResolution
            << " ] " << endl;

        if ( 0 == OTF_WStream_writeMarker( writer, (uint64_t) time,
                (uint32_t) process, markerDispersionId,
                oss.str().c_str() ) ) {

            cout << "Error while writing Marker Spots " << endl ;
        }
    }

    return true;
}
|
||||
|
||||
bool CreateMarker( AllData& alldata ) {
|
||||
|
||||
bool error= false;
|
||||
@ -279,6 +367,10 @@ bool CreateMarker( AllData& alldata ) {
|
||||
|
||||
VerbosePrint( alldata, 1, true, "writing marker irregularity spots \n" );
|
||||
error= !write_markerDispersion(alldata, writer );
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
error= !write_markerDispersion_callpath(alldata, writer );
|
||||
}
|
||||
|
||||
} while ( false );
|
||||
|
||||
|
@ -35,6 +35,20 @@ typedef enum {
|
||||
|
||||
} ClusterAlgorithm;
|
||||
|
||||
/* *** dispersion modes *** */
|
||||
|
||||
/* Selects the dispersion mode; the numeric values are fixed (0 and 1). */
typedef enum {
    /* dispersion mode "per function" */
    DISPERSION_MODE_PERFUNCTION = 0,
    /* dispersion mode "per call path" */
    DISPERSION_MODE_PERCALLPATH
} DispersionMode;
|
||||
|
||||
/* *** dispersion options *** */
|
||||
|
||||
/* Dispersion options; every enumerator occupies its own bit, so several
   options can be OR-ed into one mask. */
typedef enum {
    DISPERSION_OPT_INFO   = 1 << 0,  /* 0x1 */
    DISPERSION_OPT_MARKER = 1 << 1,  /* 0x2 */
    DISPERSION_OPT_FILTER = 1 << 2   /* 0x4 */
} DispersionOptions;
|
||||
|
||||
/* *** program parameters *** */
|
||||
|
||||
@ -48,7 +62,6 @@ struct Params {
|
||||
static const bool DEFAULT_LOG_AXIS= true;
|
||||
static const uint8_t DEFAULT_VERBOSE_LEVEL= 0;
|
||||
static const bool DEFAULT_CREATE_CSV= false;
|
||||
static const bool DEFAULT_CREATE_MARKER= false;
|
||||
static const bool DEFAULT_CREATE_TEX= true;
|
||||
static const bool DEFAULT_CREATE_PDF= true;
|
||||
static const string DEFAULT_OUTPUT_FILE_PREFIX() { return "result"; }
|
||||
@ -62,7 +75,6 @@ struct Params {
|
||||
bool read_from_stats;
|
||||
|
||||
bool create_csv;
|
||||
bool create_marker;
|
||||
bool create_tex;
|
||||
bool create_pdf;
|
||||
string input_file_prefix;
|
||||
@ -98,13 +110,30 @@ struct Params {
|
||||
|
||||
} clustering;
|
||||
|
||||
struct Dispersion {
|
||||
|
||||
static const DispersionMode DEFAULT_MODE= DISPERSION_MODE_PERFUNCTION;
|
||||
static const DispersionOptions DEFAULT_OPTIONS= DISPERSION_OPT_INFO;
|
||||
static const uint32_t DEFAULT_REDUCTION= 15;
|
||||
|
||||
DispersionMode mode;
|
||||
bool enabled;
|
||||
uint32_t options;
|
||||
uint32_t reduction;
|
||||
std::string filter_file_name;
|
||||
|
||||
Dispersion()
|
||||
: mode(DEFAULT_MODE), enabled(false), options(DEFAULT_OPTIONS),
|
||||
reduction(DEFAULT_REDUCTION) {}
|
||||
|
||||
} dispersion;
|
||||
|
||||
Params()
|
||||
: max_file_handles(DEFAULT_MAX_FILE_HANDLES),
|
||||
buffer_size(DEFAULT_BUFFER_SIZE), max_groups(DEFAULT_MAX_GROUPS),
|
||||
logaxis(DEFAULT_LOG_AXIS),
|
||||
verbose_level(DEFAULT_VERBOSE_LEVEL), progress(false),
|
||||
read_from_stats(false), create_csv(DEFAULT_CREATE_CSV),
|
||||
create_marker(DEFAULT_CREATE_MARKER),
|
||||
create_tex(DEFAULT_CREATE_TEX), create_pdf(DEFAULT_CREATE_PDF),
|
||||
output_file_prefix(DEFAULT_OUTPUT_FILE_PREFIX()) {}
|
||||
|
||||
@ -226,6 +255,38 @@ struct gtPair {
|
||||
}
|
||||
};
|
||||
|
||||
/* *** pair of values as map key *** */
|
||||
|
||||
/* Map key made of a number and a string. The global call-path function map
   is looked up with keys built as <funcId, callpath>. */
struct PairCallpath {

    uint64_t a;
    string b;

    /* zero number, empty string */
    PairCallpath() : a(0), b() {}

    /* the string is taken by const reference, so it is copied only once,
       into the member */
    PairCallpath( uint64_t aa, const string& bb ) : a(aa), b(bb) {}

    ~PairCallpath() {}
};
|
||||
|
||||
struct ltPairCallpath {
|
||||
|
||||
bool operator()( const PairCallpath& p1, const PairCallpath& p2 ) const {
|
||||
|
||||
/* a is the major number for comparison, this gives a better
|
||||
order when reducing the entries over the first argument */
|
||||
|
||||
if ( p1.a == p2.a ) {
|
||||
if(p1.b.compare(p2.b) < 0)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
|
||||
} else {
|
||||
|
||||
return p1.a < p2.a;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* *** triplet of values as map key *** */
|
||||
|
||||
struct Triple {
|
||||
@ -265,6 +326,151 @@ struct ltTriple {
|
||||
}
|
||||
};
|
||||
|
||||
/* Map key made of a number, a string and a second number. The meaning of
   the components depends on the map that uses the key, e.g.
   <dispersion, callpath, funcId> or <funcId, callpath, bin>. */
struct TripleCallpath {

    uint64_t a;
    string b;
    uint64_t c;

    /* zero numbers, empty string */
    TripleCallpath() : a(0), b(), c(0) {}

    /* the string is taken by const reference, so it is copied only once,
       into the member */
    TripleCallpath( uint64_t aa, const string& bb, uint64_t cc )
        : a(aa), b(bb), c(cc) {}

    ~TripleCallpath() {}
};
|
||||
|
||||
struct gtTripleCallpathSortByCallpath {
|
||||
|
||||
bool operator()( const TripleCallpath& p1, const TripleCallpath& p2 ) const {
|
||||
|
||||
/* a is the major number for comparison, this gives a better
|
||||
order when reducing the entries over the first argument */
|
||||
|
||||
if ( p1.c == p2.c ) {
|
||||
|
||||
if ( p1.a == p2.a ) {
|
||||
|
||||
|
||||
if(p1.b.compare(p2.b) < 0)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
|
||||
} else {
|
||||
return p1.a > p2.a;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
|
||||
return p1.c > p2.c;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct gtTripleCallpathSortByDispersion {
|
||||
|
||||
bool operator()( const TripleCallpath& p1, const TripleCallpath& p2 ) const {
|
||||
|
||||
/* a is the major number for comparison, this gives a better
|
||||
order when reducing the entries over the first argument */
|
||||
|
||||
if ( p1.a == p2.a ) {
|
||||
|
||||
if ( p1.b.compare(p2.b) == 0 ) {
|
||||
|
||||
return p1.c > p2.c;
|
||||
|
||||
} else {
|
||||
|
||||
if(p1.b.compare(p2.b) < 0)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
return p1.a > p2.a;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct ltTripleCallpath {
|
||||
|
||||
bool operator()( const TripleCallpath& p1, const TripleCallpath& p2 ) const {
|
||||
|
||||
/* a is the major number for comparison, this gives a better
|
||||
order when reducing the entries over the first argument */
|
||||
|
||||
if ( p1.a == p2.a ) {
|
||||
|
||||
if ( p1.b.compare(p2.b) == 0 ) {
|
||||
|
||||
return p1.c < p2.c;
|
||||
|
||||
} else {
|
||||
|
||||
if(p1.b.compare(p2.b) > 0)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
return p1.a < p2.a;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* *** quartet of values as map key *** */
|
||||
/* Map key made of two numbers, a string and a third number. */
struct Quadruple {

    uint64_t a;
    uint64_t b;
    string c;
    uint64_t d;

    /* zero numbers, empty string */
    Quadruple() : a(0), b(0), c(), d(0) {}

    /* the string is taken by const reference, so it is copied only once,
       into the member */
    Quadruple( uint64_t aa, uint64_t bb, const string& cc, uint64_t dd )
        : a(aa), b(bb), c(cc), d(dd) {}

    ~Quadruple() {}
};
|
||||
|
||||
|
||||
struct ltQuadruple {
|
||||
|
||||
bool operator()( const Quadruple& p1, const Quadruple& p2 ) const {
|
||||
|
||||
/* a is the major number for comparison, this gives a better
|
||||
order when reducing the entries over the first argument */
|
||||
|
||||
if ( p1.a == p2.a ) {
|
||||
|
||||
if ( p1.b == p2.b ) {
|
||||
|
||||
if ( p1.d == p2.d ) {
|
||||
|
||||
|
||||
if(p1.c.compare(p2.c) > 0)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
|
||||
} else {
|
||||
|
||||
return p1.d < p2.d;
|
||||
}
|
||||
} else {
|
||||
|
||||
return p1.b < p2.b;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
return p1.a < p2.a;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct Process {
|
||||
|
||||
@ -301,7 +507,8 @@ public:
|
||||
type sum;
|
||||
uint64_t cnt;
|
||||
|
||||
min_max_avg( type a= (type) OTF_UINT64_MAX, type b= (type) 0, type s= (type) 0, uint64_t c= 0 ) :
|
||||
min_max_avg( type a= (type) OTF_UINT64_MAX,
|
||||
type b= (type) 0, type s= (type) 0, uint64_t c= 0 ) :
|
||||
min( a ), max( b ), sum( s ), cnt( c ) {}
|
||||
~min_max_avg() {}
|
||||
|
||||
@ -317,7 +524,8 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/* add another min_max_avg object as if all their values were appended to on object */
|
||||
/* add another min_max_avg object as if all their values were appended to
|
||||
one object */
|
||||
void add( const min_max_avg<type>& other ) {
|
||||
|
||||
min= ( other.min < min ) ? other.min : min;
|
||||
@ -344,7 +552,8 @@ public:
|
||||
min_max_Location( type a= (type) OTF_UINT64_MAX, type b= (type) 0,
|
||||
uint64_t p= 0, uint64_t q= 0,
|
||||
uint64_t s= 0, uint64_t t= 0 ) :
|
||||
min( a ), max( b ), loc_min( p ), loc_max( q ), time_min( s ), time_max( t ) {}
|
||||
min( a ), max( b ), loc_min( p ), loc_max( q ), time_min( s ),
|
||||
time_max( t ) {}
|
||||
~min_max_Location() {}
|
||||
|
||||
/* append a single value with its location*/
|
||||
@ -365,7 +574,8 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/* add another min_max_Location object as if all their values were appended to on object */
|
||||
/* add another min_max_Location object as if all their values were appended
|
||||
to one object */
|
||||
void add( const min_max_Location<type>& other ) {
|
||||
|
||||
if ( other.min < min ) {
|
||||
@ -411,8 +621,8 @@ struct Grouping {
|
||||
/* insert process into a group, return true if succeeded */
|
||||
bool insert( uint64_t group, uint64_t process ) {
|
||||
|
||||
/* insert the new entry if and only if there was no process with this ID before,
|
||||
because every process can only be in one group */
|
||||
/* insert the new entry if and only if there was no process with this ID
|
||||
before, because every process can only be in one group */
|
||||
|
||||
pair< map< uint64_t, uint64_t >::const_iterator, bool> ret=
|
||||
processesToGroups.insert( pair< uint64_t, uint64_t >( process, group ) );
|
||||
@ -507,7 +717,8 @@ struct FunctionData {
|
||||
min_max_avg<uint64_t> count;
|
||||
min_max_avg<double> excl_time;
|
||||
min_max_avg<double> incl_time;
|
||||
|
||||
string callpath;
|
||||
|
||||
FunctionData( ) {}
|
||||
~FunctionData( ) {}
|
||||
|
||||
@ -518,10 +729,19 @@ struct FunctionData {
|
||||
incl_time.append( in );
|
||||
}
|
||||
|
||||
/* Append one sample (count, exclusive time, inclusive time) and remember the
   call path it belongs to; a previously stored call path is overwritten.
   The default call path is an empty string: the former default of 0 would
   have constructed a std::string from a null pointer. */
void add( uint64_t n= 0, double ex= 0.0, string call= string(), double in= 0.0 ) {

    count.append( n );
    excl_time.append( ex );
    callpath = call;
    incl_time.append( in );
}
|
||||
|
||||
/* Merge the statistics of another FunctionData object into this one and take
   over its call path (the call path stored before is replaced). */
void add( const FunctionData& other ) {

    /* the three statistics are merged member by member */
    count.add( other.count );
    excl_time.add( other.excl_time );
    incl_time.add( other.incl_time );

    callpath = other.callpath;
}
|
||||
};
|
||||
@ -536,14 +756,22 @@ struct FunctionDispersionData {
|
||||
double excl_time_low_quartile;
|
||||
double excl_time_median;
|
||||
double excl_time_top_quartile;
|
||||
double excl_time_95_percent;
|
||||
double excl_time_maximum;
|
||||
int filterRule;
|
||||
|
||||
FunctionDispersionData( uint64_t a= 0, double b=0.0, double c=0.0,
|
||||
double d=0.0, double e=0.0, double f=0.0,
|
||||
double g=0.0 ) :
|
||||
double g=0.0, double h=0.0 ) :
|
||||
count( a ), excl_time_sum( b ), excl_time_minimum( c ),
|
||||
excl_time_low_quartile ( d ), excl_time_median( e ),
|
||||
excl_time_top_quartile( f ), excl_time_maximum( g ) {}
|
||||
excl_time_top_quartile( f ),excl_time_95_percent(g), excl_time_maximum( h ), filterRule(1){}
|
||||
|
||||
void addFilterRule(int rule)
|
||||
{
|
||||
filterRule = rule;
|
||||
}
|
||||
|
||||
~FunctionDispersionData( ) {}
|
||||
|
||||
};
|
||||
@ -750,10 +978,21 @@ struct AllData {
|
||||
be done */
|
||||
map< Pair, FunctionData, ltPair > functionMapPerRank;
|
||||
|
||||
/* store per-function statistics over the ranks, TripleCallpath is <rank,callpath,funcId>
|
||||
|
||||
in case of additional clustering, collect it to the master node such that
|
||||
process clustering according to similar function call patterns can
|
||||
be done */
|
||||
map< TripleCallpath, FunctionData, ltTripleCallpath > functionCallpathMapPerRank;
|
||||
|
||||
/* store per-function duration section information over the ranks, Triple is
|
||||
<rank,funcId,bin> */
|
||||
map< Triple, FunctionData, ltTriple > functionDurationSectionMapPerRank;
|
||||
|
||||
/* store per-function duration section information over the ranks, Quadruple is
|
||||
<rank,funcId,callpath,bin> */
|
||||
map< Quadruple, FunctionData, ltQuadruple > functionDurationSectionCallpathMapPerRank;
|
||||
|
||||
/* store per-counter statistics over the functions and ranks,
|
||||
Triple is <rank,funcId,counterId> */
|
||||
map< Triple, CounterData, ltTriple > counterMapPerFunctionRank;
|
||||
@ -782,10 +1021,17 @@ struct AllData {
|
||||
/* compact function statistics summed over all ranks */
|
||||
map< uint64_t, FunctionData > functionMapGlobal;
|
||||
|
||||
/* compact per-callpath function statistics summed over all ranks, PairCallpath is <funcId,callpath> */
|
||||
map< PairCallpath, FunctionData, ltPairCallpath > functionCallpathMapGlobal;
|
||||
|
||||
/* compact function duration section information over the functions and bins,
|
||||
Pair is <funcId,bin> */
|
||||
map< Pair, FunctionData, ltPair > functionDurationSectionMapGlobal;
|
||||
|
||||
|
||||
/* compact function duration section information over the functions and bins,
|
||||
TripleCallpath is <funcId,callpath,bin> */
|
||||
map< TripleCallpath, FunctionData, ltTripleCallpath > functionDurationSectionCallpathMapGlobal;
|
||||
|
||||
/* store per-counter statistics over the functions and ranks,
|
||||
Pair is <counterId,funcId> */
|
||||
map< Pair, CounterData, ltPair > counterMapGlobal;
|
||||
@ -810,11 +1056,21 @@ struct AllData {
|
||||
|
||||
/* compact function location information over the functions */
|
||||
map< uint64_t, FunctionMinMaxLocationData > functionMinMaxLocationMap;
|
||||
|
||||
/* compact function location information over the call paths */
|
||||
map< string, FunctionMinMaxLocationData > functionMinMaxLocationCallpathMap;
|
||||
|
||||
/* dispersion information over functions, Pair is < dispersion, funcId > */
|
||||
map< Pair, FunctionDispersionData, gtPair > functionDispersionMap;
|
||||
|
||||
|
||||
/* dispersion information over call paths, TripleCallpath is < dispersion, callpath, funcId > */
|
||||
map< TripleCallpath, FunctionDispersionData, gtTripleCallpathSortByCallpath > functionDispersionCallpathMap;
|
||||
|
||||
/* Maximum number of chars to save a callpath*/
|
||||
uint64_t maxCallpathLength;
|
||||
|
||||
/* Threshold that the dispersion of a call path must exceed to get a marker */
|
||||
uint64_t dispersionMarkerBorder;
|
||||
AllData( uint32_t my_rank= 0, uint32_t num_ranks= 1 ) :
|
||||
myRank(my_rank), numRanks(num_ranks), myProcessesNum(0),
|
||||
myProcessesList(NULL), timerResolution(0), recvTimeKey(0) {
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -13,6 +13,15 @@
|
||||
|
||||
#include "datastructs.h"
|
||||
|
||||
/* define the following macro to use exclusive (excl_time) or inclusive
|
||||
(incl_time) time for dispersion computing */
|
||||
#define DISPERSION_OPTION excl_time
|
||||
|
||||
/* max. number of regions in a call path */
|
||||
#define RFG_FILTER_MAX_CPATH_SIZE 0x80
|
||||
|
||||
/* min. number of function calls on a specific call path to be filtered*/
|
||||
#define MIN_CPATH_COUNT 10
|
||||
|
||||
/* print verbose message to stdout
|
||||
(- do print message only if current verbose level is >= level
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2012.
|
||||
Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
|
||||
*/
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
@ -10,6 +10,8 @@
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
#include "otf.h"
|
||||
#include "otfaux.h"
|
||||
@ -24,261 +26,480 @@ using namespace std;
|
||||
bool ProcessDispersion( AllData& alldata ) {
|
||||
|
||||
bool error= false;
|
||||
|
||||
/* start runtime measurement for process dispersion information */
|
||||
StartMeasurement( alldata, 1, true, "process dispersion information" );
|
||||
|
||||
VerbosePrint( alldata, 1, true, "process dispersion information\n" );
|
||||
|
||||
|
||||
if ( 0 == alldata.myRank ) {
|
||||
|
||||
if ( alldata.params.create_tex ) {
|
||||
|
||||
map< Pair, FunctionData, ltPair >::const_iterator it= alldata.functionDurationSectionMapGlobal.begin();
|
||||
map< Pair, FunctionData, ltPair >::const_iterator itend= alldata.functionDurationSectionMapGlobal.end();
|
||||
int count;
|
||||
if (alldata.params.dispersion.enabled) {
|
||||
|
||||
/*
|
||||
cout << " Size of FunctionDurationSectionMapGlobal: " << alldata.functionDurationSectionMapGlobal.size() << endl;
|
||||
# define PRINT_MIN_MAX_AVG(v,u) (v.cnt) << " x avg " << ((double)(v.sum))/(v.cnt) << "(" << (v.min) << "-" << (v.max) << ") " << u
|
||||
cout << endl << " global function duration section data per bin: " << endl;
|
||||
map<Pair, FunctionData, ltPair>::const_iterator it =
|
||||
alldata.functionDurationSectionMapGlobal.begin();
|
||||
map<Pair, FunctionData, ltPair>::const_iterator itend =
|
||||
alldata.functionDurationSectionMapGlobal.end();
|
||||
|
||||
uint64_t funcid = it->first.a;
|
||||
|
||||
map<uint64_t, FunctionData>::const_iterator iter_funcMapGlobal;
|
||||
iter_funcMapGlobal = alldata.functionMapGlobal.find(funcid);
|
||||
assert( iter_funcMapGlobal != alldata.functionMapGlobal.end() );
|
||||
|
||||
uint64_t n = iter_funcMapGlobal->second.count.sum;
|
||||
|
||||
uint64_t n_temp = 0;
|
||||
uint64_t n_25 = n / 4;
|
||||
uint64_t n_50 = n / 2;
|
||||
uint64_t n_75 = (3 * n) / 4;
|
||||
uint64_t n_95 = (19 * n) / 20;
|
||||
|
||||
double t_min = iter_funcMapGlobal->second.DISPERSION_OPTION.min;
|
||||
double t_max = iter_funcMapGlobal->second.DISPERSION_OPTION.max;
|
||||
double t_sum = iter_funcMapGlobal->second.DISPERSION_OPTION.sum;
|
||||
|
||||
double t_25 = 0.0;
|
||||
double t_50 = 0.0;
|
||||
double t_75 = 0.0;
|
||||
double t_95 = 0.0;
|
||||
count = 0;
|
||||
|
||||
for (; it != itend; ++it)
|
||||
{
|
||||
|
||||
map< Pair, FunctionData, ltPair >::const_iterator it= alldata.functionDurationSectionMapGlobal.begin();
|
||||
map< Pair, FunctionData, ltPair >::const_iterator itend= alldata.functionDurationSectionMapGlobal.end();
|
||||
|
||||
while ( itend != it ) {
|
||||
|
||||
cout << " global function " << it->first.a << " bin " << it->first.b << " -> ";
|
||||
if ( it->second.count.cnt ) {
|
||||
cout << "\t"<<
|
||||
" cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") <<
|
||||
" exc: " << PRINT_MIN_MAX_AVG(it->second.excl_time,"[t]") <<
|
||||
" inc: " << PRINT_MIN_MAX_AVG(it->second.incl_time,"[t]") << endl;
|
||||
}
|
||||
|
||||
it++;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
uint64_t funcid= it->first.a;
|
||||
|
||||
map< uint64_t, FunctionData >::const_iterator iter_funcMapGlobal;
|
||||
iter_funcMapGlobal=alldata.functionMapGlobal.find( funcid );
|
||||
assert( iter_funcMapGlobal != alldata.functionMapGlobal.end() );
|
||||
|
||||
uint64_t n= iter_funcMapGlobal->second.count.sum;
|
||||
|
||||
uint64_t n_temp= 0;
|
||||
uint64_t n_25= n/4;
|
||||
uint64_t n_50= n/2;
|
||||
uint64_t n_75= (3*n)/4;
|
||||
|
||||
double t_min=iter_funcMapGlobal->second.excl_time.min;
|
||||
double t_max=iter_funcMapGlobal->second.excl_time.max;
|
||||
double t_sum=iter_funcMapGlobal->second.excl_time.sum;
|
||||
|
||||
double t_25= 0.0;
|
||||
double t_50= 0.0;
|
||||
double t_75= 0.0;
|
||||
|
||||
for ( ; it != itend; ++it ) {
|
||||
|
||||
count++;
|
||||
//cerr << " funcid " << funcid << endl;
|
||||
|
||||
if ( funcid != it->first.a ) {
|
||||
|
||||
|
||||
if (funcid != it->first.a)
|
||||
{
|
||||
|
||||
/*
|
||||
cerr << " function: " << funcid << " , n: " << n <<
|
||||
" , t_sum: " << t_sum << " , t_min: " << t_min <<
|
||||
" , t_25: " << t_25 << " , t_50: " << t_50 <<
|
||||
cerr << " function: " << funcid << " , n: " << n <<
|
||||
" , t_sum: " << t_sum << " , t_min: " << t_min <<
|
||||
" , t_25: " << t_25 << " , t_50: " << t_50 <<
|
||||
" , t_75: " << t_75 << " , t_max: " << t_max << endl;
|
||||
*/
|
||||
|
||||
alldata.functionDispersionMap[ Pair( (uint64_t)((t_max-t_75)), funcid ) ]
|
||||
= FunctionDispersionData( n, t_sum, t_min, t_25, t_50, t_75, t_max );
|
||||
|
||||
funcid= it->first.a;
|
||||
|
||||
iter_funcMapGlobal=alldata.functionMapGlobal.find( funcid );
|
||||
|
||||
alldata.functionDispersionMap[Pair((uint64_t) ((t_max
|
||||
- t_75)), funcid)] = FunctionDispersionData(n,
|
||||
t_sum, t_min, t_25, t_50, t_75, t_95, t_max);
|
||||
|
||||
alldata.functionDispersionCallpathMap[TripleCallpath(
|
||||
(uint64_t) ((t_max - t_75)), "", funcid)]
|
||||
= FunctionDispersionData(n, t_sum, t_min, t_25,
|
||||
t_50, t_75, t_95, t_max);
|
||||
|
||||
funcid = it->first.a;
|
||||
|
||||
iter_funcMapGlobal = alldata.functionMapGlobal.find(funcid);
|
||||
assert( iter_funcMapGlobal != alldata.functionMapGlobal.end() );
|
||||
n= iter_funcMapGlobal->second.count.sum;
|
||||
|
||||
n_temp= 0;
|
||||
n_25= n/4;
|
||||
n_50= n/2;
|
||||
n_75= (3*n)/4;
|
||||
|
||||
t_min=iter_funcMapGlobal->second.excl_time.min;
|
||||
t_max=iter_funcMapGlobal->second.excl_time.max;
|
||||
t_sum=iter_funcMapGlobal->second.excl_time.sum;
|
||||
t_25= 0.0;
|
||||
t_50= 0.0;
|
||||
t_75= 0.0;
|
||||
|
||||
n = iter_funcMapGlobal->second.count.sum;
|
||||
|
||||
n_temp = 0;
|
||||
n_25 = n / 4;
|
||||
n_50 = n / 2;
|
||||
n_75 = (3 * n) / 4;
|
||||
n_95 = (19 * n) / 20;
|
||||
t_min = iter_funcMapGlobal->second.DISPERSION_OPTION.min;
|
||||
t_max = iter_funcMapGlobal->second.DISPERSION_OPTION.max;
|
||||
t_sum = iter_funcMapGlobal->second.DISPERSION_OPTION.sum;
|
||||
t_25 = 0.0;
|
||||
t_50 = 0.0;
|
||||
t_75 = 0.0;
|
||||
t_95 = 0.0;
|
||||
|
||||
}
|
||||
|
||||
n_temp+= it->second.count.sum;
|
||||
|
||||
|
||||
n_temp += it->second.count.sum;
|
||||
|
||||
/* determine lower quartile, median, and upper quartile */
|
||||
|
||||
if ( 0.0 == t_75 ) {
|
||||
|
||||
if ( n_temp >= n_75 ) {
|
||||
t_75= ( it->second.excl_time.max - it->second.excl_time.min ) / 2 + it->second.excl_time.min ;
|
||||
if (0.0 == t_95)
|
||||
{
|
||||
|
||||
if (n_temp >= n_95)
|
||||
{
|
||||
t_95 = (it->second.DISPERSION_OPTION.max
|
||||
- it->second.DISPERSION_OPTION.min) / 2
|
||||
+ it->second.DISPERSION_OPTION.min;
|
||||
}
|
||||
|
||||
if ( 0.0 == t_50 ) {
|
||||
|
||||
if ( n_temp >= n_50 ) {
|
||||
t_50= ( it->second.excl_time.max - it->second.excl_time.min ) / 2 + it->second.excl_time.min ;
|
||||
|
||||
if (0.0 == t_75)
|
||||
{
|
||||
|
||||
if (n_temp >= n_75)
|
||||
{
|
||||
t_75 = (it->second.DISPERSION_OPTION.max
|
||||
- it->second.DISPERSION_OPTION.min) / 2
|
||||
+ it->second.DISPERSION_OPTION.min;
|
||||
}
|
||||
|
||||
if ( 0.0 == t_25 ) {
|
||||
|
||||
if ( n_temp >= n_25 ) {
|
||||
t_25= ( it->second.excl_time.max - it->second.excl_time.min ) / 2 + it->second.excl_time.min ;
|
||||
|
||||
if (0.0 == t_50)
|
||||
{
|
||||
|
||||
if (n_temp >= n_50)
|
||||
{
|
||||
t_50 = (it->second.DISPERSION_OPTION.max
|
||||
- it->second.DISPERSION_OPTION.min) / 2
|
||||
+ it->second.DISPERSION_OPTION.min;
|
||||
}
|
||||
|
||||
|
||||
if (0.0 == t_25)
|
||||
{
|
||||
|
||||
if (n_temp >= n_25)
|
||||
{
|
||||
t_25 = (it->second.DISPERSION_OPTION.max
|
||||
- it->second.DISPERSION_OPTION.min)
|
||||
/ 2
|
||||
+ it->second.DISPERSION_OPTION.min;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
alldata.functionDispersionMap[ Pair( (uint64_t)( (t_max-t_75)), funcid ) ]
|
||||
= FunctionDispersionData( n, t_sum, t_min, t_25, t_50, t_75, t_max );
|
||||
}
|
||||
|
||||
if ( alldata.params.create_csv ) {
|
||||
|
||||
map< Triple, FunctionData, ltTriple >::const_iterator it= alldata.functionDurationSectionMapPerRank.begin();
|
||||
map< Triple, FunctionData, ltTriple >::const_iterator itend= alldata.functionDurationSectionMapPerRank.end();
|
||||
|
||||
uint64_t rank= it->first.a;
|
||||
uint64_t funcid= it->first.b;
|
||||
|
||||
map< uint64_t, FunctionData >::const_iterator iter_funcMapGlobal;
|
||||
iter_funcMapGlobal=alldata.functionMapGlobal.find( funcid );
|
||||
assert( iter_funcMapGlobal != alldata.functionMapGlobal.end() );
|
||||
|
||||
uint64_t n= iter_funcMapGlobal->second.count.sum;
|
||||
|
||||
uint64_t n_temp= 0;
|
||||
uint64_t n_25= n/4;
|
||||
uint64_t n_50= n/2;
|
||||
uint64_t n_75= (3*n)/4;
|
||||
|
||||
double t_min=iter_funcMapGlobal->second.excl_time.min;
|
||||
double t_max=iter_funcMapGlobal->second.excl_time.max;
|
||||
double t_sum=iter_funcMapGlobal->second.excl_time.sum;
|
||||
|
||||
double t_25= 0.0;
|
||||
double t_50= 0.0;
|
||||
double t_75= 0.0;
|
||||
|
||||
for ( ; it != itend; ++it ) {
|
||||
|
||||
//cerr << " funcid " << funcid << endl;
|
||||
|
||||
if ( funcid != it->first.a ) {
|
||||
|
||||
/*
|
||||
cerr << " function: " << funcid << " , n: " << n <<
|
||||
" , t_sum: " << t_sum << " , t_min: " << t_min <<
|
||||
" , t_25: " << t_25 << " , t_50: " << t_50 <<
|
||||
" , t_75: " << t_75 << " , t_max: " << t_max << endl;
|
||||
*/
|
||||
|
||||
alldata.functionDispersionMapPerRank[ Triple( (uint64_t)((t_max/t_75)*100), funcid, rank ) ]
|
||||
= FunctionDispersionData( n, t_sum, t_min, t_25, t_50, t_75, t_max );
|
||||
|
||||
rank= it->first.a;
|
||||
funcid= it->first.b;
|
||||
|
||||
iter_funcMapGlobal=alldata.functionMapGlobal.find( funcid );
|
||||
assert( iter_funcMapGlobal != alldata.functionMapGlobal.end() );
|
||||
n= iter_funcMapGlobal->second.count.sum;
|
||||
|
||||
n_temp= 0;
|
||||
n_25= n/4;
|
||||
n_50= n/2;
|
||||
n_75= (3*n)/4;
|
||||
|
||||
t_min=iter_funcMapGlobal->second.excl_time.min;
|
||||
t_max=iter_funcMapGlobal->second.excl_time.max;
|
||||
t_sum=iter_funcMapGlobal->second.excl_time.sum;
|
||||
t_25= 0.0;
|
||||
t_50= 0.0;
|
||||
t_75= 0.0;
|
||||
|
||||
}
|
||||
|
||||
n_temp+= it->second.count.sum;
|
||||
|
||||
/* determine lower quartile, median, and upper quartile */
|
||||
|
||||
if ( 0.0 == t_75 ) {
|
||||
|
||||
if ( n_temp >= n_75 ) {
|
||||
t_75= ( it->second.excl_time.max - it->second.excl_time.min ) / 2 + it->second.excl_time.min ;
|
||||
}
|
||||
|
||||
if ( 0.0 == t_50 ) {
|
||||
|
||||
if ( n_temp >= n_50 ) {
|
||||
t_50= ( it->second.excl_time.max - it->second.excl_time.min ) / 2 + it->second.excl_time.min ;
|
||||
}
|
||||
|
||||
if ( 0.0 == t_25 ) {
|
||||
|
||||
if ( n_temp >= n_25 ) {
|
||||
t_25= ( it->second.excl_time.max - it->second.excl_time.min ) / 2 + it->second.excl_time.min ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
alldata.functionDispersionMap[Pair((uint64_t) ((t_max - t_75)),
|
||||
funcid)] = FunctionDispersionData(n, t_sum, t_min, t_25,
|
||||
t_50, t_75, t_95, t_max);
|
||||
if (alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
alldata.functionDispersionCallpathMap[TripleCallpath(
|
||||
(uint64_t) ((t_max - t_75)), "", funcid)]
|
||||
= FunctionDispersionData(n, t_sum, t_min, t_25, t_50,
|
||||
t_75, t_95, t_max);
|
||||
}
|
||||
|
||||
alldata.functionDispersionMapPerRank[ Triple( (uint64_t)((t_max/t_75)*100), funcid, rank ) ]
|
||||
= FunctionDispersionData( n, t_sum, t_min, t_25, t_50, t_75, t_max );
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
cout << " Size of FunctionDispersionMap: " << alldata.functionDispersionMap.size() << endl;
|
||||
|
||||
cout << endl << " global function dispersion: " << endl;
|
||||
if ((alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
&& (alldata.params.dispersion.enabled))
|
||||
{
|
||||
|
||||
map< Pair, FunctionDispersionData, gtPair >::const_iterator it= alldata.functionDispersionMap.begin();
|
||||
map< Pair, FunctionDispersionData, gtPair >::const_iterator itend= alldata.functionDispersionMap.end();
|
||||
|
||||
while ( itend != it ) {
|
||||
|
||||
cout << " dispersion " << it->first.a << " global function " << it->first.b << " -> ";
|
||||
|
||||
if ( it->second.count ) {
|
||||
cout << "\t" <<
|
||||
" tmin: " << it->second.excl_time_minimum <<
|
||||
"\t t_25: " << it->second.excl_time_low_quartile <<
|
||||
"\t tmed: " << it->second.excl_time_median <<
|
||||
"\t t_75: " << it->second.excl_time_top_quartile <<
|
||||
"\t tmax: " << it->second.excl_time_maximum <<
|
||||
"\t tavg: " << it->second.excl_time_sum / it->second.count << endl;
|
||||
map<uint64_t, uint64_t> dispBorder;
|
||||
|
||||
map<TripleCallpath, uint32_t, ltTripleCallpath> function_filter;
|
||||
|
||||
map<TripleCallpath, FunctionData, ltTripleCallpath>::const_iterator
|
||||
it =
|
||||
alldata.functionDurationSectionCallpathMapGlobal.begin();
|
||||
map<TripleCallpath, FunctionData, ltTripleCallpath>::const_iterator
|
||||
itend =
|
||||
alldata.functionDurationSectionCallpathMapGlobal.end();
|
||||
|
||||
uint64_t funcid = it->first.a;
|
||||
string callpath = it->second.callpath;
|
||||
|
||||
map<PairCallpath, FunctionData>::const_iterator
|
||||
iter_funcCallpathMapGlobal;
|
||||
iter_funcCallpathMapGlobal
|
||||
= alldata.functionCallpathMapGlobal.find(PairCallpath(
|
||||
funcid, callpath));
|
||||
assert( iter_funcCallpathMapGlobal != alldata.functionCallpathMapGlobal.end() );
|
||||
|
||||
uint64_t n = iter_funcCallpathMapGlobal->second.count.sum;
|
||||
|
||||
uint64_t n_temp = 0;
|
||||
uint64_t n_25 = n / 4;
|
||||
uint64_t n_50 = n / 2;
|
||||
uint64_t n_75 = (3 * n) / 4;
|
||||
uint64_t n_95 = (19 * n) / 20;
|
||||
|
||||
double t_min =
|
||||
iter_funcCallpathMapGlobal->second.DISPERSION_OPTION.min;
|
||||
double t_max =
|
||||
iter_funcCallpathMapGlobal->second.DISPERSION_OPTION.max;
|
||||
double t_sum =
|
||||
iter_funcCallpathMapGlobal->second.DISPERSION_OPTION.sum;
|
||||
|
||||
double t_25 = 0.0;
|
||||
double t_50 = 0.0;
|
||||
double t_75 = 0.0;
|
||||
double t_95 = 0.0;
|
||||
|
||||
count = 0;
|
||||
|
||||
uint64_t callpathcount = 0;
|
||||
for (; it != itend; ++it)
|
||||
{
|
||||
count++;
|
||||
//cerr << " funcid " << funcid << endl;
|
||||
if (funcid != it->first.a || callpath != it->first.b)
|
||||
{
|
||||
|
||||
alldata.functionDispersionCallpathMap[TripleCallpath(
|
||||
(uint64_t) ((t_max - t_75)), callpath, funcid)]
|
||||
= FunctionDispersionData(n, t_sum, t_min, t_25,
|
||||
t_50, t_75, t_95, t_max);
|
||||
function_filter[TripleCallpath((uint64_t) ((t_max - t_75)),
|
||||
callpath, funcid)] = n;
|
||||
|
||||
funcid = it->first.a;
|
||||
callpath = it->first.b;
|
||||
iter_funcCallpathMapGlobal
|
||||
= alldata.functionCallpathMapGlobal.find(
|
||||
PairCallpath(funcid, callpath));
|
||||
assert( iter_funcCallpathMapGlobal != alldata.functionCallpathMapGlobal.end() );
|
||||
|
||||
if ( n > MIN_CPATH_COUNT ){
|
||||
|
||||
callpathcount += n;
|
||||
|
||||
}
|
||||
|
||||
n = iter_funcCallpathMapGlobal->second.count.sum;
|
||||
|
||||
n_temp = 0;
|
||||
n_25 = n / 4;
|
||||
n_50 = n / 2;
|
||||
n_75 = (3 * n) / 4;
|
||||
n_95 = (19 * n) / 20;
|
||||
t_min
|
||||
= iter_funcCallpathMapGlobal->second.DISPERSION_OPTION.min;
|
||||
t_max
|
||||
= iter_funcCallpathMapGlobal->second.DISPERSION_OPTION.max;
|
||||
t_sum
|
||||
= iter_funcCallpathMapGlobal->second.DISPERSION_OPTION.sum;
|
||||
t_25 = 0.0;
|
||||
t_50 = 0.0;
|
||||
t_75 = 0.0;
|
||||
t_95 = 0.0;
|
||||
}
|
||||
|
||||
it++;
|
||||
|
||||
n_temp += it->second.count.sum;
|
||||
|
||||
/* determine lower quartile, median, and upper quartile */
|
||||
if (0.0 == t_95)
|
||||
{
|
||||
|
||||
if (n_temp >= n_95)
|
||||
{
|
||||
t_95 = (it->second.DISPERSION_OPTION.max
|
||||
- it->second.DISPERSION_OPTION.min) / 2
|
||||
+ it->second.DISPERSION_OPTION.min;
|
||||
}
|
||||
|
||||
if (0.0 == t_75)
|
||||
{
|
||||
|
||||
if (n_temp >= n_75)
|
||||
{
|
||||
t_75 = (it->second.DISPERSION_OPTION.max
|
||||
- it->second.DISPERSION_OPTION.min) / 2
|
||||
+ it->second.DISPERSION_OPTION.min;
|
||||
}
|
||||
|
||||
if (0.0 == t_50)
|
||||
{
|
||||
|
||||
if (n_temp >= n_50)
|
||||
{
|
||||
t_50 = (it->second.DISPERSION_OPTION.max
|
||||
- it->second.DISPERSION_OPTION.min) / 2
|
||||
+ it->second.DISPERSION_OPTION.min;
|
||||
}
|
||||
|
||||
if (0.0 == t_25)
|
||||
{
|
||||
|
||||
if (n_temp >= n_25)
|
||||
{
|
||||
t_25 = (it->second.DISPERSION_OPTION.max
|
||||
- it->second.DISPERSION_OPTION.min)
|
||||
/ 2
|
||||
+ it->second.DISPERSION_OPTION.min;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
alldata.functionDispersionCallpathMap[TripleCallpath(
|
||||
(uint64_t) ((t_max - t_75)), callpath, funcid)]
|
||||
= FunctionDispersionData(n, t_sum, t_min, t_25, t_50, t_75,
|
||||
t_95, t_max);
|
||||
|
||||
function_filter[TripleCallpath((uint64_t) ((t_max - t_75)),
|
||||
callpath, funcid)] = n;
|
||||
if (n > MIN_CPATH_COUNT){
|
||||
|
||||
callpathcount += n;
|
||||
|
||||
}
|
||||
|
||||
{
|
||||
// set filter and dispersion border
|
||||
map<TripleCallpath, uint32_t, ltTripleCallpath>::const_iterator
|
||||
iter = function_filter.begin();
|
||||
map<TripleCallpath, uint32_t, ltTripleCallpath>::const_iterator
|
||||
iter_end = function_filter.end();
|
||||
|
||||
|
||||
uint32_t k=0;
|
||||
callpathcount = (callpathcount *
|
||||
((double)alldata.params.dispersion.reduction/100));
|
||||
|
||||
for (uint64_t i = 0; iter != iter_end; i++)
|
||||
{
|
||||
|
||||
if (k <= callpathcount && iter->second > MIN_CPATH_COUNT &&
|
||||
( std::count(iter->first.b.begin(), iter->first.b.end(), ' ')
|
||||
< RFG_FILTER_MAX_CPATH_SIZE )
|
||||
){
|
||||
k += iter->second;
|
||||
alldata.functionDispersionCallpathMap[TripleCallpath(
|
||||
iter->first.a, iter->first.b,
|
||||
iter->first.c)].addFilterRule(0);
|
||||
|
||||
} else
|
||||
{
|
||||
dispBorder[iter->first.a] = iter->first.c;
|
||||
if (dispBorder.size() > 50)
|
||||
dispBorder.erase(--(dispBorder.end()));
|
||||
|
||||
}
|
||||
iter++;
|
||||
}
|
||||
}
|
||||
|
||||
alldata.dispersionMarkerBorder = (--(dispBorder.end()))->first;
|
||||
}
|
||||
*/
|
||||
|
||||
if (alldata.params.create_csv)
|
||||
{
|
||||
|
||||
map<Triple, FunctionData, ltTriple>::const_iterator it =
|
||||
alldata.functionDurationSectionMapPerRank.begin();
|
||||
map<Triple, FunctionData, ltTriple>::const_iterator itend =
|
||||
alldata.functionDurationSectionMapPerRank.end();
|
||||
|
||||
uint64_t rank = it->first.a;
|
||||
uint64_t funcid = it->first.b;
|
||||
|
||||
map<uint64_t, FunctionData>::const_iterator iter_funcMapGlobal;
|
||||
iter_funcMapGlobal = alldata.functionMapGlobal.find(funcid);
|
||||
assert( iter_funcMapGlobal != alldata.functionMapGlobal.end() );
|
||||
|
||||
uint64_t n = iter_funcMapGlobal->second.count.sum;
|
||||
|
||||
uint64_t n_temp = 0;
|
||||
uint64_t n_25 = n / 4;
|
||||
uint64_t n_50 = n / 2;
|
||||
uint64_t n_75 = (3 * n) / 4;
|
||||
uint64_t n_95 = (19 * n) / 20;
|
||||
|
||||
double t_min = iter_funcMapGlobal->second.excl_time.min;
|
||||
double t_max = iter_funcMapGlobal->second.excl_time.max;
|
||||
double t_sum = iter_funcMapGlobal->second.excl_time.sum;
|
||||
|
||||
double t_25 = 0.0;
|
||||
double t_50 = 0.0;
|
||||
double t_75 = 0.0;
|
||||
double t_95 = 0.0;
|
||||
for (; it != itend; ++it)
|
||||
{
|
||||
|
||||
//cerr << " funcid " << funcid << endl;
|
||||
if (funcid != it->first.a)
|
||||
{
|
||||
|
||||
/*
|
||||
cerr << " function: " << funcid << " , n: " << n <<
|
||||
" , t_sum: " << t_sum << " , t_min: " << t_min <<
|
||||
" , t_25: " << t_25 << " , t_50: " << t_50 <<
|
||||
" , t_75: " << t_75 << " , t_max: " << t_max << endl;
|
||||
*/
|
||||
|
||||
alldata.functionDispersionMapPerRank[Triple(
|
||||
(uint64_t) ((t_max / t_75) * 100), funcid, rank)]
|
||||
= FunctionDispersionData(n, t_sum, t_min, t_25,
|
||||
t_50, t_75, t_95, t_max);
|
||||
|
||||
rank = it->first.a;
|
||||
funcid = it->first.b;
|
||||
|
||||
iter_funcMapGlobal = alldata.functionMapGlobal.find(funcid);
|
||||
assert( iter_funcMapGlobal != alldata.functionMapGlobal.end() );
|
||||
n = iter_funcMapGlobal->second.count.sum;
|
||||
|
||||
n_temp = 0;
|
||||
n_25 = n / 4;
|
||||
n_50 = n / 2;
|
||||
n_75 = (3 * n) / 4;
|
||||
n_95 = (19 * n) / 20;
|
||||
t_min = iter_funcMapGlobal->second.excl_time.min;
|
||||
t_max = iter_funcMapGlobal->second.excl_time.max;
|
||||
t_sum = iter_funcMapGlobal->second.excl_time.sum;
|
||||
t_25 = 0.0;
|
||||
t_50 = 0.0;
|
||||
t_75 = 0.0;
|
||||
t_95 = 0.0;
|
||||
}
|
||||
|
||||
n_temp += it->second.count.sum;
|
||||
|
||||
/* determine lower quartile, median, and upper quartile */
|
||||
if (0.0 == t_95)
|
||||
{
|
||||
|
||||
if (n_temp >= n_95)
|
||||
{
|
||||
t_95 = (it->second.excl_time.max
|
||||
- it->second.excl_time.min) / 2
|
||||
+ it->second.excl_time.min;
|
||||
}
|
||||
if (0.0 == t_75)
|
||||
{
|
||||
|
||||
if (n_temp >= n_75)
|
||||
{
|
||||
t_75 = (it->second.excl_time.max
|
||||
- it->second.excl_time.min) / 2
|
||||
+ it->second.excl_time.min;
|
||||
}
|
||||
|
||||
if (0.0 == t_50)
|
||||
{
|
||||
|
||||
if (n_temp >= n_50)
|
||||
{
|
||||
t_50 = (it->second.excl_time.max
|
||||
- it->second.excl_time.min) / 2
|
||||
+ it->second.excl_time.min;
|
||||
}
|
||||
|
||||
if (0.0 == t_25)
|
||||
{
|
||||
|
||||
if (n_temp >= n_25)
|
||||
{
|
||||
t_25 = (it->second.excl_time.max
|
||||
- it->second.excl_time.min) / 2
|
||||
+ it->second.excl_time.min;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
alldata.functionDispersionMapPerRank[Triple((uint64_t) ((t_max
|
||||
/ t_75) * 100), funcid, rank)] = FunctionDispersionData(n,
|
||||
t_sum, t_min, t_25, t_50, t_75, t_95, t_max);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
//alldata.functionDurationSectionMapGlobal.clear();
|
||||
|
||||
|
@ -18,14 +18,14 @@ enum { FENCE= 0xDEADBEEF };
|
||||
|
||||
|
||||
/* pack the local alldata into a buffer, return buffer */
|
||||
static char* pack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
static char* pack_worker_data( AllData& alldata, uint32_t sizes[12] ) {
|
||||
|
||||
uint64_t fence= FENCE;
|
||||
uint32_t num_fences= 1;
|
||||
|
||||
/* get the sizes of all parts that need to be transmitted */
|
||||
|
||||
for ( uint32_t i= 1; i < 10; i++ ) {
|
||||
for ( uint32_t i= 1; i < 12; i++ ) {
|
||||
|
||||
sizes[i]= 0;
|
||||
}
|
||||
@ -46,7 +46,13 @@ static char* pack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
num_fences++;
|
||||
sizes[7]= alldata.collectiveMapPerGroup.size(); /* map< Pair, CollectiveData, ltPair > collectiveMapPerGroup; */
|
||||
num_fences++;
|
||||
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
sizes[10]= alldata.functionDurationSectionCallpathMapGlobal.size(); /* map< TripleCallpath, FunctionData, ltTripleCallpath > functionDurationSectionCallpathMapGlobal; */
|
||||
num_fences++;
|
||||
sizes[11]= alldata.functionCallpathMapGlobal.size(); /* map< PairCallpath, FunctionData, ltPairCallpath > */
|
||||
num_fences++;
|
||||
}
|
||||
}
|
||||
|
||||
if ( alldata.params.clustering.enabled ) {
|
||||
@ -54,8 +60,8 @@ static char* pack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
sizes[8]= alldata.functionMapPerRank.size(); /* map< Pair, FunctionData, ltPair > */
|
||||
num_fences++;
|
||||
}
|
||||
|
||||
if ( alldata.params.create_marker ) {
|
||||
|
||||
if ( alldata.params.dispersion.enabled) {
|
||||
|
||||
sizes[9]= alldata.functionMinMaxLocationMap.size(); /* map< uint64_t, FunctionMinMaxLocactionData > functionMinMaxLocationMap; */
|
||||
num_fences++;
|
||||
@ -103,7 +109,36 @@ static char* pack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
MPI_Pack_size( sizes[9] * 7, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 );
|
||||
MPI_Pack_size( sizes[9] * 0, MPI_DOUBLE, MPI_COMM_WORLD, &s2 );
|
||||
bytesize += s1 + s2;
|
||||
|
||||
|
||||
|
||||
MPI_Pack_size( sizes[10] * 9, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 );
|
||||
MPI_Pack_size( sizes[10] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 );
|
||||
bytesize += s1 + s2;
|
||||
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
map< TripleCallpath, FunctionData, ltTripleCallpath >::const_iterator it= alldata.functionDurationSectionCallpathMapGlobal.begin();
|
||||
map< TripleCallpath, FunctionData, ltTripleCallpath >::const_iterator itend= alldata.functionDurationSectionCallpathMapGlobal.end();
|
||||
|
||||
for ( ; it != itend; ++it ) {
|
||||
MPI_Pack_size( it->first.b.length(), MPI_CHAR, MPI_COMM_WORLD, &s1 );
|
||||
bytesize += s1;
|
||||
}
|
||||
|
||||
MPI_Pack_size( sizes[11] * 8, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 );
|
||||
MPI_Pack_size( sizes[11] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 );
|
||||
bytesize += s1 + s2;
|
||||
|
||||
{
|
||||
map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator it= alldata.functionCallpathMapGlobal.begin();
|
||||
map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator itend= alldata.functionCallpathMapGlobal.end();
|
||||
for ( ; it != itend; ++it ) {
|
||||
MPI_Pack_size( it->second.callpath.length(), MPI_CHAR, MPI_COMM_WORLD, &s1 );
|
||||
bytesize += s1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* get the buffer */
|
||||
sizes[0]= bytesize;
|
||||
char* buffer= alldata.guaranteePackBuffer( bytesize );
|
||||
@ -145,6 +180,40 @@ static char* pack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
/* extra check that doesn't cost too much */
|
||||
MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
/* pack functionCallpathMapGlobal */
|
||||
{
|
||||
map< PairCallpath, FunctionData, ltPairCallpath >::const_iterator it= alldata.functionCallpathMapGlobal.begin();
|
||||
map< PairCallpath, FunctionData, ltPairCallpath>::const_iterator itend= alldata.functionCallpathMapGlobal.end();
|
||||
uint64_t len;
|
||||
for ( ; it != itend; ++it ) {
|
||||
len = it->second.callpath.length();
|
||||
MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &len, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.excl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.incl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) it->first.b.c_str(), len, MPI_CHAR, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
}
|
||||
alldata.functionCallpathMapGlobal.clear();
|
||||
}
|
||||
|
||||
/* extra check that doesn't cost too much */
|
||||
MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
}
|
||||
/* pack functionDurationSectionMapGlobal */
|
||||
{
|
||||
map< Pair, FunctionData, ltPair >::const_iterator it= alldata.functionDurationSectionMapGlobal.begin();
|
||||
@ -359,6 +428,46 @@ static char* pack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
|
||||
/* extra check that doesn't cost too much */
|
||||
MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
/* pack functionDurationSectionCallpathMapGlobal*/
|
||||
|
||||
{
|
||||
map< TripleCallpath, FunctionData, ltTripleCallpath >::const_iterator it= alldata.functionDurationSectionCallpathMapGlobal.begin();
|
||||
map< TripleCallpath, FunctionData, ltTripleCallpath >::const_iterator itend= alldata.functionDurationSectionCallpathMapGlobal.end();
|
||||
uint64_t len = 0;
|
||||
|
||||
for ( ; it != itend; ++it ) {
|
||||
len = it->second.callpath.length();
|
||||
MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &len, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->first.c, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.excl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.incl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) it->second.callpath.c_str(), len, MPI_CHAR, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
}
|
||||
alldata.functionDurationSectionMapGlobal.clear();
/* NOTE(review): the loop above packs functionDurationSectionCallpathMapGlobal,
   so release that map as well; the sibling functionCallpathMapGlobal block
   clears the map it has just packed. Confirm no later consumer on the
   sending rank still needs it. */
alldata.functionDurationSectionCallpathMapGlobal.clear();
|
||||
|
||||
}
|
||||
|
||||
/* extra check that doesn't cost too much */
|
||||
MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
}
|
||||
}
|
||||
|
||||
if ( alldata.params.clustering.enabled ) {
|
||||
@ -398,8 +507,50 @@ static char* pack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
/* extra check that doesn't cost too much */
|
||||
MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
}
|
||||
|
||||
if ( alldata.params.clustering.enabled ) {
|
||||
|
||||
/* pack functionCallpathMapPerRank */
|
||||
|
||||
map< TripleCallpath, FunctionData, ltTripleCallpath >::const_iterator it= alldata.functionCallpathMapPerRank.begin();
|
||||
map< TripleCallpath, FunctionData, ltTripleCallpath >::const_iterator itend= alldata.functionCallpathMapPerRank.end();
|
||||
uint64_t len=0;
|
||||
for ( ; it != itend; ++it ) {
|
||||
len = it->first.b.length();
|
||||
MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &len, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->first.c, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.excl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.excl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Pack( (void*) &it->second.incl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
MPI_Pack( (void*) it->first.b.c_str(), len, MPI_CHAR, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
|
||||
}
|
||||
|
||||
/* in case of producing CSV output do not clear map because it is
|
||||
needed later */
|
||||
if ( !alldata.params.create_csv ) {
|
||||
|
||||
alldata.functionCallpathMapPerRank.clear();
|
||||
}
|
||||
|
||||
/* extra check that doesn't cost too much */
|
||||
MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD );
|
||||
}
|
||||
|
||||
if ( alldata.params.create_marker ) {
|
||||
if ( alldata.params.dispersion.enabled ) {
|
||||
|
||||
/* pack functionMinMaxLocationMap */
|
||||
|
||||
@ -426,7 +577,7 @@ static char* pack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
|
||||
|
||||
/* prepare alldata for unpack, return buffer of sufficient size */
|
||||
static char* prepare_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
static char* prepare_worker_data( AllData& alldata, uint32_t sizes[12] ) {
|
||||
|
||||
uint32_t bytesize= sizes[0];
|
||||
|
||||
@ -434,7 +585,7 @@ static char* prepare_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
}
|
||||
|
||||
/* unpack the received worker data and add it to the local alldata */
|
||||
static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
static void unpack_worker_data( AllData& alldata, uint32_t sizes[12] ) {
|
||||
|
||||
uint64_t fence;
|
||||
|
||||
@ -447,6 +598,10 @@ static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
assert( FENCE == fence );
|
||||
|
||||
|
||||
/* chararray for unpacking the callpath */
|
||||
char* callpath = (char*) malloc(alldata.maxCallpathLength * sizeof(char));
|
||||
|
||||
if ( alldata.params.create_tex ) {
|
||||
|
||||
/* unpack functionMapGlobal */
|
||||
@ -474,12 +629,46 @@ static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
|
||||
alldata.functionMapGlobal[ func ].add( tmp );
|
||||
}
|
||||
|
||||
/* extra check that doesn't cost too much */
|
||||
fence= 0;
|
||||
MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
assert( FENCE == fence );
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
/* unpack functionCallpathMapGlobal */
|
||||
for ( uint32_t i= 0; i < sizes[11]; i++ ) {
|
||||
|
||||
uint64_t func;
|
||||
uint64_t len;
|
||||
FunctionData tmp;
|
||||
|
||||
MPI_Unpack( buffer, sizes[0], &position, &func, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &len, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.count.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.count.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.count.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.count.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Unpack( buffer, sizes[0], &position, callpath, len, MPI_CHAR, MPI_COMM_WORLD );
|
||||
/* MPI_Unpack wrote exactly 'len' chars into the malloc'ed buffer and no
   terminating NUL, so copy by explicit length instead of treating the
   buffer as a C string */
tmp.callpath.assign( callpath, len );
|
||||
alldata.functionCallpathMapGlobal[ PairCallpath(func,tmp.callpath) ].add( tmp );
|
||||
}
|
||||
/* extra check that doesn't cost too much */
|
||||
fence= 0;
|
||||
MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
assert( FENCE == fence );
|
||||
}
|
||||
/* unpack functionDurationSectionMapGlobal */
|
||||
for ( uint32_t i= 0; i < sizes[2]; i++ ) {
|
||||
|
||||
@ -710,6 +899,47 @@ static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
fence= 0;
|
||||
MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
assert( FENCE == fence );
|
||||
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
/* unpack functionDurationSectionCallpathMapGlobal */
|
||||
for ( uint32_t i= 0; i < sizes[10]; i++ ) {
|
||||
|
||||
uint64_t func;
|
||||
uint64_t bin;
|
||||
uint64_t len;
|
||||
FunctionData tmp;
|
||||
|
||||
MPI_Unpack( buffer, sizes[0], &position, &func, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &len, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &bin, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.count.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.count.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.count.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.count.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD );
|
||||
MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Unpack( buffer, sizes[0], &position, callpath, len, MPI_CHAR, MPI_COMM_WORLD );
|
||||
/* MPI_Unpack wrote exactly 'len' chars into the malloc'ed buffer and no
   terminating NUL, so copy by explicit length instead of treating the
   buffer as a C string */
tmp.callpath.assign( callpath, len );
|
||||
alldata.functionDurationSectionCallpathMapGlobal[ TripleCallpath( func, tmp.callpath,bin ) ].add( tmp );
|
||||
|
||||
}
|
||||
|
||||
fence= 0;
|
||||
MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
assert( FENCE == fence );
|
||||
}
|
||||
}
|
||||
|
||||
if ( alldata.params.clustering.enabled ) {
|
||||
@ -747,8 +977,8 @@ static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
assert( FENCE == fence );
|
||||
}
|
||||
|
||||
if ( alldata.params.create_marker ) {
|
||||
|
||||
if ( alldata.params.dispersion.enabled) {
|
||||
|
||||
/* unpack functionMinMaxLocationMap */
|
||||
for ( uint32_t i= 0; i < sizes[9]; i++) {
|
||||
@ -772,6 +1002,8 @@ static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
||||
MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
|
||||
assert( FENCE == fence );
|
||||
}
|
||||
/* free the callpath chararray */
|
||||
delete callpath;
|
||||
}
|
||||
|
||||
|
||||
@ -813,14 +1045,14 @@ bool ReduceData( AllData& alldata ) {
|
||||
}
|
||||
|
||||
/* send to smaller peer, receive from larger one */
|
||||
uint32_t sizes[10];
|
||||
uint32_t sizes[12];
|
||||
char* buffer;
|
||||
|
||||
if ( alldata.myRank < peer ) {
|
||||
|
||||
MPI_Status status;
|
||||
|
||||
MPI_Recv( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD,
|
||||
MPI_Recv( sizes, 12, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD,
|
||||
&status );
|
||||
|
||||
// DEBUG
|
||||
@ -860,7 +1092,7 @@ bool ReduceData( AllData& alldata ) {
|
||||
"round %u / %u: sending %u bytes to rank %u\n",
|
||||
round_no, num_rounds, sizes[0], peer );
|
||||
|
||||
MPI_Send( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD );
|
||||
MPI_Send( sizes, 12, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Send( buffer, sizes[0], MPI_PACKED, peer, 5,
|
||||
MPI_COMM_WORLD );
|
||||
@ -891,14 +1123,15 @@ bool ReduceData( AllData& alldata ) {
|
||||
bool ReduceDataDispersion( AllData& alldata ) {
|
||||
|
||||
bool error= false;
|
||||
|
||||
|
||||
assert( 1 < alldata.numRanks );
|
||||
|
||||
/* start runtime measurement for reducing data */
|
||||
StartMeasurement( alldata, 1, true, "reduce data dispersion" );
|
||||
|
||||
VerbosePrint( alldata, 1, true, "reducing data dispersion\n" );
|
||||
|
||||
|
||||
|
||||
/* implement reduction myself because MPI and C++ STL don't play with
|
||||
each other */
|
||||
|
||||
@ -906,6 +1139,7 @@ bool ReduceDataDispersion( AllData& alldata ) {
|
||||
uint32_t num_rounds= Logi( alldata.numRanks ) -1;
|
||||
uint32_t round_no= 0;
|
||||
uint32_t round= 1;
|
||||
|
||||
while ( round < alldata.numRanks ) {
|
||||
|
||||
round_no++;
|
||||
@ -926,25 +1160,25 @@ bool ReduceDataDispersion( AllData& alldata ) {
|
||||
}
|
||||
|
||||
/* send to smaller peer, receive from larger one */
|
||||
uint32_t sizes[10];
|
||||
uint32_t sizes[12];
|
||||
char* buffer;
|
||||
|
||||
if ( alldata.myRank < peer ) {
|
||||
|
||||
MPI_Status status;
|
||||
|
||||
MPI_Recv( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD,
|
||||
MPI_Recv( sizes, 12, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD,
|
||||
&status );
|
||||
|
||||
// DEBUG
|
||||
//cout << " round " << round << " recv " << peer << "--> " <<
|
||||
//alldata.myRank << " with " <<
|
||||
//sizes[0] << " bytes, " <<
|
||||
//sizes[1] << ", " <<
|
||||
//sizes[2] << ", " <<
|
||||
//sizes[3] << ", " <<
|
||||
//sizes[4] << "" << endl << flush;
|
||||
|
||||
/* cout << " round " << round << " recv " << peer << "--> " <<
|
||||
alldata.myRank << " with " <<
|
||||
sizes[0] << " bytes, " <<
|
||||
sizes[1] << ", " <<
|
||||
sizes[2] << ", " <<
|
||||
sizes[3] << ", " <<
|
||||
sizes[4] << "" << endl << flush;
|
||||
*/
|
||||
buffer= prepare_worker_data( alldata, sizes );
|
||||
|
||||
VerbosePrint( alldata, 2, false,
|
||||
@ -960,6 +1194,8 @@ bool ReduceDataDispersion( AllData& alldata ) {
|
||||
|
||||
/* don't reduce function map global twice */
|
||||
alldata.functionMapGlobal.clear();
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
alldata.functionCallpathMapGlobal.clear();
|
||||
|
||||
buffer= pack_worker_data( alldata, sizes );
|
||||
|
||||
@ -976,7 +1212,7 @@ bool ReduceDataDispersion( AllData& alldata ) {
|
||||
"round %u / %u: sending %u bytes to rank %u\n",
|
||||
round_no, num_rounds, sizes[0], peer );
|
||||
|
||||
MPI_Send( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD );
|
||||
MPI_Send( sizes, 12, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD );
|
||||
|
||||
MPI_Send( buffer, sizes[0], MPI_PACKED, peer, 5,
|
||||
MPI_COMM_WORLD );
|
||||
@ -988,7 +1224,6 @@ bool ReduceDataDispersion( AllData& alldata ) {
|
||||
|
||||
round= round << 1;
|
||||
}
|
||||
|
||||
alldata.freePackBuffer();
|
||||
|
||||
/* synchronize error indicator with workers */
|
||||
|
@ -225,6 +225,28 @@ bool SummarizeData( AllData& alldata ) {
|
||||
|
||||
alldata.functionMapPerRank.clear();
|
||||
}
|
||||
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
map< TripleCallpath, FunctionData, ltTripleCallpath >::const_iterator it= alldata.functionCallpathMapPerRank.begin();
|
||||
map< TripleCallpath, FunctionData, ltTripleCallpath >::const_iterator itend= alldata.functionCallpathMapPerRank.end();
|
||||
while ( itend != it ) {
|
||||
|
||||
const uint64_t& func= it->first.c;
|
||||
const string callpath= it->first.b;
|
||||
|
||||
alldata.functionCallpathMapGlobal[ PairCallpath(func,callpath) ].add( it->second );
|
||||
it++;
|
||||
}
|
||||
|
||||
/* in case of additional clustering or producing CSV output do not
|
||||
clear map ( rank x func ) because it is needed later */
|
||||
if ( !alldata.params.clustering.enabled &&
|
||||
!alldata.params.create_csv ) {
|
||||
|
||||
alldata.functionCallpathMapPerRank.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* summarize map ( rank x func x counter ) to map ( counter x func ) */
|
||||
@ -400,12 +422,26 @@ bool SummarizeDataDispersion( AllData& alldata ) {
|
||||
it++;
|
||||
}
|
||||
|
||||
if(alldata.params.dispersion.mode == DISPERSION_MODE_PERCALLPATH)
|
||||
{
|
||||
map< Quadruple, FunctionData, ltQuadruple>::const_iterator itc= alldata.functionDurationSectionCallpathMapPerRank.begin();
|
||||
map< Quadruple, FunctionData, ltQuadruple>::const_iterator itendc= alldata.functionDurationSectionCallpathMapPerRank.end();
|
||||
while ( itendc != itc ) {
|
||||
|
||||
const uint64_t& func= itc->first.b;
|
||||
const string callpath= itc->first.c;
|
||||
const uint64_t& bin= itc->first.d;
|
||||
|
||||
alldata.functionDurationSectionCallpathMapGlobal[ TripleCallpath( func, callpath, bin ) ].add( itc->second );
|
||||
itc++;
|
||||
}
|
||||
}
|
||||
/* in case of producing CSV output do not clear map ( rank x func x bin )
|
||||
because it is needed later */
|
||||
if ( !alldata.params.create_csv ) {
|
||||
|
||||
alldata.functionDurationSectionMapPerRank.clear();
|
||||
|
||||
alldata.functionDurationSectionCallpathMapPerRank.clear();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,11 +13,13 @@
|
||||
#ifndef _VT_INTTYPES_H
|
||||
#define _VT_INTTYPES_H
|
||||
|
||||
#ifndef _CONFIG_H
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#else /* HAVE_CONFIG_H */
|
||||
# define HAVE_STDINT_H @HAVE_STDINT_H@
|
||||
# define HAVE_INTTYPES_H @HAVE_INTTYPES_H@
|
||||
# define SIZEOF_LONG @SIZEOF_LONG@
|
||||
#endif /* _CONFIG_H */
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
|
||||
#if defined(HAVE_STDINT_H) && HAVE_STDINT_H && !defined(__sgi)
|
||||
# include <stdint.h>
|
||||
|
@ -1,6 +1,6 @@
|
||||
noinst_LTLIBRARIES = libvt_rfg.la
|
||||
|
||||
INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include
|
||||
INCLUDES = -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)/util -I$(top_builddir)/util
|
||||
|
||||
libvt_rfg_la_CFLAGS = -static -prefer-pic
|
||||
libvt_rfg_la_LDFLAGS = -static
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -7,40 +7,87 @@
|
||||
#define RFG_FILTER_FLAG_GROUP 1
|
||||
#define RFG_FILTER_FLAG_RECURSIVE 2
|
||||
|
||||
/* max. number of regions in a call path */
|
||||
#define RFG_FILTER_MAX_CPATH_SIZE 0x80
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
typedef struct RFG_Filter_struct RFG_Filter;
|
||||
|
||||
/* initalizes RFG filter object */
|
||||
/* data structure for call-path filter rules */
|
||||
|
||||
typedef struct RFG_FilterCallPathRules_struct
|
||||
{
|
||||
/* hash value of region id array */
|
||||
uint32_t hash;
|
||||
|
||||
/* number of region ids in call path */
|
||||
uint32_t size;
|
||||
|
||||
/* array of region ids in call-path */
|
||||
uint32_t regionIds[RFG_FILTER_MAX_CPATH_SIZE];
|
||||
|
||||
/* call limit */
|
||||
int32_t callLimit;
|
||||
|
||||
} RFG_FilterCallPathRules;
|
||||
|
||||
/* initializes RFG filter object */
|
||||
RFG_Filter* RFG_Filter_init( void );
|
||||
|
||||
/* cleanup RFG filter object */
|
||||
int RFG_Filter_free( RFG_Filter* filter );
|
||||
|
||||
/* reset filter assignments */
|
||||
/* reset filter rules */
|
||||
int RFG_Filter_reset( RFG_Filter* filter );
|
||||
|
||||
/* sets filter definition file name */
|
||||
int RFG_Filter_setDefFile( RFG_Filter* filter, const char* deffile );
|
||||
/* sets pointer to an external function which generates region ids */
|
||||
int RFG_Filter_setRegionIdGenFunc( RFG_Filter* filter, uint32_t (*func)(void) );
|
||||
|
||||
/* sets default call limit */
|
||||
int RFG_Filter_setDefaultCallLimit( RFG_Filter* filter, int32_t limit );
|
||||
/* gets region id by region name, if it's generated during reading call-path
|
||||
filter rules */
|
||||
uint32_t RFG_Filter_getRegionId( RFG_Filter* filter, const char* regionName );
|
||||
|
||||
/* sets filter definition file name */
|
||||
int RFG_Filter_setDefFile( RFG_Filter* filter, const char* fileName );
|
||||
|
||||
/* reads region filter definition file
|
||||
if rank != -1, read file with MPI-rank specific entries,
|
||||
if ( 0 != rank_off ) after the call, then tracing should be disabled
|
||||
if isRankOff != 0 after the call, then tracing should be disabled
|
||||
completely for the current rank, existing information should be discarded. */
|
||||
int RFG_Filter_readDefFile( RFG_Filter* filter, int rank, uint8_t* rank_off );
|
||||
int RFG_Filter_readDefFile( RFG_Filter* filter, int rank,
|
||||
uint8_t* r_isRankOff );
|
||||
|
||||
/* adds filter assignment */
|
||||
int RFG_Filter_add( RFG_Filter* filter, const char* pattern, int32_t climit,
|
||||
uint32_t* sbounds, uint8_t flags );
|
||||
/* adds region filter rules */
|
||||
int RFG_Filter_addRegionRules( RFG_Filter* filter, const char* pattern,
|
||||
int32_t callLimit, uint32_t* stackBounds,
|
||||
uint8_t flags );
|
||||
|
||||
/* gets call limit, stack level bounds, and flags by region/group name */
|
||||
int RFG_Filter_get( RFG_Filter* filter, const char* rname, const char* gname,
|
||||
int32_t* r_climit, uint32_t* r_sbounds, uint8_t* r_flags );
|
||||
/* gets region filter rules by region/group name */
|
||||
int RFG_Filter_getRegionRules( RFG_Filter* filter, const char* regionName,
|
||||
const char* groupName, int32_t* r_callLimit,
|
||||
uint32_t* r_stackBounds, uint8_t* r_flags );
|
||||
|
||||
/* adds call-path filter rules
|
||||
translates given region names into ids (r_regionIds) and generates
|
||||
a hash value for them (r_hash) */
|
||||
int RFG_Filter_addCallPathRules( RFG_Filter* filter, uint32_t size,
|
||||
const char** regionNames, int32_t callLimit,
|
||||
uint32_t* r_hash, uint32_t** r_regionIds );
|
||||
|
||||
/* gets call-path filter rules (i.e. call limit)
|
||||
Note: other than RFG_Filter_getRegionRules, this function returns 0 if no
|
||||
matching filter rule was found */
|
||||
int RFG_Filter_getCallPathRules( RFG_Filter* filter, uint32_t hash,
|
||||
uint32_t size, const uint32_t* regionIds,
|
||||
int32_t* r_callLimit );
|
||||
|
||||
/* gets all call-path filter rules */
|
||||
int RFG_Filter_getAllCallPathRules( RFG_Filter* filter,
|
||||
uint32_t* r_numRules,
|
||||
RFG_FilterCallPathRules** r_rules );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -9,25 +9,36 @@
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define MAX_LINE_LEN 0x20000 /* max file line length */
|
||||
#define MAX_LINE_LEN 0x20000 /* max. file line length */
|
||||
|
||||
/* data structure for group assignments */
|
||||
|
||||
typedef struct RFG_GroupsAssign_struct
|
||||
{
|
||||
char* group; /* group name */
|
||||
uint32_t npattern; /* number of assigned pattern */
|
||||
char** pattern; /* array of assigned pattern */
|
||||
/* group name */
|
||||
char* group_name;
|
||||
|
||||
/* number of assigned patterns */
|
||||
uint32_t num_pattern;
|
||||
|
||||
/* array of assigned patterns */
|
||||
char** pattern;
|
||||
|
||||
} RFG_GroupsAssign;
|
||||
|
||||
/* main data structure for RFG Groups */
|
||||
|
||||
struct RFG_Groups_struct
|
||||
{
|
||||
char* deffile; /* name of group definition file */
|
||||
/* name of group definition file */
|
||||
char* file_name;
|
||||
|
||||
/* number of group assignments */
|
||||
uint32_t num_assigns;
|
||||
|
||||
/* array of group assignments */
|
||||
RFG_GroupsAssign* assigns;
|
||||
|
||||
uint32_t nassigns; /* number of group assignments */
|
||||
RFG_GroupsAssign* assigns; /* array of group assignments */
|
||||
};
|
||||
|
||||
RFG_Groups* RFG_Groups_init()
|
||||
@ -35,17 +46,7 @@ RFG_Groups* RFG_Groups_init()
|
||||
RFG_Groups* ret;
|
||||
|
||||
/* allocate memory for RFG groups object */
|
||||
|
||||
ret = ( RFG_Groups* )malloc( sizeof( RFG_Groups ) );
|
||||
if( ret == NULL )
|
||||
return NULL;
|
||||
|
||||
/* some initializes of data structure elements */
|
||||
|
||||
ret->deffile = NULL;
|
||||
|
||||
ret->nassigns = 0;
|
||||
ret->assigns = NULL;
|
||||
ret = ( RFG_Groups* )calloc( 1, sizeof( RFG_Groups ) );
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -55,21 +56,22 @@ int RFG_Groups_free( RFG_Groups* groups )
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
|
||||
if( !groups ) return 0;
|
||||
if( !groups )
|
||||
return 0;
|
||||
|
||||
/* free group definition file name */
|
||||
|
||||
if( groups->deffile )
|
||||
free( groups->deffile );
|
||||
if( groups->file_name )
|
||||
free( groups->file_name );
|
||||
|
||||
/* free array of group assignments */
|
||||
|
||||
for( i = 0; i < groups->nassigns; i++ )
|
||||
for( i = 0; i < groups->num_assigns; i++ )
|
||||
{
|
||||
for( j = 0; j < groups->assigns[i].npattern; j++ )
|
||||
for( j = 0; j < groups->assigns[i].num_pattern; j++ )
|
||||
free( groups->assigns[i].pattern[j] );
|
||||
|
||||
free( groups->assigns[i].group );
|
||||
free( groups->assigns[i].group_name );
|
||||
free( groups->assigns[i].pattern );
|
||||
}
|
||||
|
||||
@ -83,18 +85,25 @@ int RFG_Groups_free( RFG_Groups* groups )
|
||||
return 1;
|
||||
}
|
||||
|
||||
int RFG_Groups_setDefFile( RFG_Groups* groups, const char* deffile )
|
||||
int RFG_Groups_setDefFile( RFG_Groups* groups, const char* fileName )
|
||||
{
|
||||
if( !groups ) return 0;
|
||||
if( !groups )
|
||||
return 0;
|
||||
|
||||
if( !fileName || *fileName == '\0' )
|
||||
{
|
||||
fprintf( stderr, "RFG_Groups_setDefFile(): Error: Empty file name\n" );
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* if a group definition file is already set, free it first */
|
||||
|
||||
if( groups->deffile )
|
||||
free( groups->deffile );
|
||||
if( groups->file_name )
|
||||
free( groups->file_name );
|
||||
|
||||
/* set new group definition file */
|
||||
|
||||
groups->deffile = strdup( deffile );
|
||||
groups->file_name = strdup( fileName );
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -106,23 +115,25 @@ int RFG_Groups_readDefFile( RFG_Groups* groups )
|
||||
uint32_t lineno = 0;
|
||||
uint8_t parse_err = 0;
|
||||
|
||||
if( !groups ) return 0;
|
||||
if( !groups )
|
||||
return 0;
|
||||
|
||||
if( !groups->deffile ) return 1;
|
||||
if( !groups->file_name )
|
||||
return 1;
|
||||
|
||||
/* open group definition file */
|
||||
|
||||
f = fopen( groups->deffile, "r" );
|
||||
f = fopen( groups->file_name, "r" );
|
||||
if( !f )
|
||||
{
|
||||
fprintf( stderr,
|
||||
"RFG_Groups_readDefFile(): Error: Could not open file '%s'\n",
|
||||
groups->deffile );
|
||||
"RFG_Groups_readDefFile(): Error: Could not open file '%s'\n",
|
||||
groups->file_name );
|
||||
return 0;
|
||||
}
|
||||
|
||||
line = ( char* )malloc( MAX_LINE_LEN * sizeof( char ) );
|
||||
if( line == NULL )
|
||||
if( !line )
|
||||
{
|
||||
fclose( f );
|
||||
return 0;
|
||||
@ -148,7 +159,7 @@ int RFG_Groups_readDefFile( RFG_Groups* groups )
|
||||
/* cut possible comment from line */
|
||||
|
||||
p = strchr( line, '#' );
|
||||
if( p != NULL )
|
||||
if( p )
|
||||
*p = '\0';
|
||||
|
||||
/* continue if line is empty */
|
||||
@ -161,7 +172,7 @@ int RFG_Groups_readDefFile( RFG_Groups* groups )
|
||||
*/
|
||||
|
||||
p = strchr( line, '=' );
|
||||
if( p == NULL )
|
||||
if( !p )
|
||||
{
|
||||
parse_err = 1;
|
||||
break;
|
||||
@ -208,7 +219,7 @@ int RFG_Groups_readDefFile( RFG_Groups* groups )
|
||||
if( parse_err )
|
||||
{
|
||||
fprintf( stderr, "%s:%u: Could not be parsed\n",
|
||||
groups->deffile, lineno );
|
||||
groups->file_name, lineno );
|
||||
}
|
||||
|
||||
free( line );
|
||||
@ -218,19 +229,32 @@ int RFG_Groups_readDefFile( RFG_Groups* groups )
|
||||
return parse_err ? 0 : 1;
|
||||
}
|
||||
|
||||
int RFG_Groups_addAssign( RFG_Groups* groups, const char* gname,
|
||||
int RFG_Groups_addAssign( RFG_Groups* groups, const char* groupName,
|
||||
const char* pattern )
|
||||
{
|
||||
uint32_t i;
|
||||
RFG_GroupsAssign* entry = NULL;
|
||||
|
||||
if( !groups || !gname || !pattern ) return 0;
|
||||
if( !groups )
|
||||
return 0;
|
||||
|
||||
if( !groupName || *groupName == '\0' )
|
||||
{
|
||||
fprintf( stderr, "RFG_Groups_addAssign(): Error: Empty group name\n" );
|
||||
return 0;
|
||||
}
|
||||
|
||||
if( !pattern || *pattern == '\0' )
|
||||
{
|
||||
fprintf( stderr, "RFG_Groups_addAssign(): Error: Empty region pattern\n" );
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* search group assignment by group name */
|
||||
|
||||
for( i = 0; i < groups->nassigns; i++ )
|
||||
for( i = 0; i < groups->num_assigns; i++ )
|
||||
{
|
||||
if( strcmp( groups->assigns[i].group, gname ) == 0 )
|
||||
if( strcmp( groups->assigns[i].group_name, groupName ) == 0 )
|
||||
{
|
||||
entry = &(groups->assigns[i]);
|
||||
break;
|
||||
@ -250,16 +274,15 @@ int RFG_Groups_addAssign( RFG_Groups* groups, const char* gname,
|
||||
{
|
||||
groups->assigns =
|
||||
(RFG_GroupsAssign* )realloc( groups->assigns,
|
||||
( groups->nassigns + 1 )
|
||||
* sizeof( RFG_GroupsAssign ) );
|
||||
( groups->num_assigns + 1 ) * sizeof( RFG_GroupsAssign ) );
|
||||
}
|
||||
|
||||
if( groups->assigns == NULL )
|
||||
if( !groups->assigns )
|
||||
return 0;
|
||||
|
||||
entry = &(groups->assigns[groups->nassigns++]);
|
||||
entry->group = strdup( gname );
|
||||
entry->npattern = 0;
|
||||
entry = &(groups->assigns[groups->num_assigns++]);
|
||||
entry->group_name = strdup( groupName );
|
||||
entry->num_pattern = 0;
|
||||
entry->pattern = NULL;
|
||||
}
|
||||
|
||||
@ -267,45 +290,51 @@ int RFG_Groups_addAssign( RFG_Groups* groups, const char* gname,
|
||||
|
||||
if( !entry->pattern )
|
||||
{
|
||||
entry->pattern = ( char** )malloc( sizeof( char * ) );
|
||||
entry->pattern = ( char** )malloc( sizeof( char* ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
entry->pattern = ( char** )realloc( entry->pattern,
|
||||
( entry->npattern + 1 )
|
||||
* sizeof( char * ) );
|
||||
( entry->num_pattern + 1 ) * sizeof( char* ) );
|
||||
}
|
||||
if( entry->pattern == NULL )
|
||||
if( !entry->pattern )
|
||||
return 0;
|
||||
|
||||
entry->pattern[entry->npattern++] = strdup( pattern );
|
||||
entry->pattern[entry->num_pattern++] = strdup( pattern );
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int RFG_Groups_get( RFG_Groups* groups, const char* rname,
|
||||
char** r_gname )
|
||||
int RFG_Groups_get( RFG_Groups* groups, const char* regionName,
|
||||
char** r_groupName )
|
||||
{
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
|
||||
if( !groups || !rname ) return 0;
|
||||
if( !groups )
|
||||
return 0;
|
||||
|
||||
if( !regionName || *regionName == '\0' )
|
||||
{
|
||||
fprintf( stderr, "RFG_Groups_get(): Error: Empty region name\n" );
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* search for matching pattern by region name */
|
||||
|
||||
for( i = 0; i < groups->nassigns; i++ )
|
||||
for( i = 0; i < groups->num_assigns; i++ )
|
||||
{
|
||||
for( j = 0; j < groups->assigns[i].npattern; j++ )
|
||||
for( j = 0; j < groups->assigns[i].num_pattern; j++ )
|
||||
{
|
||||
if( fnmatch( groups->assigns[i].pattern[j], rname, 0 ) == 0 )
|
||||
if( fnmatch( groups->assigns[i].pattern[j], regionName, 0 ) == 0 )
|
||||
{
|
||||
*r_gname = groups->assigns[i].group;
|
||||
*r_groupName = groups->assigns[i].group_name;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*r_gname = NULL;
|
||||
*r_groupName = NULL;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
@ -16,18 +16,18 @@ RFG_Groups* RFG_Groups_init( void );
|
||||
int RFG_Groups_free( RFG_Groups* groups );
|
||||
|
||||
/* sets group definition file name */
|
||||
int RFG_Groups_setDefFile( RFG_Groups* groups, const char* deffile );
|
||||
int RFG_Groups_setDefFile( RFG_Groups* groups, const char* fileName );
|
||||
|
||||
/* reads group definition file */
|
||||
int RFG_Groups_readDefFile( RFG_Groups* groups );
|
||||
|
||||
/* adds group assignment */
|
||||
int RFG_Groups_addAssign( RFG_Groups* groups, const char* gname,
|
||||
int RFG_Groups_addAssign( RFG_Groups* groups, const char* groupName,
|
||||
const char* pattern );
|
||||
|
||||
/* gets group name by region name */
|
||||
int RFG_Groups_get( RFG_Groups* groups, const char* rname,
|
||||
char** r_gname );
|
||||
int RFG_Groups_get( RFG_Groups* groups, const char* regionName,
|
||||
char** r_groupName );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -13,66 +13,110 @@ extern "C" {
|
||||
|
||||
typedef struct RFG_Regions_struct RFG_Regions;
|
||||
|
||||
/* data structure for hash node (mapping of region id/info) */
|
||||
/* data structure for region info */
|
||||
|
||||
typedef struct RFG_RegionInfo_struct
|
||||
{
|
||||
uint32_t regionId; /* region id */
|
||||
char* groupName; /* group name */
|
||||
char* regionName; /* region name */
|
||||
int32_t callLimit; /* call limit */
|
||||
int32_t callLimitCD; /* call limit count down */
|
||||
uint32_t stackBounds[2]; /* stack level bounds */
|
||||
uint8_t flags; /* flags bitmask (group, recursiveness) */
|
||||
struct RFG_RegionInfo_struct* next;
|
||||
/* region id */
|
||||
uint32_t regionId;
|
||||
|
||||
/* group name */
|
||||
char* groupName;
|
||||
|
||||
/* region name */
|
||||
char* regionName;
|
||||
|
||||
/* call limit */
|
||||
int32_t callLimit;
|
||||
|
||||
/* call limit count down */
|
||||
int32_t callLimitCD;
|
||||
|
||||
/* stack level bounds */
|
||||
uint32_t stackBounds[2];
|
||||
|
||||
/* flags bitmask (group, recursiveness) */
|
||||
uint8_t flags;
|
||||
|
||||
} RFG_RegionInfo;
|
||||
|
||||
/* initalizes RFG regions object */
|
||||
/* data structure for call-path info */
|
||||
|
||||
typedef struct RFG_CallPathInfo_struct
|
||||
{
|
||||
/* hash value of region id array */
|
||||
uint32_t hash;
|
||||
|
||||
/* number of region ids in call path */
|
||||
uint32_t size;
|
||||
|
||||
/* array of region ids in call-path */
|
||||
uint32_t regionIds[RFG_FILTER_MAX_CPATH_SIZE];
|
||||
|
||||
/* call limit */
|
||||
int32_t callLimit;
|
||||
|
||||
/* call limit count down */
|
||||
int32_t callLimitCD;
|
||||
|
||||
} RFG_CallPathInfo;
|
||||
|
||||
/* initializes RFG regions object */
|
||||
RFG_Regions* RFG_Regions_init( void );
|
||||
|
||||
/* duplicates RFG regions object */
|
||||
RFG_Regions* RFG_Regions_dup( const RFG_Regions* oldRegions );
|
||||
|
||||
/* cleanup RFG regions object */
|
||||
int RFG_Regions_free( RFG_Regions* regions );
|
||||
|
||||
/* sets pointer to a function which generates region ids */
|
||||
int RFG_Regions_setRegionIdGenFunc( RFG_Regions* regions,
|
||||
uint32_t (*func)(void) );
|
||||
|
||||
/* gets region id by region name, if it's generated during reading call-path
|
||||
filter rules */
|
||||
uint32_t RFG_Regions_getRegionId( RFG_Regions* regions,
|
||||
const char* regionName );
|
||||
|
||||
/* sets region filter definition file */
|
||||
int RFG_Regions_setFilterDefFile( RFG_Regions* regions, const char* deffile );
|
||||
int RFG_Regions_setFilterDefFile( RFG_Regions* regions, const char* fileName );
|
||||
|
||||
/* sets region grouping definition file */
|
||||
int RFG_Regions_setGroupsDefFile( RFG_Regions* regions, const char* deffile );
|
||||
int RFG_Regions_setGroupsDefFile( RFG_Regions* regions, const char* fileName );
|
||||
|
||||
/* reads region filter definition file
|
||||
if rank != -1, read file with MPI-rank specific entries,
|
||||
if ( 0 != rank_off ) after the call, then tracing should be disabled
|
||||
if isRankOff != 0 after the call, then tracing should be disabled
|
||||
completely for the current rank, existing information should be discarded. */
|
||||
int RFG_Regions_readFilterDefFile( RFG_Regions* regions,
|
||||
int rank, uint8_t* rank_off );
|
||||
int RFG_Regions_readFilterDefFile( RFG_Regions* regions, int rank,
|
||||
uint8_t* r_isRankOff );
|
||||
|
||||
/* reads region grouping definition file */
|
||||
int RFG_Regions_readGroupsDefFile( RFG_Regions* regions );
|
||||
|
||||
/* sets default call limit */
|
||||
int RFG_Regions_setDefaultCallLimit( RFG_Regions* regions,
|
||||
const uint32_t limit );
|
||||
|
||||
/* adds group assignment */
|
||||
int RFG_Regions_addGroupAssign( RFG_Regions* regions,
|
||||
const char* gname, int n, ... );
|
||||
int RFG_Regions_addGroupAssign( RFG_Regions* regions, const char* groupName,
|
||||
int n, ... );
|
||||
|
||||
/* function that should be called if a region enter event invoked */
|
||||
int RFG_Regions_stackPush( RFG_Regions* regions,
|
||||
const uint32_t rid, const uint8_t decrement,
|
||||
RFG_RegionInfo** r_rinf, uint8_t* r_rejected );
|
||||
int RFG_Regions_stackPush( RFG_Regions* regions, uint32_t regionId,
|
||||
RFG_RegionInfo** r_regionInfo,
|
||||
RFG_CallPathInfo** r_cpathInfo,
|
||||
uint8_t* r_wasApproved );
|
||||
|
||||
/* function that should be called if a region leave event invoked */
|
||||
int RFG_Regions_stackPop( RFG_Regions* regions,
|
||||
RFG_RegionInfo** r_rinf, uint8_t* r_rejected );
|
||||
int RFG_Regions_stackPop( RFG_Regions* regions, RFG_RegionInfo** r_regionInfo,
|
||||
RFG_CallPathInfo** r_cpathInfo,
|
||||
uint8_t* r_wasApproved );
|
||||
|
||||
/* adds region */
|
||||
RFG_RegionInfo* RFG_Regions_add( RFG_Regions* regions, uint32_t rid,
|
||||
const char* rname, const char* defgname );
|
||||
RFG_RegionInfo* RFG_Regions_add( RFG_Regions* regions, uint32_t regionId,
|
||||
const char* regionName,
|
||||
const char* groupName );
|
||||
|
||||
/* gets region informations by region id */
|
||||
RFG_RegionInfo* RFG_Regions_get( RFG_Regions* regions,
|
||||
const uint32_t rid );
|
||||
/* gets region info by region id */
|
||||
RFG_RegionInfo* RFG_Regions_get( RFG_Regions* regions, uint32_t regionId );
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -1,3 +1,16 @@
|
||||
27. Fixed processing preprocessor directives
|
||||
containing character string constants with C/C++ comment
|
||||
indicators (/**/, //)
|
||||
|
||||
26. Fixed processing C/C++ line-continuation
|
||||
within character string constants
|
||||
|
||||
25. Fixed C/C++ parsing
|
||||
character string constants containing successive quotes (e.g. "\"\"")
|
||||
|
||||
24. Fixed Fortran parsing
|
||||
for detecting unbounded DO loop beginnings
|
||||
|
||||
23. Fixed parsing
|
||||
source files containing Carriage Returns
|
||||
|
||||
|
@ -1,3 +1,16 @@
|
||||
27. Fixed processing preprocessor directives
|
||||
containing character string constants with C/C++ comment
|
||||
indicators (/**/, //)
|
||||
|
||||
26. Fixed processing C/C++ line-continuation
|
||||
within character string constants
|
||||
|
||||
25. Fixed C/C++ parsing
|
||||
character string constants containing successive quotes (e.g. "\"\"")
|
||||
|
||||
24. Fixed Fortran parsing
|
||||
for detecting unbounded DO loop beginnings
|
||||
|
||||
23. Fixed parsing
|
||||
source files containing Carriage Returns
|
||||
|
||||
|
@ -48,18 +48,12 @@ namespace {
|
||||
string::size_type ppos, bool* e, bool* f, bool asd) {
|
||||
unsigned s = preStmt.size();
|
||||
bool inComment = false;
|
||||
bool inString = false;
|
||||
|
||||
for (unsigned i=0; i<s; ++i) {
|
||||
string::size_type pos = 0;
|
||||
string& line = preStmt[i];
|
||||
|
||||
// shift bonded line-continuation '\' one position to right
|
||||
if ( line[line.size()-1] == '\\'
|
||||
&& line.size() >= 2 && line[line.size()-2] != ' '
|
||||
&& line[line.size()-2] != '\t' ) {
|
||||
line.insert(line.size()-1, " ");
|
||||
}
|
||||
|
||||
// "remove" comments
|
||||
while ( pos < line.size() ) {
|
||||
if ( inComment ) {
|
||||
@ -67,8 +61,28 @@ namespace {
|
||||
if ( line[pos] == '*' && line[pos+1] == '/' ) {
|
||||
line[pos++] = ' ';
|
||||
inComment = false;
|
||||
}
|
||||
}
|
||||
line[pos++] = ' ';
|
||||
} else if ( inString || line[pos] == '\"' ) {
|
||||
// character string constant
|
||||
if ( line[pos] == '\"' ) {
|
||||
pos++;
|
||||
} else { // inString
|
||||
inString = false;
|
||||
}
|
||||
while ( pos < line.size() ) {
|
||||
if ( line[pos] == '\\' ) {
|
||||
pos++;
|
||||
if ( line[pos] == '\0' ) {
|
||||
inString = true;
|
||||
break;
|
||||
}
|
||||
} else if ( line[pos] == '\"' ) {
|
||||
pos++;
|
||||
break;
|
||||
}
|
||||
pos++;
|
||||
}
|
||||
} else if ( line[pos] == '/' ) {
|
||||
pos++;
|
||||
if ( line[pos] == '/' ) {
|
||||
@ -86,6 +100,14 @@ namespace {
|
||||
pos++;
|
||||
}
|
||||
}
|
||||
|
||||
// shift bonded line-continuation '\' one position to right
|
||||
if ( !inString &&
|
||||
line[line.size()-1] == '\\'
|
||||
&& line.size() >= 2 && line[line.size()-2] != ' '
|
||||
&& line[line.size()-2] != '\t' ) {
|
||||
line.insert(line.size()-1, " ");
|
||||
}
|
||||
}
|
||||
|
||||
unsigned pline = 0;
|
||||
@ -208,7 +230,7 @@ void process_c_or_cxx(istream& is, const char* infile, ostream& os,
|
||||
os << line[pos++];
|
||||
}
|
||||
|
||||
} else if ( line[pos] == '/' ) {
|
||||
} else if ( !inString && line[pos] == '/' ) {
|
||||
pos++;
|
||||
if ( line[pos] == '/' ) {
|
||||
// c++ comments
|
||||
@ -226,29 +248,29 @@ void process_c_or_cxx(istream& is, const char* infile, ostream& os,
|
||||
|
||||
} else if ( inString || line[pos] == '\"' ) {
|
||||
// character string constant
|
||||
if ( inString ) {
|
||||
inString = false;
|
||||
pos--; // to make sure current character gets reprcessed
|
||||
} else {
|
||||
os << "\"";
|
||||
}
|
||||
do {
|
||||
if ( line[pos] == '\"' ) {
|
||||
os << '\"';
|
||||
pos++;
|
||||
} else { // inString
|
||||
inString = false;
|
||||
}
|
||||
|
||||
while( pos < line.size() ) {
|
||||
if ( line[pos] == '\\' ) {
|
||||
os << '\\';
|
||||
pos++;
|
||||
if ( line[pos] == '\0' ) {
|
||||
inString = true;
|
||||
break;
|
||||
} else if ( line[pos] == '\"' ) {
|
||||
os << '\"';
|
||||
pos++;
|
||||
inString = true;
|
||||
break;
|
||||
}
|
||||
} else if ( line[pos] == '\"' ) {
|
||||
os << '\"';
|
||||
pos++;
|
||||
break;
|
||||
}
|
||||
os << line[pos];
|
||||
pos++;
|
||||
}
|
||||
while ( line[pos] != '\"' );
|
||||
pos++;
|
||||
|
||||
} else if ( line[pos] == '\'' ) {
|
||||
// character constant
|
||||
|
@ -115,11 +115,16 @@ namespace {
|
||||
label="";
|
||||
if (! (line.size()) ) return false;
|
||||
|
||||
// is there a 'do '
|
||||
pstart = lowline.find("do ");
|
||||
if(pstart==string::npos) return false;
|
||||
// is there a 'do '
|
||||
pstart = lowline.find("do");
|
||||
if(pstart==string::npos ||
|
||||
(lowline[pstart+2] != '\0' &&
|
||||
lowline[pstart+2] != ' ' &&
|
||||
lowline[pstart+2] != '\t')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
||||
pos = lowline.find_first_not_of(" \t");
|
||||
if(pos!=pstart) {
|
||||
// there is a DO_construct_name
|
||||
@ -131,8 +136,13 @@ namespace {
|
||||
}
|
||||
|
||||
//check again, if pos now start of do, otherwise not a correct do statement
|
||||
pstart = lowline.find("do ",pos);
|
||||
if(pstart!=pos) return false;
|
||||
pstart = lowline.find("do",pos);
|
||||
if(pstart!=pos ||
|
||||
(lowline[pstart+2] != '\0' &&
|
||||
lowline[pstart+2] != ' ' &&
|
||||
lowline[pstart+2] != '\t')) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pos = lowline.find_first_not_of(" \t",pos+2);
|
||||
if(isdigit(lowline[pos])) {
|
||||
|
@ -444,17 +444,18 @@ MutatorC::initialize()
|
||||
|
||||
// examples:
|
||||
|
||||
//RFG_Filter_add( m_filter, "foo", 0, 0, 0 );
|
||||
//RFG_Filter_add( m_filter, "bar", 0, 0, 0 );
|
||||
//RFG_Filter_addRegionRules( m_filter, "foo", 0, 0, 0 );
|
||||
//RFG_Filter_addRegionRules( m_filter, "bar", 0, 0, 0 );
|
||||
|
||||
// instrumenting this function generated by the Intel compiler
|
||||
// causes a segmentation fault at application runtime
|
||||
RFG_Filter_add( m_filter, "__intel_cpu_indicator_init", 0, 0, 0 );
|
||||
RFG_Filter_addRegionRules( m_filter, "__intel_cpu_indicator_init",
|
||||
0, 0, 0 );
|
||||
|
||||
// this function, also a built-in function from the Intel compiler,
|
||||
// causes aborting instrumentation
|
||||
// (reported by David Shrader / LANL)
|
||||
RFG_Filter_add( m_filter, "__intel_proc_init_N", 0, 0, 0 );
|
||||
RFG_Filter_addRegionRules( m_filter, "__intel_proc_init_N", 0, 0, 0 );
|
||||
|
||||
// create/attach to a process or open binary for rewriting
|
||||
//
|
||||
@ -1369,7 +1370,7 @@ MutatorC::constraintRegion( const std::string & name ) const
|
||||
{
|
||||
int32_t limit;
|
||||
uint8_t flags;
|
||||
RFG_Filter_get( m_filter, name.c_str(), 0, &limit, 0, &flags );
|
||||
RFG_Filter_getRegionRules( m_filter, name.c_str(), 0, &limit, 0, &flags );
|
||||
|
||||
// don't instrument function if call limit is 0 and function isn't
|
||||
// filtered recursively
|
||||
|
@ -450,6 +450,12 @@ int main( int argc, char** argv ) {
|
||||
OTF_Reader_eventBytesProgress( preader, &pminbytestmp, &pcurbytestmp, &pmaxbytestmp );
|
||||
minbytes += pminbytestmp;
|
||||
maxbytes += pmaxbytestmp;
|
||||
#else // VT_MPI
|
||||
/* *** minbytes and maxbytes not used for vtfilter-mpi
|
||||
do the following to prevent "set but not used" warnings
|
||||
when building with the PGI compiler *** */
|
||||
minbytes++;
|
||||
maxbytes++;
|
||||
#endif // VT_MPI
|
||||
|
||||
/* defs */
|
||||
|
@ -321,7 +321,8 @@ FilterTraceC::readFilter(
|
||||
|
||||
// get function's call limit
|
||||
// TODO: consider function groups and recursiveness
|
||||
RFG_Filter_get( rfg_filter, func_name.c_str(), 0, &limit, 0, 0 );
|
||||
RFG_Filter_getRegionRules( rfg_filter, func_name.c_str(), 0, &limit,
|
||||
0, 0 );
|
||||
|
||||
// if it's not the default call limit, assign it to function
|
||||
//
|
||||
@ -376,7 +377,8 @@ FilterTraceC::readFilter(
|
||||
|
||||
// get function's call limit
|
||||
// TODO: consider function groups and recursiveness
|
||||
RFG_Filter_get( rfg_filter, func_name.c_str(), 0, &limit, 0, 0 );
|
||||
RFG_Filter_getRegionRules( rfg_filter, func_name.c_str(), 0, &limit,
|
||||
0, 0 );
|
||||
|
||||
// if it's not the default call limit and the function has no global
|
||||
// filter rule, assign the process specific call limit to function
|
||||
|
@ -469,6 +469,17 @@
|
||||
<since major="5" minor="0" patch="0" /><available>@VT_SETUP_MPI@</available>
|
||||
<anchor>VT_SETUP_VT_MPITRACE</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="101" descname="Ignore filter for MPI communication events" isadvanced="1" group="libtrace"
|
||||
envname="VT_MPI_IGNORE_FILTER">
|
||||
<desc>Enable / disable recording of MPI communication events although its corresponding functions are filtered.</desc>
|
||||
<valuedesc>
|
||||
<value type="bool">
|
||||
<default>0</default>
|
||||
</value>
|
||||
</valuedesc>
|
||||
<since major="5" minor="14" patch="0" /><available>@VT_SETUP_MPI@</available>
|
||||
<anchor>VT_SETUP_VT_MPI_IGNORE_FILTER</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="200" descname="Profiling interval (for STAT mode in ms)" isadvanced="1" group="omode"
|
||||
envname="VT_STAT_INV">
|
||||
<desc>Length of interval for writing the next profiling record.</desc>
|
||||
|
@ -310,9 +310,7 @@ struct DefRec_DefProcessGroupS : DefRec_BaseS
|
||||
( type == TYPE_MPI_COMM_WORLD || type == TYPE_MPI_COMM_OTHER ||
|
||||
type == TYPE_MPI_GROUP ) )
|
||||
{
|
||||
members_hash =
|
||||
vt_hash( (unsigned char*)members,
|
||||
nmembers * sizeof( uint32_t ), 0 );
|
||||
members_hash = vt_hashword( members, nmembers, 0 );
|
||||
}
|
||||
}
|
||||
DefRec_DefProcessGroupS( const DefRec_DefProcessGroupS & a )
|
||||
|
@ -19,10 +19,12 @@
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include <queue>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <assert.h>
|
||||
#include <fnmatch.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -65,7 +67,33 @@ enum
|
||||
};
|
||||
|
||||
//
|
||||
// data structure to store wrapper configuration
|
||||
// data structure for source files to be modified by OPARI and/or TAU
|
||||
//
|
||||
struct ModFileS
|
||||
{
|
||||
ModFileS( const std::string& _src_file, const std::string& _obj_file,
|
||||
const int _action_flags )
|
||||
: src_file( _src_file ), obj_file( _obj_file ),
|
||||
action_flags( _action_flags ) {}
|
||||
|
||||
//
|
||||
// flags for modification actions to perform on source file
|
||||
//
|
||||
enum
|
||||
{
|
||||
MOD_ACTION_FLAG_OPARI = 0x1, // OPARI instrumentation
|
||||
MOD_ACTION_FLAG_TAUINST = 0x2 // TAU instrumentation
|
||||
};
|
||||
|
||||
std::string src_file; // source file name
|
||||
std::string obj_file; // object file name
|
||||
|
||||
int action_flags; // modification action flags
|
||||
|
||||
};
|
||||
|
||||
//
|
||||
// data structure for wrapper configuration
|
||||
//
|
||||
struct ConfigS
|
||||
{
|
||||
@ -139,6 +167,14 @@ struct ConfigS
|
||||
// is instrumentation type available?
|
||||
inline bool isInstAvail( const InstTypeT type ) const;
|
||||
|
||||
// is source file excluded from instrumentation?
|
||||
inline bool isFileExcluded( const std::vector<std::string>& excls,
|
||||
const std::string& file ) const;
|
||||
|
||||
// read exclusion file and store its content in 'excls'
|
||||
bool readExclFile( const std::string& file,
|
||||
std::vector<std::string>& excls ) const;
|
||||
|
||||
static const std::string DEFAULT_OPARI_RCFILE() { return "opari.rc"; }
|
||||
static const std::pair<std::string, std::string> DEFAULT_OPARI_TABFILE()
|
||||
{ return std::make_pair( "opari.tab.c", "opari.tab.o" ); }
|
||||
@ -146,6 +182,10 @@ struct ConfigS
|
||||
LangTypeT lang_type; // language type
|
||||
InstTypeT inst_type; // instrumentation type
|
||||
// (e.g. compinst, manual, ...)
|
||||
std::vector<std::string>
|
||||
inst_excl_files; // source files to be excluded from
|
||||
// automatic instrumentation by the
|
||||
// compiler or PDT/TAU
|
||||
|
||||
int inst_avail; // bitmask for available instr.-types
|
||||
int showme_flags; // bitmask for showme flags
|
||||
@ -194,6 +234,9 @@ struct ConfigS
|
||||
std::string opari_tab_compcmd; // compiler command for OPARI's table file
|
||||
std::string opari_tab_compflags; // compiler flags for OPARI's table file
|
||||
bool opari_keep_rcfile; // Flag: don't delete OPARI's rc file?
|
||||
std::vector<std::string>
|
||||
opari_excl_files; // source files to be excluded from OPARI
|
||||
// instrumentation
|
||||
|
||||
std::string compinst_flags; // compiler flags to enable instrumentation
|
||||
std::string dyninst_flags; // compiler flags to produce debugging
|
||||
@ -203,10 +246,12 @@ struct ConfigS
|
||||
std::string tauinst_args; // TAU instrumentor arguments
|
||||
std::string tauinst_parsecmd; // PDT source code parser command
|
||||
std::string tauinst_parseargs; // PDT parser arguments
|
||||
std::string tauinst_commentcmd; // PDT comment parser command
|
||||
std::string tauinst_commentargs; // PDT comment parser arguments
|
||||
|
||||
std::vector<std::pair<std::string, std::string> >
|
||||
mod_files; // source/object files to be modified by
|
||||
// OPARI and/or TAU
|
||||
std::vector<ModFileS>
|
||||
mod_files; // source files to be modified by OPARI
|
||||
// and/or TAU
|
||||
|
||||
};
|
||||
|
||||
@ -307,7 +352,7 @@ readDataFile()
|
||||
std::string( vt_installdirs_get( VT_INSTALLDIR_DATADIR ) ) + "/" +
|
||||
std::string( ExeName ) + "-wrapper-data.txt";
|
||||
|
||||
const uint32_t keys_num = 31;
|
||||
const uint32_t keys_num = 33;
|
||||
const std::string keys[] = {
|
||||
"version", "language", "compiler_env", "compiler_flags_env",
|
||||
"compiler", "compiler_flags", "linker_flags", "libs", "preprocessor",
|
||||
@ -315,8 +360,9 @@ readDataFile()
|
||||
"vtmtlib", "vthyblib", "vtpomplib", "vtdynattlib", "opari_bin",
|
||||
"opari_opts", "opari_tab_compiler", "opari_tab_compiler_flags",
|
||||
"compinst_compiler_flags", "dyninst_compiler_flags", "tauinst_bin",
|
||||
"tauinst_opts", "tauinst_parse_bin", "tauinst_parse_opts", "inst_avail",
|
||||
"inst_default", "partype_default"
|
||||
"tauinst_opts", "tauinst_parse_bin", "tauinst_parse_opts",
|
||||
"tauinst_comment_bin", "tauinst_comment_opts", "inst_avail", "inst_default",
|
||||
"partype_default"
|
||||
};
|
||||
|
||||
std::ifstream in( data_file.c_str() );
|
||||
@ -542,7 +588,17 @@ readDataFile()
|
||||
Config.tauinst_parseargs = value;
|
||||
break;
|
||||
}
|
||||
case 29: // inst_avail
|
||||
case 29: // tauinst_comment_bin
|
||||
{
|
||||
Config.tauinst_commentcmd = value;
|
||||
break;
|
||||
}
|
||||
case 30: // tauinst_comment_opts
|
||||
{
|
||||
Config.tauinst_commentargs = value;
|
||||
break;
|
||||
}
|
||||
case 31: // inst_avail
|
||||
{
|
||||
char cvalue[128];
|
||||
strncpy( cvalue, value.c_str(), sizeof( cvalue ) - 1 );
|
||||
@ -573,7 +629,7 @@ readDataFile()
|
||||
|
||||
break;
|
||||
}
|
||||
case 30: // inst_default
|
||||
case 32: // inst_default
|
||||
{
|
||||
if( !Config.setInstType( value ) )
|
||||
{
|
||||
@ -585,7 +641,7 @@ readDataFile()
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 31: // partype_default
|
||||
case 33: // partype_default
|
||||
{
|
||||
if( value.compare( "seq" ) == 0 )
|
||||
{
|
||||
@ -816,6 +872,47 @@ parseCommandLine( int argc, char** argv )
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// -vt:inst-exclude-file-list
|
||||
//
|
||||
else if( arg.compare( "-vt:inst-exclude-file-list" ) == 0 )
|
||||
{
|
||||
if( i == args.size() - 1 )
|
||||
{
|
||||
std::cerr << ExeName << ": <file> expected -- "
|
||||
<< "-vt:inst-exclude-file-list" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t excl_file_list_len = args[++i].length()+1;
|
||||
char* excl_file_list = new char[excl_file_list_len];
|
||||
strncpy( excl_file_list, args[i].c_str(), excl_file_list_len - 1 );
|
||||
excl_file_list[excl_file_list_len - 1] = '\0';
|
||||
|
||||
char* token = strtok( excl_file_list, "," );
|
||||
do
|
||||
{
|
||||
std::string file = token;
|
||||
trimString( file );
|
||||
if( file.length() > 0 )
|
||||
Config.inst_excl_files.push_back( file );
|
||||
} while( ( token = strtok( 0, "," ) ) );
|
||||
|
||||
delete [] excl_file_list;
|
||||
}
|
||||
// -vt:inst-exclude-file
|
||||
//
|
||||
else if( arg.compare( "-vt:inst-exclude-file" ) == 0 )
|
||||
{
|
||||
if( i == args.size() - 1 )
|
||||
{
|
||||
std::cerr << ExeName << ": <file> expected -- "
|
||||
<< "-vt:inst-exclude-file" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if( !Config.readExclFile( args[++i], Config.inst_excl_files ) )
|
||||
return false;
|
||||
}
|
||||
// -vt:opari <args>
|
||||
//
|
||||
else if( arg.compare( "-vt:opari" ) == 0 )
|
||||
@ -903,6 +1000,47 @@ parseCommandLine( int argc, char** argv )
|
||||
|
||||
Config.setOpariTabFile( args[++i] );
|
||||
}
|
||||
// -vt:opari-exclude-file-list
|
||||
//
|
||||
else if( arg.compare( "-vt:opari-exclude-file-list" ) == 0 )
|
||||
{
|
||||
if( i == args.size() - 1 )
|
||||
{
|
||||
std::cerr << ExeName << ": <file> expected -- "
|
||||
<< "-vt:opari-exclude-file-list" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t excl_file_list_len = args[++i].length()+1;
|
||||
char* excl_file_list = new char[excl_file_list_len];
|
||||
strncpy( excl_file_list, args[i].c_str(), excl_file_list_len - 1 );
|
||||
excl_file_list[excl_file_list_len - 1] = '\0';
|
||||
|
||||
char* token = strtok( excl_file_list, "," );
|
||||
do
|
||||
{
|
||||
std::string file = token;
|
||||
trimString( file );
|
||||
if( file.length() > 0 )
|
||||
Config.opari_excl_files.push_back( file );
|
||||
} while( ( token = strtok( 0, "," ) ) );
|
||||
|
||||
delete [] excl_file_list;
|
||||
}
|
||||
// -vt:opari-exclude-file
|
||||
//
|
||||
else if( arg.compare( "-vt:opari-exclude-file" ) == 0 )
|
||||
{
|
||||
if( i == args.size() - 1 )
|
||||
{
|
||||
std::cerr << ExeName << ": <file> expected -- "
|
||||
<< "-vt:opari-exclude-file" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if( !Config.readExclFile( args[++i], Config.opari_excl_files ) )
|
||||
return false;
|
||||
}
|
||||
// -vt:noopari
|
||||
//
|
||||
else if( arg.compare( "-vt:noopari" ) == 0 )
|
||||
@ -1017,7 +1155,8 @@ parseCommandLine( int argc, char** argv )
|
||||
arg.compare( "-fopenmp" ) == 0 ||
|
||||
arg.compare( "-Popenmp" ) == 0 ||
|
||||
arg.compare( "-xopenmp" ) == 0 ||
|
||||
arg.compare( "-mp" ) == 0 )
|
||||
arg.compare( "-mp" ) == 0 ||
|
||||
arg.compare( "-homp" ) == 0 )
|
||||
{
|
||||
Config.setUsesThreads( true );
|
||||
Config.setUsesOpenMP( true );
|
||||
@ -1039,6 +1178,13 @@ parseCommandLine( int argc, char** argv )
|
||||
}
|
||||
} while( ( token = strtok( 0, ":" ) ) );
|
||||
}
|
||||
else if( arg.compare( "-h" ) == 0 && i < args.size() - 1 &&
|
||||
args[i+1].compare( "omp" ) == 0 )
|
||||
{
|
||||
Config.setUsesThreads( true );
|
||||
Config.setUsesOpenMP( true );
|
||||
i++;
|
||||
}
|
||||
// nvcc's pthread/openmp flag
|
||||
//
|
||||
else if( arg.compare( 0, 10, "-Xcompiler" ) == 0 ||
|
||||
@ -1094,14 +1240,18 @@ parseCommandLine( int argc, char** argv )
|
||||
{
|
||||
// do nothing
|
||||
}
|
||||
else if( arg.compare("-vt:inst") == 0 ||
|
||||
arg.compare("-vt:opari") == 0 ||
|
||||
arg.compare("-vt:opari-rcfile") == 0 ||
|
||||
arg.compare("-vt:opari-table") == 0 ||
|
||||
arg.compare("-vt:tau") == 0 ||
|
||||
arg.compare("-vt:pdt") == 0 ||
|
||||
arg.compare("-vt:cpp") == 0 ||
|
||||
arg.compare("-vt:cppflags") == 0 )
|
||||
else if( arg.compare( "-vt:inst" ) == 0 ||
|
||||
arg.compare( "-vt:inst-exclude-file-list" ) == 0 ||
|
||||
arg.compare( "-vt:inst-exclude-file" ) == 0 ||
|
||||
arg.compare( "-vt:opari" ) == 0 ||
|
||||
arg.compare( "-vt:opari-rcfile" ) == 0 ||
|
||||
arg.compare( "-vt:opari-table" ) == 0 ||
|
||||
arg.compare( "-vt:opari-exclude-file-list" ) == 0 ||
|
||||
arg.compare( "-vt:opari-exclude-file" ) == 0 ||
|
||||
arg.compare( "-vt:tau" ) == 0 ||
|
||||
arg.compare( "-vt:pdt" ) == 0 ||
|
||||
arg.compare( "-vt:cpp" ) == 0 ||
|
||||
arg.compare( "-vt:cppflags" ) == 0 )
|
||||
{
|
||||
// skip next argument, if necessary
|
||||
i++;
|
||||
@ -1173,15 +1323,64 @@ parseCommandLine( int argc, char** argv )
|
||||
( arg.length() >= 3 &&
|
||||
arg.compare( arg.length() - 3, 3, ".cu" ) == 0 ) )
|
||||
{
|
||||
if( ( Config.showme_flags == 0 ||
|
||||
Config.showme_flags == SHOWME_FLAG_ALL ) &&
|
||||
( Config.inst_type == INST_TYPE_TAUINST || Config.uses_openmp ) )
|
||||
static bool implicit_exclusion_warn = false;
|
||||
static std::queue<std::string> implicit_excluded_files;
|
||||
|
||||
const std::string& file = arg;
|
||||
|
||||
// source file excluded from instrumentation ?
|
||||
if( Config.isFileExcluded( Config.inst_excl_files, file ) )
|
||||
{
|
||||
Config.addModSrcFile( arg );
|
||||
// disable compiler instrumentation by switching to manual
|
||||
if( Config.inst_type == INST_TYPE_COMPINST )
|
||||
{
|
||||
Config.setInstType( INST_TYPE_MANUAL );
|
||||
|
||||
// source files involved in the compile step but are not in the
|
||||
// exclusion list will be implicitly excluded from the compiler
|
||||
// instrumentation; set indicator for printing a warning message
|
||||
// when this happens
|
||||
implicit_exclusion_warn = true;
|
||||
}
|
||||
|
||||
// add source file to the compiler arguments
|
||||
Config.addCompilerArg( file );
|
||||
}
|
||||
else
|
||||
{
|
||||
Config.addCompilerArg( arg );
|
||||
if( Config.showme_flags == 0 ||
|
||||
Config.showme_flags == SHOWME_FLAG_ALL )
|
||||
{
|
||||
// add source file to be modified by OPARI and/or TAU ...
|
||||
if( Config.inst_type == INST_TYPE_TAUINST || Config.uses_openmp )
|
||||
Config.addModSrcFile( file );
|
||||
// ... or add it as it is to the compiler arguments
|
||||
else
|
||||
Config.addCompilerArg( file );
|
||||
|
||||
// the source file might be implicitly excluded from the compiler
|
||||
// instrumentation; store its name for a warning message
|
||||
if( !Config.inst_excl_files.empty() )
|
||||
implicit_excluded_files.push( file );
|
||||
}
|
||||
else
|
||||
{
|
||||
Config.addCompilerArg( file );
|
||||
}
|
||||
}
|
||||
|
||||
// warn about implicitly excluded source files from compiler
|
||||
// instrumentation
|
||||
//
|
||||
if( implicit_exclusion_warn && !implicit_excluded_files.empty() )
|
||||
{
|
||||
while( !implicit_excluded_files.empty() )
|
||||
{
|
||||
std::cerr << "Warning: Implicitly excluded "
|
||||
<< implicit_excluded_files.front()
|
||||
<< " from compiler instrumentation." << std::endl;
|
||||
implicit_excluded_files.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
// -<I|D>*
|
||||
@ -1330,13 +1529,9 @@ doWrap()
|
||||
//
|
||||
for( i = 0; i < Config.mod_files.size(); i++ )
|
||||
{
|
||||
std::string src_file = Config.mod_files[i].first;
|
||||
const ModFileS& mod_file = Config.mod_files[i];
|
||||
|
||||
// skip *.cu source files for TAU instrumentation
|
||||
//
|
||||
const bool skip_tauinst =
|
||||
( Config.inst_type == INST_TYPE_TAUINST && isCuFile( src_file ) );
|
||||
assert( !skip_tauinst || Config.uses_openmp );
|
||||
std::string src_file = mod_file.src_file;
|
||||
|
||||
std::string::size_type si;
|
||||
|
||||
@ -1355,31 +1550,42 @@ doWrap()
|
||||
|
||||
// add path to empty omp.h and macro definition '_OPENMP' to preprocessor
|
||||
// flags, if OpenMP is enabled
|
||||
if( Config.uses_openmp )
|
||||
if( Config.uses_openmp &&
|
||||
i == 0 /* only once! */ )
|
||||
{
|
||||
Config.addPrepFlag( std::string( "-I" ) +
|
||||
vt_installdirs_get( VT_INSTALLDIR_DATADIR ) +
|
||||
" -D_OPENMP" );
|
||||
}
|
||||
|
||||
// compose C preprocessor command
|
||||
// run preprocessor or reuse existing output file
|
||||
//
|
||||
cmd =
|
||||
Config.prep_cmd + " " +
|
||||
Config.prep_flags + " " +
|
||||
src_file + " " +
|
||||
" -o " + cpp_file;
|
||||
|
||||
// show/execute C preprocessor command
|
||||
if( ( rc = showOrExecuteCommand( cmd ) ) != 0 )
|
||||
return rc;
|
||||
if( !Config.reuse_files || access( cpp_file.c_str(), R_OK ) != 0 )
|
||||
{
|
||||
// compose C preprocessor command
|
||||
//
|
||||
cmd =
|
||||
Config.prep_cmd + " " +
|
||||
Config.prep_flags + " " +
|
||||
src_file + " " +
|
||||
" -o " + cpp_file;
|
||||
|
||||
// show/execute C preprocessor command
|
||||
if( ( rc = showOrExecuteCommand( cmd ) ) != 0 )
|
||||
return rc;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( Config.be_verbose )
|
||||
std::cout << "+++ reuse existing " << cpp_file << std::endl;
|
||||
}
|
||||
|
||||
src_file = cpp_file;
|
||||
}
|
||||
|
||||
// run OPARI command on source file
|
||||
//
|
||||
if( Config.uses_openmp )
|
||||
if( ( mod_file.action_flags & ModFileS::MOD_ACTION_FLAG_OPARI ) != 0 )
|
||||
{
|
||||
// create output file name of OPARI
|
||||
//
|
||||
@ -1438,7 +1644,7 @@ doWrap()
|
||||
|
||||
// run PDT parser and TAU instrumentor command on source file
|
||||
//
|
||||
if( Config.inst_type == INST_TYPE_TAUINST && !skip_tauinst )
|
||||
if( ( mod_file.action_flags & ModFileS::MOD_ACTION_FLAG_TAUINST ) != 0 )
|
||||
{
|
||||
// create output file name of the PDT parser
|
||||
//
|
||||
@ -1503,6 +1709,22 @@ doWrap()
|
||||
// show/execute PDT parse command
|
||||
if( ( rc = showOrExecuteCommand( cmd ) ) != 0 )
|
||||
return rc;
|
||||
|
||||
// extend PDB file by comment information, if possible
|
||||
//
|
||||
if( Config.tauinst_commentcmd.length() > 0 )
|
||||
{
|
||||
// compose PDT comment parser command
|
||||
//
|
||||
cmd =
|
||||
Config.tauinst_commentcmd + " " +
|
||||
pdb_file + " -o " + pdb_file + " " +
|
||||
Config.tauinst_commentargs;
|
||||
|
||||
// show/execute PDT comment parser command
|
||||
if( ( rc = showOrExecuteCommand( cmd ) ) != 0 )
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1552,7 +1774,7 @@ doWrap()
|
||||
assert( si != std::string::npos );
|
||||
obj_file = obj_file.substr( 0, si ) + ".o";
|
||||
|
||||
obj_files_to_rename[obj_file] = Config.mod_files[i].second;
|
||||
obj_files_to_rename[obj_file] = mod_file.obj_file;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1617,7 +1839,8 @@ doWrap()
|
||||
{
|
||||
std::string vtlib;
|
||||
|
||||
if( Config.uses_openmp )
|
||||
if( Config.uses_openmp &&
|
||||
access( Config.opari_tabfile.first.c_str(), R_OK ) == 0 )
|
||||
{
|
||||
// compose command for compiling OPARI table file
|
||||
//
|
||||
@ -1826,6 +2049,22 @@ showUsage()
|
||||
<< std::endl
|
||||
<< " default: " << Config.getInstTypeName() << std::endl
|
||||
<< std::endl
|
||||
<< " -vt:inst-exclude-file-list <file>[,file,...]" << std::endl
|
||||
<< " Set list of source files to be excluded from the" << std::endl
|
||||
<< " automatic instrumentation by the compiler or PDT/TAU." << std::endl
|
||||
<< " (file names can contain wildcards)" << std::endl
|
||||
<< std::endl
|
||||
<< " -vt:inst-exclude-file <file>" << std::endl
|
||||
<< " Set pathname of file containing a list of source files" << std::endl
|
||||
<< " to be excluded from the automatic instrumentation by" << std::endl
|
||||
<< " the compiler or PDT/TAU." << std::endl
|
||||
<< " (file names can contain wildcards, one file name per" << std::endl
|
||||
<< " line)" << std::endl
|
||||
<< std::endl
|
||||
<< " Note when using an exclusion list for automatic compiler instrumentation:" << std::endl
|
||||
<< " If a source file from the exclusion list is involved in a compile step," << std::endl
|
||||
<< " the instrumentation is disabled for this step." << std::endl
|
||||
<< std::endl
|
||||
<< " -vt:opari <[!]args> Set/add options for the OPARI command." << std::endl
|
||||
<< " (see " << vt_installdirs_get(VT_INSTALLDIR_DATADIR) << "/doc/opari/Readme.html for more information, default: " << Config.opari_args << ")" << std::endl
|
||||
<< std::endl
|
||||
@ -1837,6 +2076,18 @@ showUsage()
|
||||
<< " Set pathname of the OPARI runtime table file." << std::endl
|
||||
<< " (default: " << ConfigS::DEFAULT_OPARI_TABFILE().first << ")" << std::endl
|
||||
<< std::endl
|
||||
<< " -vt:opari-exclude-file-list <file>[,file,...]" << std::endl
|
||||
<< " Set list of source files to be excluded from the" << std::endl
|
||||
<< " instrumentation of OpenMP constructs by OPARI." << std::endl
|
||||
<< " (file names can contain wildcards)" << std::endl
|
||||
<< std::endl
|
||||
<< " -vt:opari-exclude-file <file>" << std::endl
|
||||
<< " Set pathname of file containing a list of source files" << std::endl
|
||||
<< " to be excluded from the instrumentation of OpenMP" << std::endl
|
||||
<< " constructs by OPARI." << std::endl
|
||||
<< " (file names can contain wildcards, one file name per" << std::endl
|
||||
<< " line)" << std::endl
|
||||
<< std::endl
|
||||
<< " -vt:noopari Disable instrumentation of OpenMP contructs by OPARI." << std::endl
|
||||
<< std::endl
|
||||
<< " -vt:tau <[!]args> Set/add options for the TAU instrumentor command." << std::endl
|
||||
@ -2120,70 +2371,87 @@ ConfigS::addCompilerLib( const std::string& lib )
|
||||
void
|
||||
ConfigS::addModSrcFile( const std::string& file )
|
||||
{
|
||||
// skip *.cu source files for TAU instrumentation
|
||||
//
|
||||
const bool skip_tauinst =
|
||||
( Config.inst_type == INST_TYPE_TAUINST && isCuFile( file ) );
|
||||
if( skip_tauinst )
|
||||
{
|
||||
std::cerr << "Warning: Skip " << file << " for instrumenting with "
|
||||
<< "PDT/TAU - not yet supported." << std::endl;
|
||||
int mod_action_flags = 0;
|
||||
|
||||
// just add unmodified source file name to compiler arguments, if there
|
||||
// is also nothing to do for OPARI
|
||||
// perform OPARI instrumentation?
|
||||
//
|
||||
if( uses_openmp && !isFileExcluded( opari_excl_files, file ) )
|
||||
mod_action_flags |= ModFileS::MOD_ACTION_FLAG_OPARI;
|
||||
|
||||
// perform TAU instrumentation?
|
||||
//
|
||||
if( Config.inst_type == INST_TYPE_TAUINST )
|
||||
{
|
||||
// skip CUDA (*.cu) source files for now
|
||||
//
|
||||
if( !uses_openmp )
|
||||
if( isCuFile( file ) )
|
||||
{
|
||||
addCompilerArg( file );
|
||||
return;
|
||||
std::cerr << "Warning: Skip " << file << " for instrumenting with "
|
||||
<< "PDT/TAU - not yet supported." << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
mod_action_flags |= ModFileS::MOD_ACTION_FLAG_TAUINST;
|
||||
}
|
||||
}
|
||||
|
||||
std::string file_base;
|
||||
std::string file_obj;
|
||||
std::string::size_type si;
|
||||
|
||||
// get base name of source file
|
||||
// add unmodified source file name to compiler arguments, if there
|
||||
// is nothing to do for OPARI and TAU
|
||||
//
|
||||
file_base = file;
|
||||
si = file.rfind( '/' );
|
||||
if( si != std::string::npos )
|
||||
file_base = file.substr( si+1 );
|
||||
|
||||
// create object file name of source file
|
||||
if( !mod_action_flags )
|
||||
{
|
||||
addCompilerArg( file );
|
||||
}
|
||||
// otherwise, register source file for processing by OPARI and/or TAU
|
||||
//
|
||||
si = file_base.rfind( '.' );
|
||||
assert( si != std::string::npos );
|
||||
file_obj = file_base.substr( 0, si ) + ".o";
|
||||
else
|
||||
{
|
||||
std::string file_base;
|
||||
std::string file_obj;
|
||||
std::string::size_type si;
|
||||
|
||||
// store source/object file name for later processing by OPARI and/or TAU
|
||||
mod_files.push_back( std::make_pair( file, file_obj ) );
|
||||
// get base name of source file
|
||||
//
|
||||
file_base = file;
|
||||
si = file.rfind( '/' );
|
||||
if( si != std::string::npos )
|
||||
file_base = file.substr( si+1 );
|
||||
|
||||
// add modified source file name to compiler arguments
|
||||
//
|
||||
// create object file name of source file
|
||||
//
|
||||
si = file_base.rfind( '.' );
|
||||
assert( si != std::string::npos );
|
||||
file_obj = file_base.substr( 0, si ) + ".o";
|
||||
|
||||
si = file.rfind( '.' );
|
||||
assert( si != std::string::npos );
|
||||
// store source/object file and modification action flags
|
||||
mod_files.push_back( ModFileS( file, file_obj, mod_action_flags ) );
|
||||
|
||||
std::string base = file.substr( 0, si );
|
||||
std::string suffix = file.substr( si );
|
||||
std::string mod_file = base;
|
||||
// add modified source file name to compiler arguments
|
||||
//
|
||||
|
||||
if( preprocess )
|
||||
mod_file += ".cpp";
|
||||
if( uses_openmp )
|
||||
mod_file += ".pomp";
|
||||
if( inst_type == INST_TYPE_TAUINST && !skip_tauinst )
|
||||
mod_file += ".tau";
|
||||
si = file.rfind( '.' );
|
||||
assert( si != std::string::npos );
|
||||
|
||||
// convert Fortran source file suffix to upper case, in order to
|
||||
// invoke the C preprocessor before compiling
|
||||
if( fortran() && suffix.compare( 0, 2, ".f" ) == 0 )
|
||||
suffix.replace( 0, 2, ".F" );
|
||||
std::string base = file.substr( 0, si );
|
||||
std::string suffix = file.substr( si );
|
||||
std::string mod_file = base;
|
||||
|
||||
mod_file += suffix;
|
||||
if( preprocess )
|
||||
mod_file += ".cpp";
|
||||
if( ( mod_action_flags & ModFileS::MOD_ACTION_FLAG_OPARI ) != 0 )
|
||||
mod_file += ".pomp";
|
||||
if( ( mod_action_flags & ModFileS::MOD_ACTION_FLAG_TAUINST ) != 0 )
|
||||
mod_file += ".tau";
|
||||
|
||||
addCompilerArg( mod_file );
|
||||
// convert Fortran source file suffix to upper case, in order to
|
||||
// invoke the C preprocessor before compiling
|
||||
if( fortran() && suffix.compare( 0, 2, ".f" ) == 0 )
|
||||
suffix.replace( 0, 2, ".F" );
|
||||
|
||||
mod_file += suffix;
|
||||
|
||||
addCompilerArg( mod_file );
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -2407,3 +2675,44 @@ ConfigS::isInstAvail( const InstTypeT type ) const
|
||||
{
|
||||
return ( inst_avail & type );
|
||||
}
|
||||
|
||||
bool
|
||||
ConfigS::isFileExcluded( const std::vector<std::string>& excls,
|
||||
const std::string& file ) const
|
||||
{
|
||||
for( std::vector<std::string>::const_iterator it = excls.begin();
|
||||
it != excls.end(); ++it )
|
||||
{
|
||||
if( fnmatch( it->c_str(), file.c_str(), 0 ) == 0 )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
ConfigS::readExclFile( const std::string& file,
|
||||
std::vector<std::string>& excls ) const
|
||||
{
|
||||
std::ifstream in( file.c_str() );
|
||||
if( !in )
|
||||
{
|
||||
std::cerr << ExeName << ": Error: Could not open exclusion file "
|
||||
<< file << ". Aborting." << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
char buffer[1024];
|
||||
|
||||
while( in.getline( buffer, sizeof( buffer ) ) )
|
||||
{
|
||||
std::string line = buffer;
|
||||
trimString( line );
|
||||
if( line.length() > 0 )
|
||||
excls.push_back( line );
|
||||
}
|
||||
|
||||
in.close();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ compiler_flags_env=VT_CXXFLAGS
|
||||
compiler=@VT_WRAPPER_CXX_COMPILER@
|
||||
compiler_flags=@PTHREAD_CFLAGS@ @VT_WRAPPER_CXX_EXTRA_COMPILER_FLAGS@
|
||||
linker_flags=@VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS@
|
||||
libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @LIBERTYLIBDIR@ @LIBERTYLIB@ @CUPTILIBDIR@ @CUPTILIB@ @VT_WRAPPER_CXX_EXTRA_LIBS@
|
||||
libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @LIBERTYLIBDIR@ @LIBERTYLIB@ @CUPTILIBDIR@ @CUPTILIB@ @CUDATKLIBDIR@ @CUDARTLIB@ @VT_WRAPPER_CXX_EXTRA_LIBS@
|
||||
preprocessor=@VT_WRAPPER_CXX_CPP@
|
||||
preprocessor_flags=@VT_WRAPPER_CXX_EXTRA_CPPFLAGS@
|
||||
includedir=${includedir}
|
||||
@ -26,6 +26,8 @@ tauinst_bin=@VT_WRAPPER_TAUINST_BIN@
|
||||
tauinst_opts=@VT_WRAPPER_CXX_TAUINST_OPTS@
|
||||
tauinst_parse_bin=@VT_WRAPPER_CXX_TAUINST_PARSE_BIN@
|
||||
tauinst_parse_opts=@VT_WRAPPER_CXX_TAUINST_PARSE_OPTS@
|
||||
tauinst_comment_bin=@VT_WRAPPER_CXX_TAUINST_COMMENT_BIN@
|
||||
tauinst_comment_opts=@VT_WRAPPER_CXX_TAUINST_COMMENT_OPTS@
|
||||
inst_avail=@VT_WRAPPER_AVAIL_INST@
|
||||
inst_default=@VT_WRAPPER_DEFAULT_INST@
|
||||
partype_default=@VT_WRAPPER_CXX_DEFAULT_PARTYPE@
|
||||
|
@ -5,7 +5,7 @@ compiler_flags_env=VT_CFLAGS
|
||||
compiler=@VT_WRAPPER_CC_COMPILER@
|
||||
compiler_flags=@PTHREAD_CFLAGS@ @VT_WRAPPER_CC_EXTRA_COMPILER_FLAGS@
|
||||
linker_flags=@VT_WRAPPER_CC_EXTRA_LINKER_FLAGS@
|
||||
libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @LIBERTYLIBDIR@ @LIBERTYLIB@ @CUPTILIBDIR@ @CUPTILIB@ @VT_WRAPPER_CC_EXTRA_LIBS@
|
||||
libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @LIBERTYLIBDIR@ @LIBERTYLIB@ @CUPTILIBDIR@ @CUPTILIB@ @CUDATKLIBDIR@ @CUDARTLIB@ @VT_WRAPPER_CC_EXTRA_LIBS@
|
||||
preprocessor=@VT_WRAPPER_CC_CPP@
|
||||
preprocessor_flags=@VT_WRAPPER_CC_EXTRA_CPPFLAGS@
|
||||
includedir=${includedir}
|
||||
@ -26,6 +26,8 @@ tauinst_bin=@VT_WRAPPER_TAUINST_BIN@
|
||||
tauinst_opts=@VT_WRAPPER_CC_TAUINST_OPTS@
|
||||
tauinst_parse_bin=@VT_WRAPPER_CC_TAUINST_PARSE_BIN@
|
||||
tauinst_parse_opts=@VT_WRAPPER_CC_TAUINST_PARSE_OPTS@
|
||||
tauinst_comment_bin=@VT_WRAPPER_CC_TAUINST_COMMENT_BIN@
|
||||
tauinst_comment_opts=@VT_WRAPPER_CC_TAUINST_COMMENT_OPTS@
|
||||
inst_avail=@VT_WRAPPER_AVAIL_INST@
|
||||
inst_default=@VT_WRAPPER_DEFAULT_INST@
|
||||
partype_default=@VT_WRAPPER_CC_DEFAULT_PARTYPE@
|
||||
|
@ -5,7 +5,7 @@ compiler_flags_env=VT_FCFLAGS
|
||||
compiler=@VT_WRAPPER_FC_COMPILER@
|
||||
compiler_flags=@VT_WRAPPER_FC_EXTRA_COMPILER_FLAGS@
|
||||
linker_flags=@VT_WRAPPER_FC_EXTRA_LINKER_FLAGS@
|
||||
libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @LIBERTYLIBDIR@ @LIBERTYLIB@ @CUPTILIBDIR@ @CUPTILIB@ @VT_WRAPPER_FC_EXTRA_LIBS@
|
||||
libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @LIBERTYLIBDIR@ @LIBERTYLIB@ @CUPTILIBDIR@ @CUPTILIB@ @CUDATKLIBDIR@ @CUDARTLIB@ @VT_WRAPPER_FC_EXTRA_LIBS@
|
||||
preprocessor=@VT_WRAPPER_FC_CPP@
|
||||
preprocessor_flags=@VT_WRAPPER_FC_EXTRA_CPPFLAGS@
|
||||
includedir=${includedir}
|
||||
@ -26,6 +26,8 @@ tauinst_bin=@VT_WRAPPER_TAUINST_BIN@
|
||||
tauinst_opts=@VT_WRAPPER_FC_TAUINST_OPTS@
|
||||
tauinst_parse_bin=@VT_WRAPPER_FC_TAUINST_PARSE_BIN@
|
||||
tauinst_parse_opts=@VT_WRAPPER_FC_TAUINST_PARSE_OPTS@
|
||||
tauinst_comment_bin=@VT_WRAPPER_FC_TAUINST_COMMENT_BIN@
|
||||
tauinst_comment_opts=@VT_WRAPPER_FC_TAUINST_COMMENT_OPTS@
|
||||
inst_avail=@VT_WRAPPER_AVAIL_INST@
|
||||
inst_default=@VT_WRAPPER_DEFAULT_INST@
|
||||
partype_default=@VT_WRAPPER_FC_DEFAULT_PARTYPE@
|
||||
|
@ -5,7 +5,7 @@ compiler_flags_env=VT_NVCFLAGS
|
||||
compiler=@VT_WRAPPER_NVCC_COMPILER@
|
||||
compiler_flags=@PTHREAD_CFLAGS@ @VT_WRAPPER_NVCC_EXTRA_COMPILER_FLAGS@
|
||||
linker_flags=@VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS@
|
||||
libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @LIBERTYLIBDIR@ @LIBERTYLIB@ @CUPTILIBDIR@ @CUPTILIB@ @VT_WRAPPER_NVCC_EXTRA_LIBS@
|
||||
libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @LIBERTYLIBDIR@ @LIBERTYLIB@ @CUPTILIBDIR@ @CUPTILIB@ @CUDATKLIBDIR@ @CUDARTLIB@ @VT_WRAPPER_NVCC_EXTRA_LIBS@
|
||||
preprocessor=@VT_WRAPPER_NVCC_CPP@
|
||||
preprocessor_flags=@VT_WRAPPER_NVCC_EXTRA_CPPFLAGS@
|
||||
includedir=${includedir}
|
||||
@ -26,6 +26,8 @@ tauinst_bin=@VT_WRAPPER_TAUINST_BIN@
|
||||
tauinst_opts=@VT_WRAPPER_NVCC_TAUINST_OPTS@
|
||||
tauinst_parse_bin=@VT_WRAPPER_NVCC_TAUINST_PARSE_BIN@
|
||||
tauinst_parse_opts=@VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS@
|
||||
tauinst_comment_bin=@VT_WRAPPER_NVCC_TAUINST_COMMENT_BIN@
|
||||
tauinst_comment_opts=@VT_WRAPPER_NVCC_TAUINST_COMMENT_OPTS@
|
||||
inst_avail=@VT_WRAPPER_NVCC_AVAIL_INST@
|
||||
inst_default=@VT_WRAPPER_NVCC_DEFAULT_INST@
|
||||
partype_default=@VT_WRAPPER_NVCC_DEFAULT_PARTYPE@
|
||||
|
@ -6,7 +6,6 @@ libvt_util_la_SOURCES = \
|
||||
hash.h \
|
||||
installdirs.h \
|
||||
util.h \
|
||||
hash.c \
|
||||
installdirs.c \
|
||||
util.c
|
||||
|
||||
|
@ -1,68 +0,0 @@
|
||||
/*
|
||||
* this is taken from
|
||||
* 'http://burtleburtle.net/bob/hash/evahash.html'
|
||||
*/
|
||||
|
||||
#include "hash.h"
|
||||
|
||||
/* The mixing step */
|
||||
#define mix(a,b,c) \
|
||||
{ \
|
||||
a=a-b; a=a-c; a=a^(c>>13); \
|
||||
b=b-c; b=b-a; b=b^(a<<8); \
|
||||
c=c-a; c=c-b; c=c^(b>>13); \
|
||||
a=a-b; a=a-c; a=a^(c>>12); \
|
||||
b=b-c; b=b-a; b=b^(a<<16); \
|
||||
c=c-a; c=c-b; c=c^(b>>5); \
|
||||
a=a-b; a=a-c; a=a^(c>>3); \
|
||||
b=b-c; b=b-a; b=b^(a<<10); \
|
||||
c=c-a; c=c-b; c=c^(b>>15); \
|
||||
}
|
||||
|
||||
/* hash function */
|
||||
/*
 * vt_hash -- hash `length` bytes starting at `k` into an unsigned int.
 *
 * k       : the key bytes (read only, never written through)
 * length  : number of key bytes
 * initval : seed; becomes the initial value of c
 *
 * Complete 12-byte groups are assembled little-endian into a, b and c and
 * run through mix(). The remaining 0..11 bytes are added by the fall-through
 * switch, `length` itself is added to c, and one last mix() produces the
 * result, which is returned in c.
 */
unsigned int vt_hash(register unsigned char* k,
|
||||
unsigned int length, unsigned int initval)
|
||||
{
|
||||
register unsigned int a,b,c; /* the internal state */
|
||||
unsigned int len; /* how many key bytes still need mixing */
|
||||
|
||||
/* Set up the internal state */
|
||||
len = length;
|
||||
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
|
||||
c = initval; /* variable initialization of internal state */
|
||||
|
||||
/*---------------------------------------- handle most of the key */
|
||||
while (len >= 12)
|
||||
{
|
||||
a=a+(k[0]+((unsigned int)k[1]<<8)+((unsigned int)k[2]<<16)
|
||||
+((unsigned int)k[3]<<24));
|
||||
b=b+(k[4]+((unsigned int)k[5]<<8)+((unsigned int)k[6]<<16)
|
||||
+((unsigned int)k[7]<<24));
|
||||
c=c+(k[8]+((unsigned int)k[9]<<8)+((unsigned int)k[10]<<16)
|
||||
+((unsigned int)k[11]<<24));
|
||||
mix(a,b,c);
|
||||
k = k+12; len = len-12;
|
||||
}
|
||||
|
||||
/*------------------------------------- handle the last 11 bytes */
/* the total key length (not the remainder) goes into c */
c = c+length;
|
||||
switch(len) /* all the case statements fall through */
|
||||
{
|
||||
case 11: c=c+((unsigned int)k[10]<<24);
|
||||
case 10: c=c+((unsigned int)k[9]<<16);
|
||||
case 9 : c=c+((unsigned int)k[8]<<8);
|
||||
/* the first byte of c is reserved for the length */
|
||||
case 8 : b=b+((unsigned int)k[7]<<24);
|
||||
case 7 : b=b+((unsigned int)k[6]<<16);
|
||||
case 6 : b=b+((unsigned int)k[5]<<8);
|
||||
case 5 : b=b+k[4];
|
||||
case 4 : a=a+((unsigned int)k[3]<<24);
|
||||
case 3 : a=a+((unsigned int)k[2]<<16);
|
||||
case 2 : a=a+((unsigned int)k[1]<<8);
|
||||
case 1 : a=a+k[0];
|
||||
/* case 0: nothing left to add */
|
||||
}
|
||||
mix(a,b,c);
|
||||
/*-------------------------------------------- report the result */
|
||||
return c;
|
||||
}
|
@ -1,18 +1,502 @@
|
||||
/*
|
||||
* this is taken from
|
||||
* 'http://burtleburtle.net/bob/hash/evahash.html'
|
||||
*/
|
||||
/**
|
||||
* Extract from Bob Jenkins' http://burtleburtle.net/bob/c/lookup3.c,
|
||||
* May 2006, Public Domain.
|
||||
*
|
||||
* These are functions for producing 32-bit hashes for hash table lookup.
|
||||
* hashword(), hashlittle(), hashbig(), mix(), and final() are externally
|
||||
* useful functions. You can use this free for any purpose. It's in the
|
||||
* public domain. It has no warranty.
|
||||
*
|
||||
* You probably want to use hashlittle(). hashlittle() and hashbig()
|
||||
* hash byte arrays. hashlittle() is faster than hashbig() on
|
||||
* little-endian machines. Intel and AMD are little-endian machines.
|
||||
*
|
||||
* If you want to find a hash of, say, exactly 7 integers, do
|
||||
* a = i1; b = i2; c = i3;
|
||||
* mix(a,b,c);
|
||||
* a += i4; b += i5; c += i6;
|
||||
* mix(a,b,c);
|
||||
* a += i7;
|
||||
* final(a,b,c);
|
||||
* then use c as the hash value. If you have a variable length array of
|
||||
* 4-byte integers to hash, use hashword(). If you have a byte array (like
|
||||
* a character string), use hashlittle(). If you have several byte arrays, or
|
||||
* a mix of things, see the comments above hashlittle().
|
||||
*
|
||||
* Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
|
||||
* then mix those integers. This is fast (you can do a lot more thorough
|
||||
* mixing with 12*3 instructions on 3 integers than you can with 3 instructions
|
||||
* on 1 byte), but shoehorning those bytes into integers efficiently is messy.
|
||||
**/
|
||||
|
||||
#ifndef _HASH_H
|
||||
#define _HASH_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
# define EXTERN extern "C"
|
||||
#else
|
||||
# define EXTERN extern
|
||||
#endif
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h" /* defines WORDS_BIGENDIAN etc */
|
||||
#endif /* HAVE_CONFIG_H */
|
||||
|
||||
EXTERN unsigned int vt_hash(register unsigned char* k, unsigned int length, unsigned int initval);
|
||||
#include "vt_inttypes.h" /* defines uint32_t etc */
|
||||
|
||||
#include <stddef.h> /* defines size_t etc */
|
||||
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
# if WORDS_BIGENDIAN == 1
|
||||
# define HASH_LITTLE_ENDIAN 0
|
||||
# define HASH_BIG_ENDIAN 1
|
||||
# define vt_hash(key, length, initval) \
|
||||
vt_hashbig((key), (length), (initval))
|
||||
# else /* WORDS_BIGENDIAN == 0 */
|
||||
# define HASH_LITTLE_ENDIAN 1
|
||||
# define HASH_BIG_ENDIAN 0
|
||||
# define vt_hash(key, length, initval) \
|
||||
vt_hashlittle((key), (length), (initval))
|
||||
# endif /* WORDS_BIGENDIAN == 0/1 */
|
||||
#else /* WORDS_BIGENDIAN */
|
||||
# define HASH_LITTLE_ENDIAN 0
|
||||
# define HASH_BIG_ENDIAN 0
|
||||
# define vt_hash(key, length, initval) \
|
||||
vt_hashlittle((key), (length), (initval))
|
||||
#endif /* WORDS_BIGENDIAN */
|
||||
|
||||
/* vt_hashsize(n): 2^n as a uint32_t, i.e. the element count of a table
   indexed by n hash bits. */
#define vt_hashsize(n) ((uint32_t)1<<(n))
|
||||
/* vt_hashmask(n): bitmask selecting the low n bits of a hash value. */
#define vt_hashmask(n) (vt_hashsize(n)-1)
|
||||
/* vt_hashrot(x,k): rotate x left by k bits. The right-hand shift count is
   32-(k), so this is only meaningful for a 32-bit unsigned x and 0 < k < 32
   (k == 0 would shift by the full width). Both arguments are expanded twice. */
#define vt_hashrot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
mix -- mix 3 32-bit values reversibly.
|
||||
|
||||
This is reversible, so any information in (a,b,c) before mix() is
|
||||
still in (a,b,c) after mix().
|
||||
|
||||
If four pairs of (a,b,c) inputs are run through mix(), or through
|
||||
mix() in reverse, there are at least 32 bits of the output that
|
||||
are sometimes the same for one pair and different for another pair.
|
||||
This was tested for:
|
||||
* pairs that differed by one bit, by two bits, in any combination
|
||||
of top bits of (a,b,c), or in any combination of bottom bits of
|
||||
(a,b,c).
|
||||
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
|
||||
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
|
||||
is commonly produced by subtraction) look like a single 1-bit
|
||||
difference.
|
||||
* the base values were pseudorandom, all zero but one bit set, or
|
||||
all zero plus a counter that starts at zero.
|
||||
|
||||
Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
|
||||
satisfy this are
|
||||
4 6 8 16 19 4
|
||||
9 15 3 18 27 15
|
||||
14 9 3 7 17 3
|
||||
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
|
||||
for "differ" defined as + with a one-bit base and a two-bit delta. I
|
||||
used http://burtleburtle.net/bob/hash/avalanche.html to choose
|
||||
the operations, constants, and arrangements of the variables.
|
||||
|
||||
This does not achieve avalanche. There are input bits of (a,b,c)
|
||||
that fail to affect some output bits of (a,b,c), especially of a. The
|
||||
most thoroughly mixed value is c, but it doesn't really even achieve
|
||||
avalanche in c.
|
||||
|
||||
This allows some parallelism. Read-after-writes are good at doubling
|
||||
the number of bits affected, so the goal of mixing pulls in the opposite
|
||||
direction as the goal of parallelism. I did what I could. Rotates
|
||||
seem to cost as much as shifts on every machine I could lay my hands
|
||||
on, and rotates are much kinder to the top and bottom bits, so I used
|
||||
rotates.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
/* vt_hashmix(a,b,c): six rounds of subtract / xor-with-rotate / add over the
   three state words, updating all of them in place. Each argument is expanded
   many times and assigned to, so callers must pass plain, side-effect-free
   lvalues. The expansion is a bare { } block, not do { } while (0). */
#define vt_hashmix(a,b,c) \
|
||||
{ \
|
||||
a -= c; a ^= vt_hashrot(c, 4); c += b; \
|
||||
b -= a; b ^= vt_hashrot(a, 6); a += c; \
|
||||
c -= b; c ^= vt_hashrot(b, 8); b += a; \
|
||||
a -= c; a ^= vt_hashrot(c,16); c += b; \
|
||||
b -= a; b ^= vt_hashrot(a,19); a += c; \
|
||||
c -= b; c ^= vt_hashrot(b, 4); b += a; \
|
||||
}
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
final -- final mixing of 3 32-bit values (a,b,c) into c
|
||||
|
||||
Pairs of (a,b,c) values differing in only a few bits will usually
|
||||
produce values of c that look totally different. This was tested for
|
||||
* pairs that differed by one bit, by two bits, in any combination
|
||||
of top bits of (a,b,c), or in any combination of bottom bits of
|
||||
(a,b,c).
|
||||
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
|
||||
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
|
||||
is commonly produced by subtraction) look like a single 1-bit
|
||||
difference.
|
||||
* the base values were pseudorandom, all zero but one bit set, or
|
||||
all zero plus a counter that starts at zero.
|
||||
|
||||
These constants passed:
|
||||
14 11 25 16 4 14 24
|
||||
12 14 25 16 4 14 24
|
||||
and these came close:
|
||||
4 8 15 26 3 22 24
|
||||
10 8 15 26 3 22 24
|
||||
11 8 15 26 3 22 24
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
/* vt_hashfinal(a,b,c): seven xor / subtract-rotate steps; the last step
   writes c, which is the value the hash functions in this header return.
   All three arguments are modified in place and expanded several times, so
   callers must pass plain, side-effect-free lvalues. The expansion is a bare
   { } block, not do { } while (0). */
#define vt_hashfinal(a,b,c) \
|
||||
{ \
|
||||
c ^= b; c -= vt_hashrot(b,14); \
|
||||
a ^= c; a -= vt_hashrot(c,11); \
|
||||
b ^= a; b -= vt_hashrot(a,25); \
|
||||
c ^= b; c -= vt_hashrot(b,16); \
|
||||
a ^= c; a -= vt_hashrot(c,4); \
|
||||
b ^= a; b -= vt_hashrot(a,14); \
|
||||
c ^= b; c -= vt_hashrot(b,24); \
|
||||
}
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
This works on all machines. To be useful, it requires
|
||||
-- that the key be an array of uint32_t's, and
|
||||
-- that the length be the number of uint32_t's in the key
|
||||
|
||||
The function hashword() is identical to hashlittle() on little-endian
|
||||
machines, and identical to hashbig() on big-endian machines,
|
||||
except that the length has to be measured in uint32_ts rather than in
|
||||
bytes. hashlittle() is more complicated than hashword() only because
|
||||
hashlittle() has to dance around fitting the key bytes into registers.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
/*
 * vt_hashword -- hash an array of 32-bit words into a 32-bit value.
 *
 * The state is seeded from the word count (shifted left by 2) plus initval.
 * Words are consumed three at a time with vt_hashmix() while more than three
 * remain; the last one to three words are added and passed through
 * vt_hashfinal(). For length == 0 no word is read and no mixing is done:
 * the seed value itself is returned.
 */
static inline uint32_t vt_hashword(
|
||||
const uint32_t *k, /* the key, an array of uint32_t values */
|
||||
size_t length, /* the length of the key, in uint32_ts */
|
||||
uint32_t initval) /* the previous hash, or an arbitrary value */
|
||||
{
|
||||
uint32_t a,b,c;
|
||||
|
||||
/* Set up the internal state */
|
||||
a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
|
||||
|
||||
/*------------------------------------------------- handle most of the key */
/* "> 3" (not ">= 3") so that a final full triple is left for the switch */
while (length > 3)
|
||||
{
|
||||
a += k[0];
|
||||
b += k[1];
|
||||
c += k[2];
|
||||
vt_hashmix(a,b,c);
|
||||
length -= 3;
|
||||
k += 3;
|
||||
}
|
||||
|
||||
/*------------------------------------------- handle the last 3 uint32_t's */
|
||||
switch(length) /* all the case statements fall through */
|
||||
{
|
||||
case 3 : c+=k[2];
|
||||
case 2 : b+=k[1];
|
||||
case 1 : a+=k[0];
|
||||
vt_hashfinal(a,b,c);
|
||||
case 0: /* case 0: nothing left to add */
|
||||
break;
|
||||
}
|
||||
/*------------------------------------------------------ report the result */
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
hashtriple() -- This is the same as hashword() for up to three single 4-byte
|
||||
integers to hash.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
/*
 * vt_hashtriple -- hash three 32-bit values without going through an array.
 *
 * The seed uses the constant 12 (3<<2), so the result equals vt_hashword()
 * on a key of exactly three words {a, b, c} with the same initval. It does
 * not match vt_hashword() for keys of one or two words, because that
 * function seeds with the actual word count.
 * a, b and c are by-value parameters and are used as the working state.
 */
static inline uint32_t vt_hashtriple( uint32_t a, uint32_t b, uint32_t c,
|
||||
uint32_t initval)
|
||||
{
|
||||
/* Set up the internal state */
|
||||
a += 0xdeadbeef + 12/*(3<<2)*/ + initval;
|
||||
b += 0xdeadbeef + 12/*(3<<2)*/ + initval;
|
||||
c += 0xdeadbeef + 12/*(3<<2)*/ + initval;
|
||||
|
||||
vt_hashfinal(a,b,c);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
hashlittle() -- hash a variable-length key into a 32-bit value
|
||||
k : the key (the unaligned variable-length array of bytes)
|
||||
length : the length of the key, counting by bytes
|
||||
initval : can be any 4-byte value
|
||||
Returns a 32-bit value. Every bit of the key affects every bit of
|
||||
the return value. Two keys differing by one or two bits will have
|
||||
totally different hash values.
|
||||
|
||||
The best hash table sizes are powers of 2. There is no need to do
|
||||
mod a prime (mod is sooo slow!). If you need less than 32 bits,
|
||||
use a bitmask. For example, if you need only 10 bits, do
|
||||
h = (h & vt_hashmask(10));
|
||||
In which case, the hash table should have vt_hashsize(10) elements.
|
||||
|
||||
If you are hashing n strings (uint8_t **)k, do it like this:
|
||||
for (i=0, h=0; i<n; ++i) h = vt_hashlittle( k[i], len[i], h);
|
||||
|
||||
By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
|
||||
code any way you wish, private, educational, or commercial. It's free.
|
||||
|
||||
Use for hash table lookup, or anything where one collision in 2^^32 is
|
||||
acceptable. Do NOT use for cryptographic purposes.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
/*
 * vt_hashlittle -- hash `length` bytes at `key`, seeded with `initval`.
 *
 * One of three read strategies is chosen from HASH_LITTLE_ENDIAN and the
 * low bits of the key address:
 *   - 4-byte aligned : 32-bit reads
 *   - 2-byte aligned : 16-bit reads
 *   - otherwise (or HASH_LITTLE_ENDIAN == 0): byte-wise reads
 * All three assemble the bytes in little-endian order.
 * Blocks of 12 bytes are mixed with vt_hashmix() while more than 12 bytes
 * remain; the last 1..12 bytes are added and finished with vt_hashfinal().
 * For length == 0 the seed value is returned without any mixing.
 */
static inline uint32_t vt_hashlittle(const void *key, size_t length,
|
||||
uint32_t initval)
|
||||
{
|
||||
uint32_t a,b,c; /* internal state */
|
||||
union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
|
||||
|
||||
/* Set up the internal state */
|
||||
a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
|
||||
|
||||
/* u.i exposes the pointer's low bits for the alignment tests below */
u.ptr = key;
|
||||
if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
|
||||
const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
|
||||
|
||||
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
|
||||
while (length > 12)
|
||||
{
|
||||
a += k[0];
|
||||
b += k[1];
|
||||
c += k[2];
|
||||
vt_hashmix(a,b,c);
|
||||
length -= 12;
|
||||
k += 3;
|
||||
}
|
||||
|
||||
/*----------------------------- handle the last (probably partial) block */
|
||||
/*
|
||||
* "k[2]&0xffffff" actually reads beyond the end of the string, but
|
||||
* then masks off the part it's not allowed to read. Because the
|
||||
* string is aligned, the masked-off tail is in the same word as the
|
||||
* rest of the string. Every machine with memory protection I've seen
|
||||
* does it on word boundaries, so is OK with this. But VALGRIND will
|
||||
* still catch it and complain. The masking trick does make the hash
|
||||
* noticeably faster for short strings (like English words).
|
||||
*/
|
||||
|
||||
switch(length)
|
||||
{
|
||||
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
|
||||
case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
|
||||
case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
|
||||
case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
|
||||
case 8 : b+=k[1]; a+=k[0]; break;
|
||||
case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
|
||||
case 6 : b+=k[1]&0xffff; a+=k[0]; break;
|
||||
case 5 : b+=k[1]&0xff; a+=k[0]; break;
|
||||
case 4 : a+=k[0]; break;
|
||||
case 3 : a+=k[0]&0xffffff; break;
|
||||
case 2 : a+=k[0]&0xffff; break;
|
||||
case 1 : a+=k[0]&0xff; break;
|
||||
case 0 : return c; /* zero length strings require no mixing */
|
||||
}
|
||||
|
||||
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
|
||||
const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
|
||||
const uint8_t *k8;
|
||||
|
||||
/*--------------- all but last block: aligned reads and different mixing */
|
||||
while (length > 12)
|
||||
{
|
||||
a += k[0] + (((uint32_t)k[1])<<16);
|
||||
b += k[2] + (((uint32_t)k[3])<<16);
|
||||
c += k[4] + (((uint32_t)k[5])<<16);
|
||||
vt_hashmix(a,b,c);
|
||||
length -= 12;
|
||||
k += 6;
|
||||
}
|
||||
|
||||
/*----------------------------- handle the last (probably partial) block */
|
||||
/* k8 gives byte access to the odd trailing byte of the last block */
k8 = (const uint8_t *)k;
|
||||
switch(length)
|
||||
{
|
||||
case 12: c+=k[4]+(((uint32_t)k[5])<<16);
|
||||
b+=k[2]+(((uint32_t)k[3])<<16);
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
|
||||
case 10: c+=k[4];
|
||||
b+=k[2]+(((uint32_t)k[3])<<16);
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 9 : c+=k8[8]; /* fall through */
|
||||
case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
|
||||
case 6 : b+=k[2];
|
||||
a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 5 : b+=k8[4]; /* fall through */
|
||||
case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
|
||||
break;
|
||||
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
|
||||
case 2 : a+=k[0];
|
||||
break;
|
||||
case 1 : a+=k8[0];
|
||||
break;
|
||||
case 0 : return c; /* zero length requires no mixing */
|
||||
}
|
||||
|
||||
} else { /* need to read the key one byte at a time */
|
||||
const uint8_t *k = (const uint8_t *)key;
|
||||
|
||||
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
|
||||
while (length > 12)
|
||||
{
|
||||
a += k[0];
|
||||
a += ((uint32_t)k[1])<<8;
|
||||
a += ((uint32_t)k[2])<<16;
|
||||
a += ((uint32_t)k[3])<<24;
|
||||
b += k[4];
|
||||
b += ((uint32_t)k[5])<<8;
|
||||
b += ((uint32_t)k[6])<<16;
|
||||
b += ((uint32_t)k[7])<<24;
|
||||
c += k[8];
|
||||
c += ((uint32_t)k[9])<<8;
|
||||
c += ((uint32_t)k[10])<<16;
|
||||
c += ((uint32_t)k[11])<<24;
|
||||
vt_hashmix(a,b,c);
|
||||
length -= 12;
|
||||
k += 12;
|
||||
}
|
||||
|
||||
/*-------------------------------- last block: affect all 32 bits of (c) */
|
||||
switch(length) /* all the case statements fall through */
|
||||
{
|
||||
case 12: c+=((uint32_t)k[11])<<24;
|
||||
case 11: c+=((uint32_t)k[10])<<16;
|
||||
case 10: c+=((uint32_t)k[9])<<8;
|
||||
case 9 : c+=k[8];
|
||||
case 8 : b+=((uint32_t)k[7])<<24;
|
||||
case 7 : b+=((uint32_t)k[6])<<16;
|
||||
case 6 : b+=((uint32_t)k[5])<<8;
|
||||
case 5 : b+=k[4];
|
||||
case 4 : a+=((uint32_t)k[3])<<24;
|
||||
case 3 : a+=((uint32_t)k[2])<<16;
|
||||
case 2 : a+=((uint32_t)k[1])<<8;
|
||||
case 1 : a+=k[0];
|
||||
break;
|
||||
case 0 : return c;
|
||||
}
|
||||
}
|
||||
|
||||
vt_hashfinal(a,b,c);
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
-------------------------------------------------------------------------------
|
||||
hashbig() -- This is the same as hashword() on big-endian machines.
|
||||
It is different from hashlittle() on all machines. hashbig() takes advantage
|
||||
of big-endian byte ordering.
|
||||
-------------------------------------------------------------------------------
|
||||
*/
|
||||
/*
 * vt_hashbig -- hash `length` bytes at `key`, seeded with `initval`,
 * assembling the key bytes in big-endian order.
 *
 * When HASH_BIG_ENDIAN is set and the key address is 4-byte aligned the key
 * is read as 32-bit words; otherwise it is read byte by byte with the first
 * byte of each group placed in the most significant position.
 * Blocks of 12 bytes are mixed with vt_hashmix() while more than 12 bytes
 * remain; the last 1..12 bytes are added and finished with vt_hashfinal().
 * For length == 0 the seed value is returned without any mixing.
 */
static inline uint32_t vt_hashbig(const void *key, size_t length,
|
||||
uint32_t initval)
|
||||
{
|
||||
uint32_t a,b,c;
|
||||
union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
|
||||
|
||||
/* Set up the internal state */
|
||||
a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
|
||||
|
||||
/* u.i exposes the pointer's low bits for the alignment test below */
u.ptr = key;
|
||||
if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
|
||||
const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
|
||||
|
||||
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
|
||||
while (length > 12)
|
||||
{
|
||||
a += k[0];
|
||||
b += k[1];
|
||||
c += k[2];
|
||||
vt_hashmix(a,b,c);
|
||||
length -= 12;
|
||||
k += 3;
|
||||
}
|
||||
|
||||
/*----------------------------- handle the last (probably partial) block */
|
||||
/*
|
||||
* "k[2]&0xffffff00" actually reads beyond the end of the string, but
|
||||
* then masks off the part it's not allowed to read. Because the
|
||||
* string is aligned, the illegal read is in the same word as the
|
||||
* rest of the string. Every machine with memory protection I've seen
|
||||
* does it on word boundaries, so is OK with this. But VALGRIND will
|
||||
* still catch it and complain. The masking trick does make the hash
|
||||
* noticeably faster for short strings (like English words).
|
||||
*/
|
||||
|
||||
switch(length)
|
||||
{
|
||||
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
|
||||
case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
|
||||
case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
|
||||
case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
|
||||
case 8 : b+=k[1]; a+=k[0]; break;
|
||||
case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
|
||||
case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
|
||||
case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
|
||||
case 4 : a+=k[0]; break;
|
||||
case 3 : a+=k[0]&0xffffff00; break;
|
||||
case 2 : a+=k[0]&0xffff0000; break;
|
||||
case 1 : a+=k[0]&0xff000000; break;
|
||||
case 0 : return c; /* zero length strings require no mixing */
|
||||
}
|
||||
|
||||
} else { /* need to read the key one byte at a time */
|
||||
const uint8_t *k = (const uint8_t *)key;
|
||||
|
||||
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
|
||||
while (length > 12)
|
||||
{
|
||||
a += ((uint32_t)k[0])<<24;
|
||||
a += ((uint32_t)k[1])<<16;
|
||||
a += ((uint32_t)k[2])<<8;
|
||||
a += ((uint32_t)k[3]);
|
||||
b += ((uint32_t)k[4])<<24;
|
||||
b += ((uint32_t)k[5])<<16;
|
||||
b += ((uint32_t)k[6])<<8;
|
||||
b += ((uint32_t)k[7]);
|
||||
c += ((uint32_t)k[8])<<24;
|
||||
c += ((uint32_t)k[9])<<16;
|
||||
c += ((uint32_t)k[10])<<8;
|
||||
c += ((uint32_t)k[11]);
|
||||
vt_hashmix(a,b,c);
|
||||
length -= 12;
|
||||
k += 12;
|
||||
}
|
||||
|
||||
/*-------------------------------- last block: affect all 32 bits of (c) */
|
||||
switch(length) /* all the case statements fall through */
|
||||
{
|
||||
case 12: c+=k[11];
|
||||
case 11: c+=((uint32_t)k[10])<<8;
|
||||
case 10: c+=((uint32_t)k[9])<<16;
|
||||
case 9 : c+=((uint32_t)k[8])<<24;
|
||||
case 8 : b+=k[7];
|
||||
case 7 : b+=((uint32_t)k[6])<<8;
|
||||
case 6 : b+=((uint32_t)k[5])<<16;
|
||||
case 5 : b+=((uint32_t)k[4])<<24;
|
||||
case 4 : a+=k[3];
|
||||
case 3 : a+=((uint32_t)k[2])<<8;
|
||||
case 2 : a+=((uint32_t)k[1])<<16;
|
||||
case 1 : a+=((uint32_t)k[0])<<24;
|
||||
break;
|
||||
case 0 : return c;
|
||||
}
|
||||
}
|
||||
|
||||
vt_hashfinal(a,b,c);
|
||||
return c;
|
||||
}
|
||||
|
||||
#endif /* HASH_H */
|
||||
|
||||
|
@ -482,6 +482,7 @@ EXTRA_DIST = \
|
||||
vt_pform_altix.c \
|
||||
vt_pform_bgl.c \
|
||||
vt_pform_bgp.c \
|
||||
vt_pform_bgq.c \
|
||||
vt_pform_crayt3e.c \
|
||||
vt_pform_crayx1.c \
|
||||
vt_pform_crayxe.c \
|
||||
|
@ -33,16 +33,16 @@
|
||||
|
||||
#if (defined(HAVE_DL) && HAVE_DL) && (defined(HAVE_DECL_RTLD_DEFAULT) && HAVE_DECL_RTLD_DEFAULT)
|
||||
# include <dlfcn.h>
|
||||
# define GET_ADDR_OF_UNDEF_FUNC(func) \
|
||||
DEREF_IA64_FUNC_PTR(dlsym(RTLD_DEFAULT, (func)))
|
||||
# define GET_SO_FUNC_ADDR(func) \
|
||||
GET_IA64_FUNC_ADDR(dlsym(RTLD_DEFAULT, (func)))
|
||||
#else /* HAVE_DL && HAVE_DECL_RTLD_DEFAULT */
|
||||
# define GET_ADDR_OF_UNDEF_FUNC(func) 0
|
||||
# define GET_SO_FUNC_ADDR(func) 0
|
||||
#endif /* HAVE_DL && HAVE_DECL_RTLD_DEFAULT */
|
||||
|
||||
#ifdef __ia64__
|
||||
# define DEREF_IA64_FUNC_PTR(ptr) ((ptr) ? *(void**)(ptr) : (ptr))
|
||||
# define GET_IA64_FUNC_ADDR(addr) (long)((addr) ? *(void**)(addr) : (addr))
|
||||
#else /* __ia64__ */
|
||||
# define DEREF_IA64_FUNC_PTR(ptr) (ptr)
|
||||
# define GET_IA64_FUNC_ADDR(addr) (long)(addr)
|
||||
#endif /* __ia64__ */
|
||||
|
||||
#ifdef VT_COMPINST_CRAYCCE
|
||||
@ -198,7 +198,7 @@ static void get_symtab(void)
|
||||
char delim[2] = " ";
|
||||
int nc = 1;
|
||||
|
||||
long addr = -1;
|
||||
long addr = 0;
|
||||
char* filename = NULL;
|
||||
char* funcname = NULL;
|
||||
unsigned int lno = VT_NO_LNO;
|
||||
@ -246,6 +246,14 @@ static void get_symtab(void)
|
||||
if ( line[strlen(line)-1] == '\n' )
|
||||
line[strlen(line)-1] = '\0';
|
||||
|
||||
/* ignore line if it is empty */
|
||||
if ( *line == '\0' )
|
||||
continue;
|
||||
|
||||
/* ignore nm input file name */
|
||||
if ( line[strlen(line)-1] == ':' )
|
||||
continue;
|
||||
|
||||
/* split line to columns */
|
||||
col = strtok(line, delim);
|
||||
do
|
||||
@ -253,11 +261,11 @@ static void get_symtab(void)
|
||||
if ( nc == 1 ) /* column 1 (address) */
|
||||
{
|
||||
/* if there is no address in the first column the symbol could be
|
||||
undefined; try get its address later (nc==3) */
|
||||
defined within a shared object; try to get its address later (nc==3) */
|
||||
if ( strlen(col) == 1 )
|
||||
{
|
||||
nc++; /* <- will be 3 in the next round */
|
||||
strcpy(delim, "\t");
|
||||
*delim = '\t';
|
||||
}
|
||||
/* otherwise, convert address string */
|
||||
else
|
||||
@ -273,22 +281,27 @@ static void get_symtab(void)
|
||||
parse_error = 1;
|
||||
break;
|
||||
}
|
||||
strcpy(delim, "\t");
|
||||
|
||||
*delim = '\t';
|
||||
}
|
||||
else if ( nc == 3 ) /* column 3 (symbol) */
|
||||
{
|
||||
funcname = col;
|
||||
strcpy(delim, ":");
|
||||
long soaddr;
|
||||
|
||||
/* try to get address of undefined function, if necessary */
|
||||
if ( addr == -1 )
|
||||
addr = (long)GET_ADDR_OF_UNDEF_FUNC(funcname);
|
||||
funcname = col;
|
||||
|
||||
/* the symbol might be defined within a shared object; try to get
|
||||
its real address */
|
||||
if ( ( soaddr = GET_SO_FUNC_ADDR(funcname) ) != 0 )
|
||||
addr = soaddr;
|
||||
|
||||
/* ignore function, if its address could not be determined */
|
||||
if ( addr == 0 )
|
||||
break;
|
||||
|
||||
*delim = ':';
|
||||
}
|
||||
else if( nc == 4 ) /* column 4 (filename) */
|
||||
else if ( nc == 4 ) /* column 4 (filename) */
|
||||
{
|
||||
filename = col;
|
||||
}
|
||||
@ -458,11 +471,11 @@ void gnu_finalize()
|
||||
*/
|
||||
|
||||
void __cyg_profile_func_enter(void* func, void* callsite) {
|
||||
void* funcptr;
|
||||
long addr;
|
||||
uint64_t time;
|
||||
HashNode* hn;
|
||||
|
||||
funcptr = DEREF_IA64_FUNC_PTR(func);
|
||||
addr = GET_IA64_FUNC_ADDR(func);
|
||||
|
||||
/* -- if not yet initialized, initialize VampirTrace -- */
|
||||
if ( gnu_init ) {
|
||||
@ -482,7 +495,7 @@ void __cyg_profile_func_enter(void* func, void* callsite) {
|
||||
time = vt_pform_wtime();
|
||||
|
||||
/* -- get region identifier -- */
|
||||
if ( (hn = hash_get((long)funcptr))) {
|
||||
if ( (hn = hash_get(addr)) ) {
|
||||
if ( hn->vtid == VT_NO_ID ) {
|
||||
/* -- region entered the first time, register region -- */
|
||||
#if (defined(VT_MT) || defined(VT_HYB))
|
||||
@ -508,10 +521,10 @@ void __cyg_profile_func_enter(void* func, void* callsite) {
|
||||
*/
|
||||
|
||||
void __cyg_profile_func_exit(void* func, void* callsite) {
|
||||
void* funcptr;
|
||||
long addr;
|
||||
uint64_t time;
|
||||
|
||||
funcptr = DEREF_IA64_FUNC_PTR(func);
|
||||
addr = GET_IA64_FUNC_ADDR(func);
|
||||
|
||||
/* -- if VampirTrace already finalized, return -- */
|
||||
if ( !vt_is_alive ) return;
|
||||
@ -521,7 +534,7 @@ void __cyg_profile_func_exit(void* func, void* callsite) {
|
||||
time = vt_pform_wtime();
|
||||
|
||||
/* -- write exit record -- */
|
||||
if ( hash_get((long)funcptr) ) {
|
||||
if ( hash_get(addr) ) {
|
||||
vt_exit(VT_CURRENT_THREAD, &time);
|
||||
}
|
||||
|
||||
|
@ -454,6 +454,7 @@ typedef struct vtcudaDev_st
|
||||
buffer_t conf_stack; /**< top of the kernel configure stack */
|
||||
VTCUDABufEvt *evtbuf; /**< the preallocated cuda event list */
|
||||
VTCUDABufEvt *evtbuf_pos; /**< current unused event space */
|
||||
uint8_t reset; /**< has the device been reset? */
|
||||
struct vtcudaDev_st *next; /**< pointer to next element in list */
|
||||
}VTCUDADevice;
|
||||
|
||||
@ -466,7 +467,7 @@ static VTCUDADevice* cudaDevices = NULL;
|
||||
* function pointer.
|
||||
*/
|
||||
typedef struct knSymbol_st {
|
||||
const char* pointer; /**< the host function */
|
||||
VT_CUDARTWRAP_COMPAT_PTR pointer; /**< the host function */
|
||||
const char* knSymbolName; /**< name of the CUDA kernel symbol */
|
||||
/*char *name[VTGPU_KERNEL_STRING_SIZE]; *< demangled name of the cuda kernel */
|
||||
uint32_t rid; /**< region id for this kernel */
|
||||
@ -511,7 +512,7 @@ static cudaError_t (*cudaPointerGetAttributes_ptr)(struct cudaPointerAttributes
|
||||
*/
|
||||
static void VTCUDAflush(VTCUDADevice*, uint32_t);
|
||||
static VTCUDADevice* VTCUDAgetDevice(uint32_t ptid);
|
||||
static VTCUDAkernelSymbol* getKernelByHostFunction(const char* hostFun);
|
||||
static VTCUDAkernelSymbol* getKernelByHostFunction(VT_CUDARTWRAP_COMPAT_PTR hostFun);
|
||||
|
||||
/*
|
||||
* Checks if a CUDA runtime API call returns successful and respectively prints
|
||||
@ -814,19 +815,19 @@ static void VTCUDAremoveDevice(VTCUDADevice *vtDev)
|
||||
}
|
||||
|
||||
/*
|
||||
* Cleans up the structure of a VampirTrace CUDA device.
|
||||
* Reset the structure of a VampirTrace CUDA device.
|
||||
*
|
||||
* @param ptid the VampirTrace thread, which executes this cleanup
|
||||
* @param vtDev pointer to VampirTrace CUDA device structure to be cleaned up
|
||||
* @param cleanEvents cleanup CUDA events? 1 - yes, 0 - no
|
||||
*/
|
||||
static void VTCUDAcleanupDevice(uint32_t ptid, VTCUDADevice *vtDev,
|
||||
uint8_t cleanEvents)
|
||||
static void VTCUDAresetDevice(uint32_t ptid, VTCUDADevice *vtDev,
|
||||
uint8_t cleanEvents)
|
||||
{
|
||||
/* check if device was already cleaned up (e.g. with cudaThreadExit() call) */
|
||||
if(vtDev == NULL) return;
|
||||
if(vtDev == NULL || vtDev->reset == 1) return;
|
||||
|
||||
vt_cntl_msg(2, "[CUDART] Cleanup device %d (tid: %d)", vtDev->device, ptid);
|
||||
vt_cntl_msg(2, "[CUDART] Reset device %d (tid: %d)", vtDev->device, ptid);
|
||||
|
||||
/* do not call CUDA functions, if debugging mode */
|
||||
if(vt_gpu_debug) cleanEvents = 0;
|
||||
@ -871,18 +872,6 @@ static void VTCUDAcleanupDevice(uint32_t ptid, VTCUDADevice *vtDev,
|
||||
}
|
||||
#endif
|
||||
|
||||
/* write idle end time to CUDA stream 0 */
|
||||
if(vt_gpu_trace_idle == 1){
|
||||
uint64_t idle_end = vt_pform_wtime();
|
||||
vt_exit(vtDev->strmList->tid, &idle_end);
|
||||
}
|
||||
|
||||
/* cleanup stream list */
|
||||
if(vtDev->strmList != NULL){
|
||||
free(vtDev->strmList);
|
||||
vtDev->strmList = NULL;
|
||||
}
|
||||
|
||||
if(trace_events){
|
||||
/* destroy CUDA events (cudaThreadExit() implicitly destroys events) */
|
||||
if(vtDev->evtbuf != NULL){
|
||||
@ -898,10 +887,8 @@ static void VTCUDAcleanupDevice(uint32_t ptid, VTCUDADevice *vtDev,
|
||||
VT_CUDART_CALL(ret, "cudaEventDestroy failed");
|
||||
}
|
||||
|
||||
/* free the event buffer */
|
||||
free(vtDev->evtbuf);
|
||||
vtDev->evtbuf = NULL;
|
||||
vtDev->evtbuf_pos = NULL;
|
||||
/* reset event buffer */
|
||||
vtDev->evtbuf_pos = vtDev->evtbuf;
|
||||
}
|
||||
|
||||
/* destroy synchronization events */
|
||||
@ -913,13 +900,20 @@ static void VTCUDAcleanupDevice(uint32_t ptid, VTCUDADevice *vtDev,
|
||||
}
|
||||
|
||||
/* cleanup entry buffer */
|
||||
if(vtDev->asyncbuf != NULL){
|
||||
free(vtDev->asyncbuf);
|
||||
vtDev->asyncbuf = NULL;
|
||||
vtDev->buf_pos = vtDev->asyncbuf;
|
||||
vtDev->conf_stack = vtDev->buf_size;
|
||||
}
|
||||
|
||||
/* reset stream list (set streams as destroyed to be reusable later on) */
|
||||
{
|
||||
VTCUDAStrm *tmpStrm = vtDev->strmList;
|
||||
while(tmpStrm != NULL){
|
||||
tmpStrm->destroyed = 1;
|
||||
tmpStrm = tmpStrm->next;
|
||||
}
|
||||
}
|
||||
|
||||
/* free cuda malloc entries, if application didn't do this yet */
|
||||
/* free CUDA malloc entries, if application didn't do this yet */
|
||||
while(vtDev->mallocList != NULL){
|
||||
VTCUDAmalloc *tmpM = vtDev->mallocList;
|
||||
|
||||
@ -930,6 +924,44 @@ static void VTCUDAcleanupDevice(uint32_t ptid, VTCUDADevice *vtDev,
|
||||
free(tmpM);
|
||||
tmpM = NULL;
|
||||
}
|
||||
vtDev->mallocList = NULL;
|
||||
|
||||
/* reset other device parameters */
|
||||
vtDev->reset = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cleans up the structure of a VampirTrace CUDA device.
|
||||
*
|
||||
* @param ptid the VampirTrace thread, which executes this cleanup
|
||||
* @param vtDev pointer to VampirTrace CUDA device structure to be cleaned up
|
||||
* @param cleanEvents cleanup CUDA events? 1 - yes, 0 - no
|
||||
*/
|
||||
static void VTCUDAcleanupDevice(uint32_t ptid, VTCUDADevice *vtDev,
|
||||
uint8_t cleanEvents)
|
||||
{
|
||||
/* check if device already cleanup (e.g. with cudaThreadExit() call) */
|
||||
if(vtDev == NULL) return;
|
||||
|
||||
VTCUDAresetDevice(ptid, vtDev, cleanEvents);
|
||||
|
||||
vt_cntl_msg(2, "[CUDART] Cleanup device %d (tid: %d)", vtDev->device, ptid);
|
||||
|
||||
/* write idle end time to CUDA stream 0 */
|
||||
if(vt_gpu_trace_idle == 1){
|
||||
uint64_t idle_end = vt_pform_wtime();
|
||||
vt_exit(vtDev->strmList->tid, &idle_end);
|
||||
}
|
||||
|
||||
/* cleanup stream list */
|
||||
while(vtDev->strmList != NULL){
|
||||
VTCUDAStrm *tmpStrm = vtDev->strmList;
|
||||
|
||||
vtDev->strmList = tmpStrm->next;
|
||||
free(tmpStrm);
|
||||
tmpStrm = NULL;
|
||||
}
|
||||
vtDev->strmList = NULL;
|
||||
|
||||
/* free malloc of VTCUDADevice, set pointer to this VT device NULL */
|
||||
VTCUDAremoveDevice(vtDev);
|
||||
@ -1329,6 +1361,7 @@ static VTCUDADevice* VTCUDAcreateDevice(uint32_t ptid, int device)
|
||||
vtDev->evtbuf_pos = NULL;
|
||||
vtDev->strmList = NULL;
|
||||
vtDev->strmNum = 2;
|
||||
vtDev->reset = 0;
|
||||
vtDev->next = NULL;
|
||||
|
||||
#if (defined(CUDART_VERSION) && (CUDART_VERSION >= 3000))
|
||||
@ -1409,6 +1442,51 @@ static VTCUDADevice* VTCUDAcreateDevice(uint32_t ptid, int device)
|
||||
return vtDev;
|
||||
}
|
||||
|
||||
/*
 * Sets up the CUDA resources of a VampirTrace CUDA device: creates the
 * time synchronization events, records the initial synchronization time stamp
 * and creates the start/stop events of the event buffer. All of this is done
 * only if 'trace_events' is set. The device's 'reset' flag is cleared in any
 * case.
 *
 * NOTE(review): vtDev->evtbuf is indexed for 'maxEvtNum' entries here, so it
 * is assumed to be allocated with at least that many entries - confirm
 * against VTCUDAcreateDevice().
 *
 * @param vtDev pointer to the VampirTrace CUDA device structure to set up
 * @param device the CUDA device id, which is made current during the setup
 */
static void VTCUDAsetupDevice(VTCUDADevice* vtDev, int device)
{
  /* async buffer or events may not be used */
  if(trace_events){
    int cuDev_save = 0;

    /* set the device to be created, if it is not the current yet
       (needed for peer2peer copy only) */
    VT_CUDART_CALL(cudaGetDevice_ptr(&cuDev_save),"cudaGetDevice()");
    if(cuDev_save != device){
      VT_CUDART_CALL(cudaSetDevice(device), "cudaSetDevice()");
    }

    /* --- set VampirTrace - CUDA time synchronization --- */
    VT_CUDART_CALL(cudaEventCreate_ptr(&(vtDev->sync.strtEvt)),
                   "cudaEventCreate(syncStrtEvt) failed!");

    VT_CUDART_CALL(cudaEventCreate_ptr(&(vtDev->sync.stopEvt)),
                   "cudaEventCreate(syncStopEvt) failed!");

    /* record init event for later synchronization with VampirTrace time */
    vtDev->sync.strtTime = VTCUDAsynchronizeEvt(vtDev->sync.strtEvt);

    /* set initial memory copy timestamp, if no memory copies are done */
    vtDev->sync.lastTime = vtDev->sync.strtTime;

    {/* create CUDA events */
      size_t i;
      cudaError_t ret = cudaSuccess;

      for(i = 0; i < maxEvtNum; i++){
        cudaError_t err;

        /* keep the FIRST error, so that a failure is not hidden by a
           subsequent successful event creation */
        err = cudaEventCreate_ptr(&((vtDev->evtbuf[i]).strt));
        if(ret == cudaSuccess) ret = err;

        err = cudaEventCreate_ptr(&((vtDev->evtbuf[i]).stop));
        if(ret == cudaSuccess) ret = err;
      }
      VT_CUDART_CALL(ret, "cudaEventCreate failed");
    }

    /* switch back to the device, which was current on entry */
    if(cuDev_save != device){
      VT_CUDART_CALL(cudaSetDevice(cuDev_save), "cudaSetDevice()");
    }
  }

  vtDev->reset = 0;
}
|
||||
|
||||
/*
|
||||
* Invokes the device creation for VTCUDA.
|
||||
*
|
||||
@ -1522,6 +1600,9 @@ static VTCUDADevice* VTCUDAcheckThread(cudaStream_t cuStrm, uint32_t ptid,
|
||||
ptrLastStrm->next = VTCUDAcreateStream(vtDev, cuStrm);
|
||||
*vtStrm = ptrLastStrm->next;
|
||||
}
|
||||
|
||||
if(vtDev->reset == 1)
|
||||
VTCUDAsetupDevice(vtDev, device);
|
||||
|
||||
time_check = vt_pform_wtime();
|
||||
vt_exit(ptid, &time_check);
|
||||
@ -1914,14 +1995,16 @@ static void insertKernelSymbol(const char* hostFun, const char* devFunc)
|
||||
kname = vt_cuda_demangleKernel(devFunc);
|
||||
|
||||
/*check to see if demangling failed (name was not mangled). */
|
||||
if(kname == NULL){
|
||||
if(kname == NULL || *kname == '\0'){
|
||||
kname = (char *)devFunc;
|
||||
if(kname == NULL) kname = "unknownKernel";
|
||||
}
|
||||
|
||||
if(vt_cudart_filter){
|
||||
int32_t climit;
|
||||
|
||||
RFG_Filter_get(vt_cudart_filter, kname, NULL, &climit, NULL, NULL);
|
||||
RFG_Filter_getRegionRules(vt_cudart_filter, kname, NULL, &climit,
|
||||
NULL, NULL);
|
||||
|
||||
if(climit == 0){
|
||||
CUDARTWRAP_UNLOCK();
|
||||
@ -1953,7 +2036,7 @@ static void insertKernelSymbol(const char* hostFun, const char* devFunc)
|
||||
* @return the kernel or NULL, if nothing was found
|
||||
* @todo linear search could be replaced with hash
|
||||
*/
|
||||
static VTCUDAkernelSymbol* getKernelByHostFunction(const char* hostFun)
|
||||
static VTCUDAkernelSymbol* getKernelByHostFunction(VT_CUDARTWRAP_COMPAT_PTR hostFun)
|
||||
{
|
||||
VTCUDAkernelSymbol *actual = NULL;
|
||||
|
||||
@ -2291,12 +2374,12 @@ cudaError_t cudaMemcpy2DArrayToArray(struct cudaArray *dst, size_t wOffsetDst,
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaMemcpyToSymbol -- */
|
||||
cudaError_t cudaMemcpyToSymbol(const char *symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind)
|
||||
cudaError_t cudaMemcpyToSymbol(VT_CUDARTWRAP_COMPAT_PTR symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpyToSymbol",
|
||||
cudaError_t , (const char *, const void *, size_t , size_t , enum cudaMemcpyKind ),
|
||||
cudaError_t , (VT_CUDARTWRAP_COMPAT_PTR, const void *, size_t , size_t , enum cudaMemcpyKind ),
|
||||
NULL, 0);
|
||||
|
||||
VT_CUDART_MEMCPY(symbol, src, kind, count,
|
||||
@ -2307,12 +2390,12 @@ cudaError_t cudaMemcpyToSymbol(const char *symbol, const void *src, size_t coun
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaMemcpyFromSymbol -- */
|
||||
cudaError_t cudaMemcpyFromSymbol(void *dst, const char *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind)
|
||||
cudaError_t cudaMemcpyFromSymbol(void *dst, VT_CUDARTWRAP_COMPAT_PTR symbol, size_t count, size_t offset, enum cudaMemcpyKind kind)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpyFromSymbol",
|
||||
cudaError_t , (void *, const char *, size_t , size_t , enum cudaMemcpyKind ),
|
||||
cudaError_t , (void *, VT_CUDARTWRAP_COMPAT_PTR, size_t , size_t , enum cudaMemcpyKind ),
|
||||
NULL, 0);
|
||||
|
||||
VT_CUDART_MEMCPY(dst, symbol, kind, count,
|
||||
@ -2420,12 +2503,12 @@ cudaError_t cudaMemcpy2DFromArrayAsync(void *dst, size_t dpitch, const struct c
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaMemcpyToSymbolAsync -- */
|
||||
cudaError_t cudaMemcpyToSymbolAsync(const char *symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream)
|
||||
cudaError_t cudaMemcpyToSymbolAsync(VT_CUDARTWRAP_COMPAT_PTR symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpyToSymbolAsync",
|
||||
cudaError_t , (const char *, const void *, size_t , size_t , enum cudaMemcpyKind , cudaStream_t ),
|
||||
cudaError_t , (VT_CUDARTWRAP_COMPAT_PTR, const void *, size_t , size_t , enum cudaMemcpyKind , cudaStream_t ),
|
||||
NULL, 0);
|
||||
|
||||
CUDA_MEMCPY_ASYNC(kind, count, stream,
|
||||
@ -2436,12 +2519,12 @@ cudaError_t cudaMemcpyToSymbolAsync(const char *symbol, const void *src, size_t
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaMemcpyFromSymbolAsync -- */
|
||||
cudaError_t cudaMemcpyFromSymbolAsync(void *dst, const char *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream)
|
||||
cudaError_t cudaMemcpyFromSymbolAsync(void *dst, VT_CUDARTWRAP_COMPAT_PTR symbol, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpyFromSymbolAsync",
|
||||
cudaError_t , (void *, const char *, size_t , size_t , enum cudaMemcpyKind , cudaStream_t ),
|
||||
cudaError_t , (void *, VT_CUDARTWRAP_COMPAT_PTR, size_t , size_t , enum cudaMemcpyKind , cudaStream_t ),
|
||||
NULL, 0);
|
||||
|
||||
CUDA_MEMCPY_ASYNC(kind, count, stream,
|
||||
@ -2531,7 +2614,7 @@ cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cu
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaLaunch -- */
|
||||
cudaError_t cudaLaunch(const char *entry)
|
||||
cudaError_t cudaLaunch(VT_CUDARTWRAP_COMPAT_PTR entry)
|
||||
{
|
||||
cudaError_t ret;
|
||||
VTCUDADevice *vtDev = NULL;
|
||||
@ -2546,7 +2629,7 @@ cudaError_t cudaLaunch(const char *entry)
|
||||
#endif
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaLaunch",
|
||||
cudaError_t , (const char *), NULL, 0);
|
||||
cudaError_t , (VT_CUDARTWRAP_COMPAT_PTR), NULL, 0);
|
||||
|
||||
if(vt_cudart_trace_enabled){
|
||||
VT_CHECK_THREAD;
|
||||
@ -2930,7 +3013,7 @@ cudaError_t cudaDeviceReset()
|
||||
vt_cntl_msg(2, "cudaDeviceReset called (thread; %d)", ptid);
|
||||
/* cleanup the CUDA device associated to this thread */
|
||||
CUDARTWRAP_LOCK();
|
||||
VTCUDAcleanupDevice(ptid, vtDev, 1);
|
||||
VTCUDAresetDevice(ptid, vtDev, 1);
|
||||
CUDARTWRAP_UNLOCK();
|
||||
VT_LIBWRAP_FUNC_START(vt_cudart_lw); /* no extra if(trace_enabled) */
|
||||
}
|
||||
|
@ -142,12 +142,12 @@ cudaError_t cudaMemset2D(void *devPtr, size_t pitch, int value, size_t width, s
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaGetSymbolAddress -- */
|
||||
|
||||
cudaError_t cudaGetSymbolAddress(void **devPtr, const char *symbol)
|
||||
cudaError_t cudaGetSymbolAddress(void **devPtr, VT_CUDARTWRAP_COMPAT_PTR symbol)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaGetSymbolAddress",
|
||||
cudaError_t , (void **, const char *),
|
||||
cudaError_t , (void **, VT_CUDARTWRAP_COMPAT_PTR),
|
||||
NULL, 0);
|
||||
|
||||
CUDARTWRAP_FUNC_START(vt_cudart_lw);
|
||||
@ -161,12 +161,12 @@ cudaError_t cudaGetSymbolAddress(void **devPtr, const char *symbol)
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaGetSymbolSize -- */
|
||||
|
||||
cudaError_t cudaGetSymbolSize(size_t *size, const char *symbol)
|
||||
cudaError_t cudaGetSymbolSize(size_t *size, VT_CUDARTWRAP_COMPAT_PTR symbol)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaGetSymbolSize",
|
||||
cudaError_t , (size_t *, const char *),
|
||||
cudaError_t , (size_t *, VT_CUDARTWRAP_COMPAT_PTR),
|
||||
NULL, 0);
|
||||
|
||||
CUDARTWRAP_FUNC_START(vt_cudart_lw);
|
||||
@ -389,12 +389,12 @@ cudaError_t cudaGetTextureAlignmentOffset(size_t *offset, const struct textureR
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaGetTextureReference -- */
|
||||
|
||||
cudaError_t cudaGetTextureReference(const struct textureReference **texref, const char *symbol)
|
||||
cudaError_t cudaGetTextureReference(const struct textureReference **texref, VT_CUDARTWRAP_COMPAT_PTR symbol)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaGetTextureReference",
|
||||
cudaError_t , (const struct textureReference **, const char *),
|
||||
cudaError_t , (const struct textureReference **, VT_CUDARTWRAP_COMPAT_PTR),
|
||||
NULL, 0);
|
||||
|
||||
CUDARTWRAP_FUNC_START(vt_cudart_lw);
|
||||
@ -498,12 +498,12 @@ cudaError_t cudaSetupArgument(const void *arg, size_t size, size_t offset)
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaFuncGetAttributes -- */
|
||||
cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const char *func)
|
||||
cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *attr, VT_CUDARTWRAP_COMPAT_PTR func)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaFuncGetAttributes",
|
||||
cudaError_t , (struct cudaFuncAttributes *, const char *),
|
||||
cudaError_t , (struct cudaFuncAttributes *, VT_CUDARTWRAP_COMPAT_PTR),
|
||||
NULL, 0);
|
||||
|
||||
CUDARTWRAP_FUNC_START(vt_cudart_lw);
|
||||
@ -794,12 +794,12 @@ cudaError_t cudaMemGetInfo(size_t *free, size_t *total)
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaFuncSetCacheConfig -- */
|
||||
cudaError_t cudaFuncSetCacheConfig(const char *func, enum cudaFuncCache cacheConfig)
|
||||
cudaError_t cudaFuncSetCacheConfig(VT_CUDARTWRAP_COMPAT_PTR func, enum cudaFuncCache cacheConfig)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaFuncSetCacheConfig",
|
||||
cudaError_t , (const char *, enum cudaFuncCache ),
|
||||
cudaError_t , (VT_CUDARTWRAP_COMPAT_PTR, enum cudaFuncCache ),
|
||||
NULL, 0);
|
||||
|
||||
CUDARTWRAP_FUNC_START(vt_cudart_lw);
|
||||
@ -966,12 +966,12 @@ cudaError_t cudaGetSurfaceAlignmentOffset(size_t *offset, const struct surfaceR
|
||||
#endif
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaGetSurfaceReference -- */
|
||||
cudaError_t cudaGetSurfaceReference(const struct surfaceReference **surfref, const char *symbol)
|
||||
cudaError_t cudaGetSurfaceReference(const struct surfaceReference **surfref, VT_CUDARTWRAP_COMPAT_PTR symbol)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaGetSurfaceReference",
|
||||
cudaError_t , (const struct surfaceReference **, const char *),
|
||||
cudaError_t , (const struct surfaceReference **, VT_CUDARTWRAP_COMPAT_PTR),
|
||||
NULL, 0);
|
||||
|
||||
CUDARTWRAP_FUNC_START(vt_cudart_lw);
|
||||
@ -1562,12 +1562,12 @@ cudaError_t cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config)
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaFuncSetSharedMemConfig -- */
|
||||
cudaError_t cudaFuncSetSharedMemConfig(const char *func, enum cudaSharedMemConfig config)
|
||||
cudaError_t cudaFuncSetSharedMemConfig(VT_CUDARTWRAP_COMPAT_PTR func, enum cudaSharedMemConfig config)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaFuncSetSharedMemConfig",
|
||||
cudaError_t , (const char *, enum cudaSharedMemConfig ),
|
||||
cudaError_t , (VT_CUDARTWRAP_COMPAT_PTR, enum cudaSharedMemConfig ),
|
||||
NULL, 0);
|
||||
|
||||
CUDARTWRAP_FUNC_START(vt_cudart_lw);
|
||||
|
@ -26,6 +26,12 @@
|
||||
|
||||
/*#if (defined(VT_CUDARTWRAP))*/
|
||||
|
||||
/* Pointer type of the symbol/function arguments of the wrapped CUDA runtime
   API functions: 'const char *', if CUDART_VERSION is defined and less than
   5000, otherwise 'const void *'. */
#if (!defined(CUDART_VERSION) || (CUDART_VERSION >= 5000))
# define VT_CUDARTWRAP_COMPAT_PTR const void *
#else
# define VT_CUDARTWRAP_COMPAT_PTR const char *
#endif
|
||||
|
||||
/* library wrapper object */
|
||||
EXTERN VTLibwrap* vt_cudart_lw;
|
||||
|
||||
|
@ -748,8 +748,9 @@ static void vt_cuptiact_writeKernelRecord(CUpti_ActivityKernel *kernel,
|
||||
}else{
|
||||
char *knName = vt_cuda_demangleKernel(kernel->name);
|
||||
|
||||
if(knName == NULL) {
|
||||
if(knName == NULL || *knName == '\0') {
|
||||
knName = (char *)kernel->name;
|
||||
|
||||
if(knName == NULL) knName = "unknownKernel";
|
||||
}
|
||||
|
||||
|
@ -1227,6 +1227,28 @@ int vt_env_mpitrace()
|
||||
return mpitrace;
|
||||
}
|
||||
|
||||
/* Returns the value of the environment variable VT_MPI_IGNORE_FILTER as
   parsed by parse_bool(), or 0 if the variable is unset or empty. The
   environment is evaluated while the cached value is still -1; afterwards
   the value stored in the static variable is returned. */
int vt_env_mpi_ignore_filter()
{
  static int mpi_ignore_filter = -1;

  if (mpi_ignore_filter == -1)
  {
    char* value = getenv("VT_MPI_IGNORE_FILTER");

    if (value == NULL || value[0] == '\0')
    {
      /* variable not set or empty: feature disabled */
      mpi_ignore_filter = 0;
    }
    else
    {
      vt_cntl_msg(2, "VT_MPI_IGNORE_FILTER=%s", value);

      mpi_ignore_filter = parse_bool(value);
    }
  }

  return mpi_ignore_filter;
}
|
||||
|
||||
int vt_env_mpicheck()
|
||||
{
|
||||
static int mpicheck = -1;
|
||||
@ -1927,7 +1949,7 @@ int vt_env_gputrace_kernel()
|
||||
/* perhaps user wrote 'yes' or 'true' */
|
||||
if(cudakernel == 0 && parse_bool(tmp) == 1) cudakernel = 1;
|
||||
|
||||
if(cudakernel > 0)
|
||||
if(cudakernel == 1)
|
||||
vt_warning("VT_GPUTRACE_KERNEL is deprecated, "
|
||||
"use option 'kernel' with VT_GPUTRACE instead!");
|
||||
}
|
||||
|
@ -62,6 +62,7 @@ EXTERN char* vt_env_iolibpathname(void);
|
||||
EXTERN int vt_env_libctrace(void);
|
||||
EXTERN int vt_env_omptrace(void);
|
||||
EXTERN int vt_env_mpitrace(void);
|
||||
EXTERN int vt_env_mpi_ignore_filter(void);
|
||||
EXTERN int vt_env_mpicheck(void);
|
||||
EXTERN int vt_env_mpicheck_errexit(void);
|
||||
EXTERN char* vt_env_rusage(void);
|
||||
|
@ -10,6 +10,8 @@
|
||||
* See the file COPYING in the package base directory for details
|
||||
**/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <errno.h>
|
||||
|
@ -176,6 +176,14 @@ uint32_t vt_gpu_get_config(void)
|
||||
feature = strtok(NULL, sep);
|
||||
}
|
||||
|
||||
/* "memusage" requires "runtime" to be set */
|
||||
if(vt_gpu_trace_memusage == 1 &&
|
||||
(vt_gpu_config & VT_GPU_TRACE_RUNTIME_API) != VT_GPU_TRACE_RUNTIME_API){
|
||||
vt_warning("[GPU] The option 'memusage' requires 'runtime' to be set! "
|
||||
"Setting option 'runtime'.");
|
||||
vt_gpu_config |= VT_GPU_TRACE_RUNTIME_API;
|
||||
}
|
||||
|
||||
/* environment variables for further refinement */
|
||||
if(vt_env_gputrace_kernel() > 1)
|
||||
vt_gpu_trace_kernels = (uint8_t)vt_env_gputrace_kernel();
|
||||
@ -407,14 +415,14 @@ char* vt_cuda_demangleKernel(const char* mangled)
|
||||
/***************************** hashing of strings *****************************/
|
||||
#include "util/hash.h"
|
||||
|
||||
#define VT_GPU_HASHTABLE_SIZE 1021
|
||||
/* size of hash table (must be a power of two!) */
|
||||
#define VT_GPU_HASHTABLE_SIZE 1024
|
||||
|
||||
static vt_gpu_hn_string_t* vt_gpu_string_htab[VT_GPU_HASHTABLE_SIZE];
|
||||
|
||||
void* vt_gpu_stringHashPut(const char* n, uint32_t rid)
|
||||
{
|
||||
uint32_t id = (uint32_t)vt_hash((uint8_t*)n, strlen(n), 0)
|
||||
% VT_GPU_HASHTABLE_SIZE;
|
||||
uint32_t id = vt_hash(n, strlen(n), 0) & (VT_GPU_HASHTABLE_SIZE - 1);
|
||||
vt_gpu_hn_string_t *add =
|
||||
(vt_gpu_hn_string_t*)malloc(sizeof(vt_gpu_hn_string_t));
|
||||
|
||||
@ -428,8 +436,7 @@ void* vt_gpu_stringHashPut(const char* n, uint32_t rid)
|
||||
|
||||
void* vt_gpu_stringHashGet(const char* n)
|
||||
{
|
||||
uint32_t id = (uint32_t)vt_hash((uint8_t*)n, strlen(n), 0)
|
||||
% VT_GPU_HASHTABLE_SIZE;
|
||||
uint32_t id = vt_hash(n, strlen(n), 0) & (VT_GPU_HASHTABLE_SIZE - 1);
|
||||
vt_gpu_hn_string_t *curr = vt_gpu_string_htab[id];
|
||||
|
||||
while ( curr ) {
|
||||
|
@ -34,7 +34,10 @@
|
||||
|
||||
#if PAPI_VER_CURRENT >= PAPI_VERSION_NUMBER(3,9,0,0)
|
||||
# define PAPIC
|
||||
#endif
|
||||
#endif /* PAPI_VER_CURRENT >= 3.9.0.0 */
|
||||
#if PAPI_VER_CURRENT >= PAPI_VERSION_NUMBER(5,0,0,0)
|
||||
# define PAPIV
|
||||
#endif /* PAPI_VER_CURRENT >= 5.0.0.0 */
|
||||
|
||||
#ifndef TIMER_PAPI_REAL_CYC
|
||||
# define TIMER_PAPI_REAL_CYC 10
|
||||
@ -260,7 +263,11 @@ static void metric_error(int errcode, char *note)
|
||||
{
|
||||
char errstring[PAPI_MAX_STR_LEN];
|
||||
|
||||
#ifdef PAPIV
|
||||
PAPI_perror(errstring);
|
||||
#else
|
||||
PAPI_perror(errcode, errstring, PAPI_MAX_STR_LEN);
|
||||
#endif
|
||||
if (errcode == PAPI_ESYS) {
|
||||
strncat(errstring, ": ", PAPI_MAX_STR_LEN-strlen(errstring));
|
||||
strncat(errstring, strerror(errno), PAPI_MAX_STR_LEN-strlen(errstring));
|
||||
@ -274,7 +281,11 @@ static void metric_warning(int errcode, char *note)
|
||||
{
|
||||
char errstring[PAPI_MAX_STR_LEN];
|
||||
|
||||
#ifdef PAPIV
|
||||
PAPI_perror(errstring);
|
||||
#else
|
||||
PAPI_perror(errcode, errstring, PAPI_MAX_STR_LEN);
|
||||
#endif
|
||||
if (errcode == PAPI_ESYS) {
|
||||
strncat(errstring, ": ", PAPI_MAX_STR_LEN-strlen(errstring));
|
||||
strncat(errstring, strerror(errno), PAPI_MAX_STR_LEN-strlen(errstring));
|
||||
|
@ -90,6 +90,24 @@ struct VTWin
|
||||
|
||||
#endif /* HAVE_MPI2_1SIDED */
|
||||
|
||||
/*
|
||||
*-----------------------------------------------------------------------------
|
||||
* Global variables
|
||||
*-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* group of MPI_COMM_WORLD */
|
||||
MPI_Group vt_mpi_comm_world_group;
|
||||
|
||||
/* group of MPI_COMM_SELF */
|
||||
MPI_Group vt_mpi_comm_self_group;
|
||||
|
||||
/* process group id of MPI_COMM_WORLD */
|
||||
uint32_t vt_mpi_comm_world_cid = (uint32_t)-1;
|
||||
|
||||
/* process group id of MPI_COMM_SELF */
|
||||
uint32_t vt_mpi_comm_self_cid = (uint32_t)-1;
|
||||
|
||||
/*
|
||||
*-----------------------------------------------------------------------------
|
||||
* Local variables
|
||||
@ -99,12 +117,6 @@ struct VTWin
|
||||
/* MPI_COMM_WORLD definition */
|
||||
static struct VTWorld world;
|
||||
|
||||
/* MPI_COMM_WORLD process group id */
|
||||
static uint32_t world_cid = (uint32_t)-1;
|
||||
|
||||
/* MPI_COMM_SELF process group id */
|
||||
static uint32_t self_cid = (uint32_t)-1;
|
||||
|
||||
/* index to group array */
|
||||
static uint32_t last_group = 0;
|
||||
|
||||
@ -153,6 +165,19 @@ static uint8_t comm_initialized = 0;
|
||||
*-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* Linear search for a communicator in the comms[] array.
   Returns the index of the first matching entry below 'last_comm',
   or (uint32_t)-1, if the communicator was not found. */
static uint32_t comm_search(MPI_Comm comm)
{
  uint32_t idx;

  for (idx = 0; idx < last_comm; idx++)
  {
    if (comms[idx].comm == comm)
      return idx;
  }

  return (uint32_t)-1;
}
|
||||
|
||||
static uint32_t group_search(MPI_Group group)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
@ -234,7 +259,10 @@ void vt_comm_init()
|
||||
vt_error();
|
||||
#endif /* HAVE_MPI2_1SIDED */
|
||||
|
||||
PMPI_Comm_group(MPI_COMM_WORLD, &world.group);
|
||||
PMPI_Comm_group(MPI_COMM_WORLD, &vt_mpi_comm_world_group);
|
||||
PMPI_Comm_group(MPI_COMM_SELF, &vt_mpi_comm_self_group);
|
||||
|
||||
world.group = vt_mpi_comm_world_group;
|
||||
PMPI_Group_size(world.group, &world.size);
|
||||
world.size_grpv = world.size / 8 + (world.size % 8 ? 1 : 0);
|
||||
|
||||
@ -295,6 +323,24 @@ uint32_t vt_rank_to_pe(VT_MPI_INT rank, MPI_Comm comm)
|
||||
return (uint32_t)global_rank;
|
||||
}
|
||||
|
||||
/* Rank with respect to an arbitrary group |-> rank with respect to the
   group of MPI_COMM_WORLD (world.group).
   If MPI_ROOT is available and passed as rank, the own trace id
   (vt_my_trace) is returned instead. */
uint32_t vt_rank_to_pe_by_group(VT_MPI_INT rank, MPI_Group group)
{
  VT_MPI_INT world_rank;

#if defined(HAVE_DECL_MPI_ROOT) && HAVE_DECL_MPI_ROOT
  /* MPI_ROOT denotes the calling process itself */
  if ( rank == MPI_ROOT )
    return (uint32_t)(VT_MPI_INT)vt_my_trace;
#endif /* HAVE_DECL_MPI_ROOT */

  PMPI_Group_translate_ranks(group, 1, &rank, world.group, &world_rank);

  return (uint32_t)world_rank;
}
|
||||
|
||||
/*
|
||||
*-----------------------------------------------------------------------------
|
||||
* Communicator management
|
||||
@ -353,9 +399,9 @@ void vt_comm_create(MPI_Comm comm)
|
||||
VTTHRD_UNLOCK_IDS();
|
||||
#endif /* VT_MT || VT_HYB */
|
||||
|
||||
/* save communicator id for fast access in vt_comm_id */
|
||||
if (comm == MPI_COMM_WORLD) world_cid = cid;
|
||||
else if (comm == MPI_COMM_SELF) self_cid = cid;
|
||||
/* save communicator id for fast access in VT_COMM_ID */
|
||||
if (comm == MPI_COMM_WORLD) vt_mpi_comm_world_cid = cid;
|
||||
else if (comm == MPI_COMM_SELF) vt_mpi_comm_self_cid = cid;
|
||||
|
||||
/* enter comm in comms[] array */
|
||||
comms[last_comm].comm = comm;
|
||||
@ -376,15 +422,12 @@ void vt_comm_free(MPI_Comm comm)
|
||||
/* if more than one communicator exists, we need to search for the entry */
|
||||
else if (last_comm > 1)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
uint32_t i;
|
||||
|
||||
while(i < last_comm && comms[i].comm != comm)
|
||||
i++;
|
||||
|
||||
if (i < last_comm--)
|
||||
if ((i = comm_search(comm)) != (uint32_t)-1)
|
||||
{
|
||||
/* swap deletion candidate with last entry in the list */
|
||||
comms[i] = comms[last_comm];
|
||||
comms[i] = comms[--last_comm];
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -399,15 +442,9 @@ void vt_comm_free(MPI_Comm comm)
|
||||
|
||||
uint32_t vt_comm_id(MPI_Comm comm)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
uint32_t i;
|
||||
|
||||
if (comm == MPI_COMM_WORLD) return world_cid;
|
||||
else if (comm == MPI_COMM_SELF) return self_cid;
|
||||
|
||||
while(i < last_comm && comms[i].comm != comm)
|
||||
i++;
|
||||
|
||||
if (i != last_comm)
|
||||
if ((i = comm_search(comm)) != (uint32_t)-1)
|
||||
{
|
||||
return comms[i].cid;
|
||||
}
|
||||
@ -498,12 +535,9 @@ void vt_group_free(MPI_Group group)
|
||||
|
||||
uint32_t vt_group_id(MPI_Group group)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
uint32_t i;
|
||||
|
||||
while ((i < last_group) && (groups[i].group != group))
|
||||
i++;
|
||||
|
||||
if (i != last_group)
|
||||
if ((i = group_search(group)) != (uint32_t)-1)
|
||||
{
|
||||
return groups[i].gid;
|
||||
}
|
||||
|
@ -22,12 +22,32 @@
|
||||
#include "config.h"
|
||||
|
||||
#include "vt_inttypes.h"
|
||||
#include "vt_trc.h"
|
||||
|
||||
#include "mpi.h"
|
||||
|
||||
/* MPI communicator |-> VampirTrace communicator id
   (MPI_COMM_WORLD and MPI_COMM_SELF are served from the global variables
   vt_mpi_comm_world_cid and vt_mpi_comm_self_cid; any other communicator
   is looked up by vt_comm_id())
   Note: the argument may be evaluated more than once. */
#define VT_COMM_ID(c) \
  (((c)==MPI_COMM_WORLD) ? vt_mpi_comm_world_cid : \
   ((c)==MPI_COMM_SELF) ? vt_mpi_comm_self_cid : \
   vt_comm_id(c))

/* Rank with respect to arbitrary communicator |-> global rank
   (the rank argument is fully parenthesized, so that the cast applies to
   the whole expression passed by the caller)
   Note: the communicator argument may be evaluated more than once. */
#define VT_RANK_TO_PE(r,c) \
  (((c)==MPI_COMM_WORLD) ? (uint32_t)(r) : \
   ((c)==MPI_COMM_SELF) ? (uint32_t)vt_my_trace : \
   vt_rank_to_pe((r),(c)))

/* Rank with respect to arbitrary group |-> global rank
   (the rank argument is fully parenthesized, so that the cast applies to
   the whole expression passed by the caller)
   Note: the group argument may be evaluated more than once. */
#define VT_RANK_TO_PE_BY_GROUP(r,g) \
  (((g)==vt_mpi_comm_world_group) ? (uint32_t)(r) : \
   ((g)==vt_mpi_comm_self_group) ? (uint32_t)vt_my_trace : \
   vt_rank_to_pe_by_group((r),(g)))
|
||||
|
||||
EXTERN void vt_comm_init(void);
|
||||
EXTERN void vt_comm_finalize(void);
|
||||
EXTERN uint32_t vt_rank_to_pe(VT_MPI_INT rank, MPI_Comm comm);
|
||||
EXTERN uint32_t vt_rank_to_pe_by_group(VT_MPI_INT rank, MPI_Group group);
|
||||
|
||||
EXTERN void vt_group_create(MPI_Group group);
|
||||
EXTERN void vt_group_free(MPI_Group group);
|
||||
@ -44,10 +64,16 @@ EXTERN void vt_win_id(MPI_Win win, MPI_Comm* comm, uint32_t* gid, uint32_t*
|
||||
EXTERN void vt_win_set_gid(MPI_Win win, uint32_t gid);
|
||||
#endif /* HAVE_MPI2_1SIDED */
|
||||
|
||||
/* MPI communicator |-> VampirTrace communicator id */
|
||||
#define VT_COMM_ID(c) vt_comm_id(c)
|
||||
/* group of MPI_COMM_WORLD */
|
||||
EXTERN MPI_Group vt_mpi_comm_world_group;
|
||||
|
||||
/* Rank with respect to arbitrary communicator |-> global rank */
|
||||
#define VT_RANK_TO_PE(r,c) (((c)==MPI_COMM_WORLD) ? (uint32_t)r : vt_rank_to_pe(r,c))
|
||||
/* group of MPI_COMM_SELF */
|
||||
EXTERN MPI_Group vt_mpi_comm_self_group;
|
||||
|
||||
/* process group id of MPI_COMM_WORLD */
|
||||
EXTERN uint32_t vt_mpi_comm_world_cid;
|
||||
|
||||
/* process group id of MPI_COMM_SELF */
|
||||
EXTERN uint32_t vt_mpi_comm_self_cid;
|
||||
|
||||
#endif
|
||||
|
@ -32,6 +32,7 @@
|
||||
*-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* size of hash table (must be a power of two!) */
|
||||
#define HASH_MAX 1024
|
||||
|
||||
typedef struct HN_file {
|
||||
@ -44,7 +45,7 @@ static HashNode_file* htab_mpifile[HASH_MAX];
|
||||
|
||||
static void hash_put( const char* n, uint32_t i )
|
||||
{
|
||||
uint32_t id = (uint32_t)vt_hash((uint8_t*)n, strlen(n), 0) & (HASH_MAX - 1);
|
||||
uint32_t id = vt_hash(n, strlen(n), 0) & (HASH_MAX - 1);
|
||||
|
||||
HashNode_file* add = (HashNode_file*)malloc(sizeof(HashNode_file));
|
||||
add->fname = vt_strdup(n);
|
||||
@ -55,7 +56,7 @@ static void hash_put( const char* n, uint32_t i )
|
||||
|
||||
static HashNode_file* hash_get( const char* n )
|
||||
{
|
||||
uint32_t id = (uint32_t)vt_hash((uint8_t*)n, strlen(n), 0) & (HASH_MAX - 1);
|
||||
uint32_t id = vt_hash(n, strlen(n), 0) & (HASH_MAX - 1);
|
||||
|
||||
HashNode_file* curr = htab_mpifile[id];
|
||||
while ( curr ) {
|
||||
|
@ -10,6 +10,8 @@
|
||||
* See the file COPYING in the package base directory for details
|
||||
**/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
@ -64,6 +66,10 @@ void vt_request_create(MPI_Request request,
|
||||
{
|
||||
struct VTRequestBlock *new_block;
|
||||
|
||||
MPI_Datatype type;
|
||||
MPI_Group group;
|
||||
VT_MPI_INT intercomm;
|
||||
|
||||
lastidx++;
|
||||
if (lastidx >= VT_REQBLK_SIZE)
|
||||
{
|
||||
@ -100,6 +106,22 @@ void vt_request_create(MPI_Request request,
|
||||
{
|
||||
lastreq++;
|
||||
}
|
||||
|
||||
/* ask for group of comm */
|
||||
PMPI_Comm_test_inter(comm, &intercomm);
|
||||
if (intercomm)
|
||||
PMPI_Comm_remote_group(comm, &group);
|
||||
else
|
||||
PMPI_Comm_group(comm, &group);
|
||||
|
||||
/* duplicate the data type, since it could be freed before the communication
|
||||
is completed */
|
||||
#if defined(HAVE_MPI_TYPE_DUP) && HAVE_MPI_TYPE_DUP
|
||||
PMPI_Type_dup(datatype, &type);
|
||||
#else /* HAVE_MPI_TYPE_DUP */
|
||||
type = datatype;
|
||||
#endif /* HAVE_MPI_TYPE_DUP */
|
||||
|
||||
/* store request information */
|
||||
lastreq->request = request;
|
||||
lastreq->flags = ERF_NONE;
|
||||
@ -107,8 +129,9 @@ void vt_request_create(MPI_Request request,
|
||||
lastreq->tag = tag;
|
||||
lastreq->dest = dest;
|
||||
lastreq->bytes = bytes;
|
||||
lastreq->datatype = datatype;
|
||||
lastreq->comm = comm;
|
||||
lastreq->datatype = type;
|
||||
lastreq->group = group;
|
||||
lastreq->cid = VT_COMM_ID(comm);
|
||||
}
|
||||
|
||||
void vt_iorequest_create( MPI_Request request,
|
||||
@ -120,6 +143,8 @@ void vt_iorequest_create( MPI_Request request,
|
||||
{
|
||||
struct VTRequestBlock *new_block;
|
||||
|
||||
MPI_Datatype type;
|
||||
|
||||
lastidx++;
|
||||
if (lastidx >= VT_REQBLK_SIZE)
|
||||
{
|
||||
@ -156,9 +181,18 @@ void vt_iorequest_create( MPI_Request request,
|
||||
{
|
||||
lastreq++;
|
||||
}
|
||||
|
||||
/* duplicate the data type, since it could be freed before the I/O operation
|
||||
is completed */
|
||||
#if defined(HAVE_MPI_TYPE_DUP) && HAVE_MPI_TYPE_DUP
|
||||
PMPI_Type_dup(datatype, &type);
|
||||
#else /* HAVE_MPI_TYPE_DUP */
|
||||
type = datatype;
|
||||
#endif /* HAVE_MPI_TYPE_DUP */
|
||||
|
||||
/* store request information */
|
||||
lastreq->request = request;
|
||||
lastreq->datatype = datatype;
|
||||
lastreq->datatype = type;
|
||||
lastreq->flags = ERF_IO;
|
||||
lastreq->matchingid = matchingid;
|
||||
lastreq->handleid = handleid;
|
||||
@ -198,6 +232,11 @@ struct VTRequest* vt_request_get(MPI_Request request)
|
||||
|
||||
void vt_request_free(struct VTRequest* req)
|
||||
{
|
||||
#if defined(HAVE_MPI_TYPE_DUP) && HAVE_MPI_TYPE_DUP
|
||||
/* since the stored data type was duplicated on request creation, free it */
|
||||
PMPI_Type_free(&(req->datatype));
|
||||
#endif /* HAVE_MPI_TYPE_DUP */
|
||||
|
||||
/* delete request by copying last request in place of req */
|
||||
if (!lastreq) {
|
||||
vt_error_msg("INTERNAL ERROR in request handling - no last request");
|
||||
@ -248,8 +287,8 @@ void vt_check_request(uint32_t tid, uint64_t* time, struct VTRequest* req,
|
||||
PMPI_Type_size(req->datatype, &sz);
|
||||
PMPI_Get_count(status, req->datatype, &count);
|
||||
vt_mpi_recv(tid, time,
|
||||
VT_RANK_TO_PE(status->MPI_SOURCE, req->comm),
|
||||
VT_COMM_ID(req->comm), status->MPI_TAG, count * sz);
|
||||
VT_RANK_TO_PE_BY_GROUP(status->MPI_SOURCE, req->group),
|
||||
req->cid, status->MPI_TAG, count * sz);
|
||||
}
|
||||
|
||||
if (record_event && (req->flags & ERF_IO))
|
||||
|
@ -39,7 +39,9 @@ struct VTRequest {
|
||||
int dest;
|
||||
int bytes;
|
||||
MPI_Datatype datatype;
|
||||
MPI_Comm comm;
|
||||
MPI_Group group;
|
||||
uint32_t cid;
|
||||
|
||||
uint64_t matchingid;
|
||||
uint64_t handleid;
|
||||
uint32_t fileid;
|
||||
|
@ -72,9 +72,13 @@
|
||||
VT_MEMHOOKS_ON(); \
|
||||
VTTHRD_MPI_TRACING_ENABLED(VTThrdv[tid]) = env_mpitrace
|
||||
|
||||
/* initialized once from environment variable */
|
||||
/* flag: MPI tracing enabled (env. VT_MPITRACE)? */
|
||||
static uint8_t env_mpitrace = 1;
|
||||
|
||||
/* flag: trace MPI communication events even if their corresponding functions
|
||||
are filtered (env. VT_MPI_IGNORE_FILTER)? */
|
||||
static uint8_t env_mpi_ignore_filter = 0;
|
||||
|
||||
/* dummy function 'user' entered */
|
||||
static uint8_t vt_enter_user_called = 0;
|
||||
|
||||
@ -145,9 +149,13 @@ VT_MPI_INT MPI_Init(VT_MPI_INT* argc, char*** argv)
|
||||
/* get calling thread id */
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
/* shall I trace MPI events? */
|
||||
/* MPI tracing enabled? */
|
||||
env_mpitrace = vt_env_mpitrace();
|
||||
|
||||
/* trace MPI communication events although its corresponding functions
|
||||
are filtered? */
|
||||
env_mpi_ignore_filter = vt_env_mpi_ignore_filter();
|
||||
|
||||
if (IS_MPI_TRACE_ON(tid))
|
||||
{
|
||||
uint8_t was_recorded;
|
||||
@ -221,9 +229,13 @@ VT_MPI_INT MPI_Init_thread(VT_MPI_INT* argc, char*** argv, VT_MPI_INT required,
|
||||
/* get calling thread id */
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
/* shall I trace MPI events? */
|
||||
/* MPI tracing enabled? */
|
||||
env_mpitrace = vt_env_mpitrace();
|
||||
|
||||
/* trace MPI communication events although its corresponding functions
|
||||
are filtered? */
|
||||
env_mpi_ignore_filter = vt_env_mpi_ignore_filter();
|
||||
|
||||
if (IS_MPI_TRACE_ON(tid))
|
||||
{
|
||||
uint8_t was_recorded;
|
||||
@ -1559,7 +1571,7 @@ VT_MPI_INT MPI_Send(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (dest != MPI_PROC_NULL && was_recorded)
|
||||
if (dest != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(datatype, &sz);
|
||||
@ -1613,7 +1625,7 @@ VT_MPI_INT MPI_Bsend(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (dest != MPI_PROC_NULL && was_recorded)
|
||||
if (dest != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(datatype, &sz);
|
||||
@ -1667,7 +1679,7 @@ VT_MPI_INT MPI_Rsend(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (dest != MPI_PROC_NULL && was_recorded)
|
||||
if (dest != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(datatype, &sz);
|
||||
@ -1721,7 +1733,7 @@ VT_MPI_INT MPI_Ssend(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (dest != MPI_PROC_NULL && was_recorded)
|
||||
if (dest != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(datatype, &sz);
|
||||
@ -1792,7 +1804,8 @@ VT_MPI_INT MPI_Recv(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (source != MPI_PROC_NULL && result == MPI_SUCCESS && was_recorded)
|
||||
if (source != MPI_PROC_NULL && result == MPI_SUCCESS &&
|
||||
(was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(datatype, &sz);
|
||||
@ -1845,7 +1858,7 @@ VT_MPI_INT MPI_Sendrecv(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (dest != MPI_PROC_NULL && was_recorded)
|
||||
if (dest != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(sendtype, &sz);
|
||||
@ -1876,7 +1889,8 @@ VT_MPI_INT MPI_Sendrecv(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (source != MPI_PROC_NULL && result == MPI_SUCCESS && was_recorded)
|
||||
if (source != MPI_PROC_NULL && result == MPI_SUCCESS &&
|
||||
(was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(recvtype, &sz);
|
||||
@ -1934,7 +1948,7 @@ VT_MPI_INT MPI_Sendrecv_replace(void* buf, VT_MPI_INT count,
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
PMPI_Type_size(datatype, &sz);
|
||||
if (dest != MPI_PROC_NULL && was_recorded )
|
||||
if (dest != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
vt_mpi_send(tid, &time, VT_RANK_TO_PE(dest, comm),
|
||||
VT_COMM_ID(comm), sendtag, count * sz);
|
||||
@ -2007,7 +2021,7 @@ VT_MPI_INT MPI_Isend(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (dest != MPI_PROC_NULL && was_recorded)
|
||||
if (dest != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(datatype, &sz);
|
||||
@ -2133,7 +2147,7 @@ VT_MPI_INT MPI_Ibsend(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (dest != MPI_PROC_NULL && was_recorded)
|
||||
if (dest != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(datatype, &sz);
|
||||
@ -2202,7 +2216,7 @@ VT_MPI_INT MPI_Issend(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (dest != MPI_PROC_NULL && was_recorded)
|
||||
if (dest != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(datatype, &sz);
|
||||
@ -2271,7 +2285,7 @@ VT_MPI_INT MPI_Irsend(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (dest != MPI_PROC_NULL && was_recorded)
|
||||
if (dest != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
PMPI_Type_size(datatype, &sz);
|
||||
@ -2359,7 +2373,8 @@ VT_MPI_INT MPI_Wait(MPI_Request* request, MPI_Status* status)
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_check_request(tid, &time, orig_req, status, was_recorded);
|
||||
vt_check_request(tid, &time, orig_req, status,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -2423,7 +2438,7 @@ VT_MPI_INT MPI_Waitall(VT_MPI_INT count, MPI_Request* requests,
|
||||
{
|
||||
orig_req = vt_saved_request_get(i);
|
||||
vt_check_request(tid, &time, orig_req, &(array_of_statuses[i]),
|
||||
was_recorded);
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
}
|
||||
|
||||
@ -2485,7 +2500,8 @@ VT_MPI_INT MPI_Waitany(VT_MPI_INT count, MPI_Request* requests,
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
orig_req = vt_saved_request_get(*index);
|
||||
vt_check_request(tid, &time, orig_req, status, was_recorded);
|
||||
vt_check_request(tid, &time, orig_req, status,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -2553,7 +2569,7 @@ VT_MPI_INT MPI_Waitsome(VT_MPI_INT incount, MPI_Request* array_of_requests,
|
||||
{
|
||||
orig_req = vt_saved_request_get(array_of_indices[i]);
|
||||
vt_check_request(tid, &time, orig_req, &(array_of_statuses[i]),
|
||||
was_recorded);
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
}
|
||||
|
||||
@ -2615,7 +2631,10 @@ VT_MPI_INT MPI_Test(MPI_Request* request, VT_MPI_INT* flag, MPI_Status* status)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (*flag)
|
||||
vt_check_request(tid, &time, orig_req, status, was_recorded);
|
||||
{
|
||||
vt_check_request(tid, &time, orig_req, status,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -2680,7 +2699,8 @@ VT_MPI_INT MPI_Testany(VT_MPI_INT count, MPI_Request* array_of_requests,
|
||||
if (*flag && *index != MPI_UNDEFINED)
|
||||
{
|
||||
orig_req = vt_saved_request_get(*index);
|
||||
vt_check_request(tid, &time, orig_req, status, was_recorded);
|
||||
vt_check_request(tid, &time, orig_req, status,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
}
|
||||
|
||||
@ -2749,7 +2769,8 @@ VT_MPI_INT MPI_Testall(VT_MPI_INT count, MPI_Request* array_of_requests,
|
||||
{
|
||||
orig_req = vt_saved_request_get(i);
|
||||
vt_check_request(tid, &time, orig_req,
|
||||
&(array_of_statuses[i]), was_recorded);
|
||||
&(array_of_statuses[i]),
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2819,7 +2840,7 @@ VT_MPI_INT MPI_Testsome(VT_MPI_INT incount, MPI_Request* array_of_requests,
|
||||
{
|
||||
orig_req = vt_saved_request_get(array_of_indices[i]);
|
||||
vt_check_request(tid, &time, orig_req, &(array_of_statuses[i]),
|
||||
was_recorded);
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
}
|
||||
|
||||
@ -3155,9 +3176,12 @@ VT_MPI_INT MPI_Start(MPI_Request* request)
|
||||
{
|
||||
req->flags |= ERF_IS_ACTIVE;
|
||||
if ((req->flags & ERF_SEND) && (req->dest != MPI_PROC_NULL) &&
|
||||
(was_recorded))
|
||||
vt_mpi_send(tid, &time, VT_RANK_TO_PE(req->dest, req->comm),
|
||||
VT_COMM_ID(req->comm), req->tag, req->bytes);
|
||||
(was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
vt_mpi_send(tid, &time,
|
||||
VT_RANK_TO_PE_BY_GROUP(req->dest, req->group),
|
||||
req->cid, req->tag, req->bytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3220,11 +3244,13 @@ VT_MPI_INT MPI_Startall(VT_MPI_INT count, MPI_Request* array_of_requests)
|
||||
{
|
||||
req->flags |= ERF_IS_ACTIVE;
|
||||
if ((req->flags & ERF_SEND) &&
|
||||
(req->dest != MPI_PROC_NULL) && (was_recorded))
|
||||
vt_mpi_send(tid, &time,
|
||||
VT_RANK_TO_PE(req->dest, req->comm),
|
||||
VT_COMM_ID(req->comm), req->tag,
|
||||
req->bytes);
|
||||
(req->dest != MPI_PROC_NULL) &&
|
||||
(was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
vt_mpi_send(tid, &time,
|
||||
VT_RANK_TO_PE_BY_GROUP(req->dest, req->group),
|
||||
req->cid, req->tag, req->bytes);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3398,7 +3424,7 @@ VT_MPI_INT MPI_Allreduce(void* sendbuf, void* recvbuf, VT_MPI_INT count,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
VT_MPI_INT sz;
|
||||
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTThrdv[tid]);
|
||||
@ -3425,7 +3451,8 @@ VT_MPI_INT MPI_Allreduce(void* sendbuf, void* recvbuf, VT_MPI_INT count,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm, was_recorded);
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -3465,7 +3492,7 @@ VT_MPI_INT MPI_Barrier(MPI_Comm comm)
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTThrdv[tid]);
|
||||
vt_mpi_collbegin(tid, &time, vt_mpi_regid[VT__MPI_BARRIER],
|
||||
@ -3487,7 +3514,8 @@ VT_MPI_INT MPI_Barrier(MPI_Comm comm)
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm, was_recorded);
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -3528,7 +3556,7 @@ VT_MPI_INT MPI_Bcast(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (root != MPI_PROC_NULL && was_recorded)
|
||||
if (root != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT me, sendcount, sz;
|
||||
uint8_t iam_root;
|
||||
@ -3576,7 +3604,8 @@ VT_MPI_INT MPI_Bcast(void* buf, VT_MPI_INT count, MPI_Datatype datatype,
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(root != MPI_PROC_NULL && was_recorded));
|
||||
(root != MPI_PROC_NULL &&
|
||||
(was_recorded || env_mpi_ignore_filter)));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -3619,7 +3648,7 @@ VT_MPI_INT MPI_Gather(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (root != MPI_PROC_NULL && was_recorded)
|
||||
if (root != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT me, N, ssz, rsz;
|
||||
uint8_t iam_root;
|
||||
@ -3685,7 +3714,8 @@ VT_MPI_INT MPI_Gather(void* sendbuf, VT_MPI_INT sendcount,
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(root != MPI_PROC_NULL && was_recorded));
|
||||
(root != MPI_PROC_NULL &&
|
||||
(was_recorded || env_mpi_ignore_filter)));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -3728,7 +3758,7 @@ VT_MPI_INT MPI_Reduce(void* sendbuf, void* recvbuf, VT_MPI_INT count,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (root != MPI_PROC_NULL && was_recorded)
|
||||
if (root != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT me, recvcount, sz;
|
||||
uint8_t iam_root;
|
||||
@ -3822,7 +3852,7 @@ VT_MPI_INT MPI_Gatherv(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (root != MPI_PROC_NULL && was_recorded)
|
||||
if (root != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT me, N, recvcount, sendsz, recvsz, i;
|
||||
uint8_t iam_root;
|
||||
@ -3886,7 +3916,8 @@ VT_MPI_INT MPI_Gatherv(void* sendbuf, VT_MPI_INT sendcount,
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(root != MPI_PROC_NULL && was_recorded));
|
||||
(root != MPI_PROC_NULL &&
|
||||
(was_recorded || env_mpi_ignore_filter)));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -3930,7 +3961,7 @@ VT_MPI_INT MPI_Allgather(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
VT_MPI_INT N, sendsz, recvsz;
|
||||
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTThrdv[tid]);
|
||||
@ -3970,7 +4001,8 @@ VT_MPI_INT MPI_Allgather(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm, was_recorded);
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -4014,7 +4046,7 @@ VT_MPI_INT MPI_Allgatherv(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
VT_MPI_INT N, recvcount, sendsz, recvsz, i;
|
||||
|
||||
@ -4059,7 +4091,8 @@ VT_MPI_INT MPI_Allgatherv(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm, was_recorded);
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -4103,7 +4136,7 @@ VT_MPI_INT MPI_Alltoall(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
VT_MPI_INT N, sendsz, recvsz;
|
||||
|
||||
@ -4136,7 +4169,8 @@ VT_MPI_INT MPI_Alltoall(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm, was_recorded);
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -4181,7 +4215,7 @@ VT_MPI_INT MPI_Alltoallv(void* sendbuf, VT_MPI_INT* sendcounts,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
VT_MPI_INT N, sendcount = 0, recvcount = 0, sendsz, recvsz, i;
|
||||
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTThrdv[tid]);
|
||||
@ -4218,7 +4252,8 @@ VT_MPI_INT MPI_Alltoallv(void* sendbuf, VT_MPI_INT* sendcounts,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm, was_recorded);
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -4260,7 +4295,7 @@ VT_MPI_INT MPI_Scan(void* sendbuf, void* recvbuf, VT_MPI_INT count,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
VT_MPI_INT me, sz;
|
||||
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTThrdv[tid]);
|
||||
@ -4290,7 +4325,8 @@ VT_MPI_INT MPI_Scan(void* sendbuf, void* recvbuf, VT_MPI_INT count,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm, was_recorded);
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -4333,7 +4369,7 @@ VT_MPI_INT MPI_Scatter(void* sendbuf, VT_MPI_INT sendcount,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (root != MPI_PROC_NULL && was_recorded)
|
||||
if (root != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT me, N, sendsz, recvsz;
|
||||
uint8_t iam_root;
|
||||
@ -4398,7 +4434,8 @@ VT_MPI_INT MPI_Scatter(void* sendbuf, VT_MPI_INT sendcount,
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(root != MPI_PROC_NULL && was_recorded));
|
||||
(root != MPI_PROC_NULL &&
|
||||
(was_recorded || env_mpi_ignore_filter)));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -4443,7 +4480,7 @@ VT_MPI_INT MPI_Scatterv(void* sendbuf, VT_MPI_INT* sendcounts,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (root != MPI_PROC_NULL && was_recorded)
|
||||
if (root != MPI_PROC_NULL && (was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
VT_MPI_INT me, N, sendcount, sendsz, recvsz, i;
|
||||
uint8_t iam_root;
|
||||
@ -4507,7 +4544,8 @@ VT_MPI_INT MPI_Scatterv(void* sendbuf, VT_MPI_INT* sendcounts,
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(root != MPI_PROC_NULL && was_recorded));
|
||||
(root != MPI_PROC_NULL &&
|
||||
(was_recorded || env_mpi_ignore_filter)));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -4550,7 +4588,7 @@ VT_MPI_INT MPI_Reduce_scatter(void* sendbuf, void* recvbuf,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
VT_MPI_INT me, N, recvcount, sz, i;
|
||||
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTThrdv[tid]);
|
||||
@ -4584,7 +4622,8 @@ VT_MPI_INT MPI_Reduce_scatter(void* sendbuf, void* recvbuf,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm, was_recorded);
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -4647,7 +4686,8 @@ VT_MPI_INT MPI_Put(void* origin_addr, VT_MPI_INT origin_count,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (target_rank != MPI_PROC_NULL && was_recorded)
|
||||
if (target_rank != MPI_PROC_NULL &&
|
||||
(was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
MPI_Comm comm;
|
||||
VT_MPI_INT sz;
|
||||
@ -4715,7 +4755,8 @@ VT_MPI_INT MPI_Get(void* origin_addr, VT_MPI_INT origin_count,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (target_rank != MPI_PROC_NULL && was_recorded)
|
||||
if (target_rank != MPI_PROC_NULL &&
|
||||
(was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
MPI_Comm comm;
|
||||
VT_MPI_INT sz;
|
||||
@ -4783,7 +4824,8 @@ VT_MPI_INT MPI_Accumulate(void* origin_addr, VT_MPI_INT origin_count,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (target_rank != MPI_PROC_NULL && was_recorded)
|
||||
if (target_rank != MPI_PROC_NULL &&
|
||||
(was_recorded || env_mpi_ignore_filter))
|
||||
{
|
||||
MPI_Comm comm;
|
||||
VT_MPI_INT sz;
|
||||
@ -4848,7 +4890,7 @@ VT_MPI_INT MPI_Win_fence(VT_MPI_INT assert, MPI_Win win)
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
MPI_Comm comm;
|
||||
uint32_t gid, wid;
|
||||
@ -4955,7 +4997,7 @@ VT_MPI_INT MPI_Win_complete(MPI_Win win)
|
||||
|
||||
vt_win_id(win, &comm, &gid, &wid);
|
||||
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
vt_comment(tid, &time, "__RMASPECIALGROUP__");
|
||||
vt_mpi_rma_end(tid, &time, gid, wid);
|
||||
@ -5061,7 +5103,7 @@ VT_MPI_INT MPI_Win_wait(MPI_Win win)
|
||||
|
||||
vt_win_id(win, &comm, &gid, &wid);
|
||||
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
vt_mpi_rma_end(tid, &time, gid, wid);
|
||||
|
||||
vt_win_set_gid(win, VT_COMM_ID(comm));
|
||||
@ -5119,7 +5161,7 @@ VT_MPI_INT MPI_Win_test(MPI_Win win, VT_MPI_INT* flag)
|
||||
|
||||
vt_win_id(win, &comm, &gid, &wid);
|
||||
|
||||
if (*flag && was_recorded)
|
||||
if (*flag && (was_recorded || env_mpi_ignore_filter))
|
||||
vt_mpi_rma_end(tid, &time, gid, wid);
|
||||
|
||||
if (*flag)
|
||||
@ -5227,7 +5269,7 @@ VT_MPI_INT MPI_Win_unlock(VT_MPI_INT rank, MPI_Win win)
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
MPI_Comm comm;
|
||||
uint32_t gid, wid;
|
||||
@ -5293,7 +5335,7 @@ VT_MPI_INT MPI_Alltoallw(void* sendbuf, VT_MPI_INT* sendcounts,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
VT_MPI_INT N, sendcount = 0, recvcount = 0, sendsz, recvsz, i;
|
||||
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTThrdv[tid]);
|
||||
@ -5330,7 +5372,8 @@ VT_MPI_INT MPI_Alltoallw(void* sendbuf, VT_MPI_INT* sendcounts,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm, was_recorded);
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
@ -5372,7 +5415,7 @@ VT_MPI_INT MPI_Exscan(void* sendbuf, void* recvbuf, VT_MPI_INT count,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
if (was_recorded)
|
||||
if (was_recorded || env_mpi_ignore_filter)
|
||||
{
|
||||
VT_MPI_INT me, sz;
|
||||
|
||||
@ -5403,7 +5446,8 @@ VT_MPI_INT MPI_Exscan(void* sendbuf, void* recvbuf, VT_MPI_INT count,
|
||||
if (!is_mpi_multithreaded)
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
{
|
||||
vt_mpi_collend(tid, &time, matchid, &comm, was_recorded);
|
||||
vt_mpi_collend(tid, &time, matchid, &comm,
|
||||
(was_recorded || env_mpi_ignore_filter));
|
||||
}
|
||||
|
||||
vt_exit(tid, &time);
|
||||
|
@ -372,11 +372,7 @@ static void hash_put_msg(VTSum* sum, uint32_t peer, uint32_t cid, uint32_t tag,
|
||||
uint32_t id;
|
||||
VTSum_msgHashNode* add;
|
||||
|
||||
id = 0;
|
||||
if ( peer > 0 ) id = vt_hash((uint8_t*)&peer, sizeof(uint32_t), id);
|
||||
if ( cid > 0 ) id = vt_hash((uint8_t*)&cid, sizeof(uint32_t), id);
|
||||
if ( tag > 0 ) id = vt_hash((uint8_t*)&tag, sizeof(uint32_t), id);
|
||||
id &= (VTSUM_HASH_MAX - 1);
|
||||
id = vt_hashtriple(peer, cid, tag, 0) & (VTSUM_HASH_MAX - 1);
|
||||
|
||||
add = (VTSum_msgHashNode*)malloc(sizeof(VTSum_msgHashNode));
|
||||
add->peer = peer;
|
||||
@ -395,11 +391,7 @@ static VTSum_msgHashNode* hash_get_msg(VTSum* sum, uint32_t peer, uint32_t cid,
|
||||
uint32_t id;
|
||||
VTSum_msgHashNode* curr;
|
||||
|
||||
id = 0;
|
||||
if ( peer > 0 ) id = vt_hash((uint8_t*)&peer, sizeof(uint32_t), id);
|
||||
if ( cid > 0 ) id = vt_hash((uint8_t*)&cid, sizeof(uint32_t), id);
|
||||
if ( tag > 0 ) id = vt_hash((uint8_t*)&tag, sizeof(uint32_t), id);
|
||||
id &= (VTSUM_HASH_MAX - 1);
|
||||
id = vt_hashtriple(peer, cid, tag, 0) & (VTSUM_HASH_MAX - 1);
|
||||
|
||||
curr = sum->msg_stat_htab[id];
|
||||
while ( curr ) {
|
||||
@ -442,10 +434,7 @@ static void hash_put_collop(VTSum* sum, uint32_t rid, uint32_t cid,
|
||||
uint32_t id;
|
||||
VTSum_collopHashNode* add;
|
||||
|
||||
id = 0;
|
||||
if ( rid > 0 ) id = vt_hash((uint8_t*)&rid, sizeof(uint32_t), id);
|
||||
if ( cid > 0 ) id = vt_hash((uint8_t*)&cid, sizeof(uint32_t), id);
|
||||
id &= (VTSUM_HASH_MAX - 1);
|
||||
id = vt_hashtriple(rid, cid, 0, 0) & (VTSUM_HASH_MAX - 1);
|
||||
|
||||
add = (VTSum_collopHashNode*)malloc(sizeof(VTSum_collopHashNode));
|
||||
add->rid = rid;
|
||||
@ -463,10 +452,7 @@ static VTSum_collopHashNode* hash_get_collop(VTSum* sum, uint32_t rid,
|
||||
uint32_t id;
|
||||
VTSum_collopHashNode* curr;
|
||||
|
||||
id = 0;
|
||||
if ( rid > 0 ) id = vt_hash((uint8_t*)&rid, sizeof(uint32_t), id);
|
||||
if ( cid > 0 ) id = vt_hash((uint8_t*)&cid, sizeof(uint32_t), id);
|
||||
id &= (VTSUM_HASH_MAX - 1);
|
||||
id = vt_hashtriple(rid, cid, 0, 0) & (VTSUM_HASH_MAX - 1);
|
||||
|
||||
curr = sum->collop_stat_htab[id];
|
||||
while ( curr ) {
|
||||
|
@ -29,13 +29,13 @@
|
||||
# define TIMER_PAPI_REAL_USEC 11
|
||||
#endif
|
||||
|
||||
#if TIMER != TIMER_BGP_GET_TIMEBASE && \
|
||||
#if TIMER != TIMER_GET_TIMEBASE && \
|
||||
TIMER != TIMER_PAPI_REAL_CYC && \
|
||||
TIMER != TIMER_PAPI_REAL_USEC
|
||||
# error Unknown timer specified! Check the timer configuration in 'config.h'.
|
||||
#endif
|
||||
|
||||
#if TIMER == TIMER_BGP_GET_TIMEBASE
|
||||
#if TIMER == TIMER_GET_TIMEBASE
|
||||
static uint64_t vt_ticks_per_sec = 1;
|
||||
#elif TIMER == TIMER_PAPI_REAL_CYC
|
||||
extern uint64_t vt_metric_clckrt(void);
|
||||
@ -50,7 +50,7 @@ static _BGP_Personality_t mybgp;
|
||||
/* platform specific initialization */
|
||||
void vt_pform_init() {
|
||||
Kernel_GetPersonality(&mybgp, sizeof(_BGP_Personality_t));
|
||||
#if TIMER == TIMER_BGP_GET_TIMEBASE
|
||||
#if TIMER == TIMER_GET_TIMEBASE
|
||||
vt_ticks_per_sec = (uint64_t)BGP_Personality_clockMHz(&mybgp) * 1000000LL;
|
||||
#elif TIMER == TIMER_PAPI_REAL_USEC
|
||||
vt_time_base = vt_metric_real_usec();
|
||||
@ -79,7 +79,7 @@ char* vt_pform_exec()
|
||||
|
||||
/* clock resolution */
|
||||
uint64_t vt_pform_clockres() {
|
||||
#if TIMER == TIMER_BGP_GET_TIMEBASE
|
||||
#if TIMER == TIMER_GET_TIMEBASE
|
||||
return vt_ticks_per_sec;
|
||||
#elif TIMER == TIMER_PAPI_REAL_CYC
|
||||
return vt_metric_clckrt();
|
||||
@ -90,7 +90,7 @@ uint64_t vt_pform_clockres() {
|
||||
|
||||
/* local or global wall-clock time in seconds */
|
||||
uint64_t vt_pform_wtime() {
|
||||
#if TIMER == TIMER_BGP_GET_TIMEBASE
|
||||
#if TIMER == TIMER_GET_TIMEBASE
|
||||
return (uint64_t)_bgp_GetTimeBase();
|
||||
#elif TIMER == TIMER_PAPI_REAL_CYC
|
||||
return vt_metric_real_cyc();
|
||||
|
133
ompi/contrib/vt/vt/vtlib/vt_pform_bgq.c
Обычный файл
133
ompi/contrib/vt/vt/vtlib/vt_pform_bgq.c
Обычный файл
@ -0,0 +1,133 @@
|
||||
/**
|
||||
* VampirTrace
|
||||
* http://www.tu-dresden.de/zih/vampirtrace
|
||||
*
|
||||
* Copyright (c) 2005-2012, ZIH, TU Dresden, Federal Republic of Germany
|
||||
*
|
||||
* Copyright (c) 1998-2005, Forschungszentrum Juelich, Juelich Supercomputing
|
||||
* Centre, Federal Republic of Germany
|
||||
*
|
||||
* See the file COPYING in the package base directory for details
|
||||
**/
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <firmware/include/personality.h>
|
||||
#include <hwi/include/common/uci.h>
|
||||
|
||||
#include "vt_pform.h"
|
||||
#include "vt_defs.h"
|
||||
|
||||
/* IDs of the PAPI-based timers; defined here only if not already defined */
#ifndef TIMER_PAPI_REAL_CYC
#  define TIMER_PAPI_REAL_CYC 10
#endif
#ifndef TIMER_PAPI_REAL_USEC
#  define TIMER_PAPI_REAL_USEC 11
#endif

/* refuse to build with a timer this file has no implementation for */
#if TIMER != TIMER_GET_TIMEBASE && \
    TIMER != TIMER_PAPI_REAL_CYC && \
    TIMER != TIMER_PAPI_REAL_USEC
#  error Unknown timer specified! Check the timer configuration in 'config.h'.
#endif

#if TIMER == TIMER_GET_TIMEBASE
#  include <hwi/include/bqc/A2_inlines.h>
   /* timer ticks per second; set from the personality in vt_pform_init() */
   static uint64_t vt_ticks_per_sec = 1;
#elif TIMER == TIMER_PAPI_REAL_CYC
   extern uint64_t vt_metric_clckrt(void);
   extern uint64_t vt_metric_real_cyc(void);
#elif TIMER == TIMER_PAPI_REAL_USEC
   extern uint64_t vt_metric_real_usec(void);
   /* value of vt_metric_real_usec() taken in vt_pform_init(); subtracted
      from later readings in vt_pform_wtime() */
   static uint64_t vt_time_base = 0;
#endif

/* torus coordinates A,B,C,D,E of this node ([0]..[4]) followed by the
   processor ID ([5]); filled in by vt_pform_init() */
static int torus_coord[6];

/* personality of this node; filled in by vt_pform_init() */
static Personality_t mybgq;
|
||||
|
||||
/* platform specific initialization */
|
||||
void vt_pform_init() {
|
||||
Kernel_GetPersonality(&mybgq, sizeof(Personality_t));
|
||||
#if TIMER == TIMER_GET_TIMEBASE
|
||||
vt_ticks_per_sec = (uint64_t)mybgq.Kernel_Config.FreqMHz * 1000000LL;
|
||||
#elif TIMER == TIMER_PAPI_REAL_USEC
|
||||
vt_time_base = vt_metric_real_usec();
|
||||
#endif
|
||||
|
||||
torus_coord[0] = mybgq.Network_Config.Acoord;
|
||||
torus_coord[1] = mybgq.Network_Config.Bcoord;
|
||||
torus_coord[2] = mybgq.Network_Config.Ccoord;
|
||||
torus_coord[3] = mybgq.Network_Config.Dcoord;
|
||||
torus_coord[4] = mybgq.Network_Config.Ecoord;
|
||||
torus_coord[5] = Kernel_ProcessorID();
|
||||
}
|
||||
|
||||
/* directory of global file system
   (always the current working directory on this platform) */
char* vt_pform_gdir() {
  return ".";
}
|
||||
|
||||
/* directory of local file system:
   DEFAULT_PFORM_LDIR if it is defined at build time, otherwise the current
   working directory */
char* vt_pform_ldir() {
#ifdef DEFAULT_PFORM_LDIR
  return DEFAULT_PFORM_LDIR;
#else
  return ".";
#endif
}
|
||||
|
||||
/* full path of executable
   (not determined on this platform; always returns NULL) */
char* vt_pform_exec()
{
  return NULL;
}
|
||||
|
||||
/* clock resolution */
|
||||
uint64_t vt_pform_clockres() {
|
||||
#if TIMER == TIMER_GET_TIMEBASE
|
||||
return vt_ticks_per_sec;
|
||||
#elif TIMER == TIMER_PAPI_REAL_CYC
|
||||
return vt_metric_clckrt();
|
||||
#elif TIMER == TIMER_PAPI_REAL_USEC
|
||||
return 1000000LL;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* local or global wall-clock time:
   current reading of the timer selected via TIMER. The value is a raw timer
   reading, not seconds; vt_pform_clockres() gives the matching units per
   second for the time-base and microsecond timers (presumably also for the
   PAPI cycle timer -- confirm against vt_metric_clckrt()). */
uint64_t vt_pform_wtime() {
#if TIMER == TIMER_GET_TIMEBASE
  return (uint64_t)GetTimeBase();
#elif TIMER == TIMER_PAPI_REAL_CYC
  return vt_metric_real_cyc();
#elif TIMER == TIMER_PAPI_REAL_USEC
  /* microseconds elapsed since vt_pform_init() */
  return vt_metric_real_usec() - vt_time_base;
#endif
}
|
||||
|
||||
/* unique numeric SMP-node identifier */
|
||||
long vt_pform_node_id() {
|
||||
BG_UniversalComponentIdentifier uci = mybgq.Kernel_Config.UCI;
|
||||
/* use upper part of UCI (26bit; upto ComputeCard; ignore lower 38bit)
|
||||
* but only use the 20 bits (FFFFF) that describe row,col,mp,nb,cc */
|
||||
return ((uci>>38)&0xFFFFF);
|
||||
}
|
||||
|
||||
/* unique string SMP-node identifier */
|
||||
char* vt_pform_node_name() {
|
||||
static char buf[48];
|
||||
BG_UniversalComponentIdentifier uci = mybgq.Kernel_Config.UCI;
|
||||
unsigned int row, col, mp, nb, cc;
|
||||
bg_decodeComputeCardOnNodeBoardUCI(uci, &row, &col, &mp, &nb, &cc);
|
||||
sprintf(buf, "R%x%x-M%d-N%02x-J%02x <%d,%d,%d,%d,%d>", row, col, mp, nb, cc,
|
||||
torus_coord[0], torus_coord[1], torus_coord[2],
|
||||
torus_coord[3], torus_coord[4]);
|
||||
return buf;
|
||||
}
|
||||
|
||||
/* number of CPUs
   (hard-coded for this platform; presumably 16 cores x 4 hardware threads
   per node -- confirm) */
int vt_pform_num_cpus() {
  return 64;
}
|
||||
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
x
Ссылка в новой задаче
Block a user