diff --git a/ompi/contrib/vt/vt/ChangeLog b/ompi/contrib/vt/vt/ChangeLog index 4ec451992f..3da8bbaca1 100644 --- a/ompi/contrib/vt/vt/ChangeLog +++ b/ompi/contrib/vt/vt/ChangeLog @@ -1,7 +1,62 @@ -5.11.2openmpi +5.12openmpi + - updated version of internal OTF to 1.10openmpi + (see extlib/otf/ChangeLog) + - added support for CUDA runtime tracing via CUPTI callbacks + - added support for process group counters + - extended internal API to: + - create process groups + - assign attributes to process groups + (e.g. is communicator, has counters) + - improved MPI group/communicator handling: + - merge groups which have the same members to one group + - name groups to "MPI Group X" + - added writing of process begin/end records + - added environment variable VT_ONOFF_CHECK_STACK_BALANCE to + enable/disable check for stack level balance when switching tracing + on/off by using the user API + - added configure option '--[enable|disable]-fortran' to control + building of Fortran support + - install a symbolic link 'vtfiltergen[-mpi]' which is a synonym for + 'vtfilter[-mpi] --gen' + - disabled unnecessary time synchronization on NEC SX platforms + - removed limitation of maximum defined MPI handles (groups, comms., + and windows) in a MPI program + - fixed incorrect handling of MPI_IN_PLACE + - fixed erroneous handling of MPI_Aint within the Fortran MPI wrappers + - fixed error handling on flushing the trace buffer + - fixed potential memory corruption when flushing the trace buffer + multiple times (VT_MAX_FLUSHES != 1) + - fixed numbering in thread names + - vtdyn: + - detach from application process before continuing its + execution (allows signal trapping (e.g. 
Ctrl+C) within the + application) + - fixed selection of functions to be instrumented + - save floating point registers before calling instrumentation + to ensure correct program behavior + - compiler wrappers: + - added options '-vt:preprocess', '-vt:cpp', and '-vt:cppflags' + to preprocess source files before parsing by OPARI and/or TAU + - added option '-vt:nocleanup' to keep intermediate files + - vtsetup: (see tools/vtsetup/ChangeLog:1.0.1) + +5.11.3 + - if possible, get resource usage counters for the calling thread + instead of the process + - removed MPI_Address and MPI_Get_address from MPI wrappers + - fixed incomplete tracking of MPI inter-communicators + - vtunify: fixed parsing of negative time offsets within unify + control data + +5.11.2 + - updated version of internal OTF to 1.9.2sawfish + (see extlib/otf/ChangeLog) - added options '-vt:showme-' to the compiler wrapper to show the compiler/linker flags that would be supplied to the underlying compiler + - add path to MPI library when linking with the compiler wrappers + - do not build vtrun script on BlueGene; shell scripts don't work on + the back-end - fixed segmentation fault in vtunify-mpi which might occur during gathering local marker definitions/spots to master process diff --git a/ompi/contrib/vt/vt/VERSION b/ompi/contrib/vt/vt/VERSION index d36af0ab8d..37183008ef 100644 --- a/ompi/contrib/vt/vt/VERSION +++ b/ompi/contrib/vt/vt/VERSION @@ -1 +1 @@ -5.11.2openmpi +5.12openmpi diff --git a/ompi/contrib/vt/vt/acinclude.m4 b/ompi/contrib/vt/vt/acinclude.m4 index 9b926b7717..76ac3aa52e 100644 --- a/ompi/contrib/vt/vt/acinclude.m4 +++ b/ompi/contrib/vt/vt/acinclude.m4 @@ -6,8 +6,9 @@ m4_include(config/m4/acinclude.cpc.m4) m4_include(config/m4/acinclude.cross.m4) m4_include(config/m4/acinclude.csfs.m4) m4_include(config/m4/acinclude.ctool.m4) -m4_include(config/m4/acinclude.cudawrap.m4) +m4_include(config/m4/acinclude.cuda.m4) m4_include(config/m4/acinclude.cupti.m4) 
+m4_include(config/m4/acinclude.cudawrap.m4) m4_include(config/m4/acinclude.cxxrtlib.m4) m4_include(config/m4/acinclude.dl.m4) m4_include(config/m4/acinclude.dyninst.m4) diff --git a/ompi/contrib/vt/vt/config/defaults/bgp b/ompi/contrib/vt/vt/config/defaults/bgp index 1ee03af0f6..e733f12f95 100644 --- a/ompi/contrib/vt/vt/config/defaults/bgp +++ b/ompi/contrib/vt/vt/config/defaults/bgp @@ -14,4 +14,4 @@ enable_shared="no" enable_cpuidtrace="no" with_cross_prefix="bg" with_mpibgp="yes" -with_cxxrtlib="-L/opt/ibmcmp/lib/bg/bglib -libmc++ -lstdc++" +with_cxxrtlib="-L/opt/ibmcmp/lib/bg/bglib -L/opt/ibmcmp/lib/bg/9.0/bglib -libmc++ -lstdc++" diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.compwrap.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.compwrap.m4 index 3d8fcf6eda..66ea919835 100644 --- a/ompi/contrib/vt/vt/config/m4/acinclude.compwrap.m4 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.compwrap.m4 @@ -4,6 +4,8 @@ AC_DEFUN([ACVT_COMPWRAP], VT_WRAPPER_CC_EXTRA_COMPILER_FLAGS= VT_WRAPPER_CC_EXTRA_LINKER_FLAGS= VT_WRAPPER_CC_EXTRA_LIBS= + VT_WRAPPER_CC_CPP=$CPP + VT_WRAPPER_CC_EXTRA_CPPFLAGS= VT_WRAPPER_CC_DYNINST_COMPILER_FLAGS= VT_WRAPPER_CC_TAUINST_OPTS= VT_WRAPPER_CC_TAUINST_PARSE_BIN= @@ -15,6 +17,8 @@ AC_DEFUN([ACVT_COMPWRAP], VT_WRAPPER_CXX_EXTRA_COMPILER_FLAGS= VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS= VT_WRAPPER_CXX_EXTRA_LIBS= + VT_WRAPPER_CXX_CPP=$CXXCPP + VT_WRAPPER_CXX_EXTRA_CPPFLAGS= VT_WRAPPER_CXX_DYNINST_COMPILER_FLAGS= VT_WRAPPER_CXX_TAUINST_OPTS= VT_WRAPPER_CXX_TAUINST_PARSE_BIN= @@ -26,6 +30,8 @@ AC_DEFUN([ACVT_COMPWRAP], VT_WRAPPER_F77_EXTRA_COMPILER_FLAGS= VT_WRAPPER_F77_EXTRA_LINKER_FLAGS= VT_WRAPPER_F77_EXTRA_LIBS= + VT_WRAPPER_F77_CPP=$CPP + VT_WRAPPER_F77_EXTRA_CPPFLAGS= VT_WRAPPER_F77_DYNINST_COMPILER_FLAGS= VT_WRAPPER_F77_TAUINST_OPTS= VT_WRAPPER_F77_TAUINST_PARSE_BIN= @@ -37,6 +43,8 @@ AC_DEFUN([ACVT_COMPWRAP], VT_WRAPPER_FC_EXTRA_COMPILER_FLAGS= VT_WRAPPER_FC_EXTRA_LINKER_FLAGS= VT_WRAPPER_FC_EXTRA_LIBS= + VT_WRAPPER_FC_CPP=$CPP + 
VT_WRAPPER_FC_EXTRA_CPPFLAGS= VT_WRAPPER_FC_DYNINST_COMPILER_FLAGS= VT_WRAPPER_FC_TAUINST_OPTS= VT_WRAPPER_FC_TAUINST_PARSE_BIN= @@ -48,6 +56,9 @@ AC_DEFUN([ACVT_COMPWRAP], VT_WRAPPER_NVCC_EXTRA_COMPILER_FLAGS= VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS= VT_WRAPPER_NVCC_EXTRA_LIBS= + VT_WRAPPER_NVCC_CPP=$CPP + VT_WRAPPER_NVCC_EXTRA_CPPFLAGS= + VT_WRAPPER_NVCC_DYNINST_COMPILER_FLAGS= VT_WRAPPER_NVCC_TAUINST_OPTS= VT_WRAPPER_NVCC_TAUINST_PARSE_BIN= @@ -92,6 +103,17 @@ AC_DEFUN([ACVT_COMPWRAP], [extra libraries to link when using vtcc]), [VT_WRAPPER_CC_EXTRA_LIBS=$withval]) + AC_ARG_WITH(wrapper-cc-cpp, + AC_HELP_STRING([--with-wrapper-cc-cpp=WRAPPERCCCPP], + [C preprocessor command for vtcc, default: CPP]), + [VT_WRAPPER_CC_CPP=$withval]) + + AC_ARG_WITH(wrapper-cc-cppflags, + AC_HELP_STRING([--with-wrapper-cc-cppflags=WRAPPERCCCPPFLAGS], + [extra preprocessor flags to add when using vtcc -vt:preprocess]), + [VT_WRAPPER_CC_EXTRA_CPPFLAGS=$withval]) + + AC_ARG_WITH(wrapper-cc-default-partype, AC_HELP_STRING([--with-wrapper-cc-default-partype=TYPE], [default parallelization type for vtcc (seq,mt,mpi,hyb), default: $VT_WRAPPER_CC_DEFAULT_PARTYPE]), @@ -126,6 +148,16 @@ AC_DEFUN([ACVT_COMPWRAP], [extra libraries to link when using vtcxx]), [VT_WRAPPER_CXX_EXTRA_LIBS=$withval]) + AC_ARG_WITH(wrapper-cxx-cpp, + AC_HELP_STRING([--with-wrapper-cxx-cpp=WRAPPERCXXCPP], + [C++ preprocessor command for vtcxx, default: CXXCPP]), + [VT_WRAPPER_CXX_CPP=$withval]) + + AC_ARG_WITH(wrapper-cxx-cppflags, + AC_HELP_STRING([--with-wrapper-cxx-cppflags=WRAPPERCXXCPPFLAGS], + [extra preprocessor flags to add when using vtcxx -vt:preprocess]), + [VT_WRAPPER_CXX_EXTRA_CPPFLAGS=$withval]) + AC_ARG_WITH(wrapper-cxx-default-partype, AC_HELP_STRING([--with-wrapper-cxx-default-partype=TYPE], [default parallelization type for vtcxx (seq,mt,mpi,hyb), default: $VT_WRAPPER_CXX_DEFAULT_PARTYPE]), @@ -160,6 +192,16 @@ AC_DEFUN([ACVT_COMPWRAP], [extra libraries to link when using vtf77]), 
[VT_WRAPPER_F77_EXTRA_LIBS=$withval]) + AC_ARG_WITH(wrapper-f77-cpp, + AC_HELP_STRING([--with-wrapper-f77-cpp=WRAPPERF77CPP], + [C preprocessor command for vtf77, default: CPP]), + [VT_WRAPPER_F77_CPP=$withval]) + + AC_ARG_WITH(wrapper-f77-cppflags, + AC_HELP_STRING([--with-wrapper-f77-cppflags=WRAPPERF77CPPFLAGS], + [extra preprocessor flags to add when using vtf77 -vt:preprocess]), + [VT_WRAPPER_F77_EXTRA_CPPFLAGS=$withval]) + AC_ARG_WITH(wrapper-f77-default-partype, AC_HELP_STRING([--with-wrapper-f77-default-partype=TYPE], [default parallelization type for vtf77 (seq,mt,mpi,hyb), default: $VT_WRAPPER_F77_DEFAULT_PARTYPE]), @@ -194,6 +236,16 @@ AC_DEFUN([ACVT_COMPWRAP], [extra libraries to link when using vtf90]), [VT_WRAPPER_FC_EXTRA_LIBS=$withval]) + AC_ARG_WITH(wrapper-fc-cpp, + AC_HELP_STRING([--with-wrapper-fc-cpp=WRAPPERFCCPP], + [C preprocessor command for vtf90, default: CPP]), + [VT_WRAPPER_FC_CPP=$withval]) + + AC_ARG_WITH(wrapper-fc-cppflags, + AC_HELP_STRING([--with-wrapper-fc-cppflags=WRAPPERFCCPPFLAGS], + [extra preprocessor flags to add when using vtf90 -vt:preprocess]), + [VT_WRAPPER_FC_EXTRA_CPPFLAGS=$withval]) + AC_ARG_WITH(wrapper-fc-default-partype, AC_HELP_STRING([--with-wrapper-fc-default-partype=TYPE], [default parallelization type for vtf90 (seq,mt,mpi,hyb), default: $VT_WRAPPER_FC_DEFAULT_PARTYPE]), @@ -228,6 +280,16 @@ AC_DEFUN([ACVT_COMPWRAP], [extra libraries to link when using vtnvcc]), [VT_WRAPPER_NVCC_EXTRA_LIBS=$withval]) + AC_ARG_WITH(wrapper-nvcc-cpp, + AC_HELP_STRING([--with-wrapper-nvcc-cpp=WRAPPERNVCCCPP], + [C preprocessor command for vtnvcc, default: CPP]), + [VT_WRAPPER_NVCC_CPP=$withval]) + + AC_ARG_WITH(wrapper-nvcc-cppflags, + AC_HELP_STRING([--with-wrapper-nvcc-cppflags=WRAPPERNVCCCPPFLAGS], + [extra preprocessor flags to add when using vtnvcc -vt:preprocess]), + [VT_WRAPPER_NVCC_EXTRA_CPPFLAGS=$withval]) + AC_ARG_WITH(wrapper-nvcc-default-partype, AC_HELP_STRING([--with-wrapper-nvcc-default-partype=TYPE], [default 
parallelization type for vtnvcc (seq,mt,mpi,hyb), default: $VT_WRAPPER_NVCC_DEFAULT_PARTYPE]), @@ -297,34 +359,40 @@ AC_DEFUN([ACVT_COMPWRAP], VT_WRAPPER_AVAIL_INST="$VT_WRAPPER_AVAIL_INST dyninst" ]) - AS_IF([test x"$have_tauinst" = "xyes"], - [ - pdt_mpiincdir= - pdt_fmpiincdir= + mpiincdir= + fmpiincdir= - AS_IF([test x"$have_mpi" = "xyes"], + AS_IF([test x"$have_mpi" = "xyes"], + [ + AS_IF([test x"$inside_openmpi" = "xyes"], + [mpiincdir="-I\${includedir}/.."], + [mpiincdir="$MPIINCDIR"]) + AS_IF([test x"$have_fmpi" = "xyes"], [ AS_IF([test x"$inside_openmpi" = "xyes"], - [pdt_mpiincdir="-I\${includedir}/.."], - [pdt_mpiincdir="$MPIINCDIR"]) - AS_IF([test x"$have_fmpi" = "xyes"], - [ - AS_IF([test x"$inside_openmpi" = "xyes"], - [pdt_fmpiincdir="$pdt_mpiincdir"], - [pdt_fmpiincdir="$FMPIINCDIR"]) - ]) + [fmpiincdir="$mpiincdir"], + [fmpiincdir="$FMPIINCDIR"]) ]) + ]) + VT_WRAPPER_CC_EXTRA_CPPFLAGS="$VT_WRAPPER_CC_EXTRA_CPPFLAGS $mpiincdir" + VT_WRAPPER_CXX_EXTRA_CPPFLAGS="$VT_WRAPPER_CXX_EXTRA_CPPFLAGS $mpiincdir" + VT_WRAPPER_F77_EXTRA_CPPFLAGS="$VT_WRAPPER_F77_EXTRA_CPPFLAGS $fmpiincdir" + VT_WRAPPER_FC_EXTRA_CPPFLAGS="$VT_WRAPPER_FC_EXTRA_CPPFLAGS $fmpiincdir" + VT_WRAPPER_NVCC_EXTRA_CPPFLAGS="$VT_WRAPPER_NVCC_EXTRA_CPPFLAGS $mpiincdir" + + AS_IF([test x"$have_tauinst" = "xyes"], + [ VT_WRAPPER_TAUINST_BIN="$tauinst_cmd" VT_WRAPPER_CC_TAUINST_OPTS="-c -spec \${datadir}/TAUINST.SPEC" VT_WRAPPER_CC_TAUINST_PARSE_BIN="$tauinst_cparse_cmd" - VT_WRAPPER_CC_TAUINST_PARSE_OPTS="$pdt_mpiincdir" + VT_WRAPPER_CC_TAUINST_PARSE_OPTS="$mpiincdir" VT_WRAPPER_CXX_TAUINST_OPTS="-c++ -spec \${datadir}/TAUINST.SPEC" VT_WRAPPER_CXX_TAUINST_PARSE_BIN="$tauinst_cxxparse_cmd" VT_WRAPPER_CXX_TAUINST_PARSE_OPTS="$VT_WRAPPER_CC_TAUINST_PARSE_OPTS" VT_WRAPPER_F77_TAUINST_OPTS="-fortran -spec \${datadir}/TAUINST.SPEC" VT_WRAPPER_F77_TAUINST_PARSE_BIN="$tauinst_fparse_cmd" - VT_WRAPPER_F77_TAUINST_PARSE_OPTS="$pdt_fmpiincdir" + VT_WRAPPER_F77_TAUINST_PARSE_OPTS="$fmpiincdir" 
VT_WRAPPER_FC_TAUINST_OPTS="$VT_WRAPPER_F77_TAUINST_OPTS" VT_WRAPPER_FC_TAUINST_PARSE_BIN="$VT_WRAPPER_F77_TAUINST_PARSE_BIN" VT_WRAPPER_FC_TAUINST_PARSE_OPTS="$VT_WRAPPER_F77_TAUINST_PARSE_OPTS" @@ -338,6 +406,8 @@ AC_DEFUN([ACVT_COMPWRAP], AC_SUBST(VT_WRAPPER_CC_EXTRA_COMPILER_FLAGS) AC_SUBST(VT_WRAPPER_CC_EXTRA_LINKER_FLAGS) AC_SUBST(VT_WRAPPER_CC_EXTRA_LIBS) + AC_SUBST(VT_WRAPPER_CC_CPP) + AC_SUBST(VT_WRAPPER_CC_EXTRA_CPPFLAGS) AC_SUBST(VT_WRAPPER_CC_DYNINST_COMPILER_FLAGS) AC_SUBST(VT_WRAPPER_CC_TAUINST_OPTS) AC_SUBST(VT_WRAPPER_CC_TAUINST_PARSE_BIN) @@ -349,6 +419,8 @@ AC_DEFUN([ACVT_COMPWRAP], AC_SUBST(VT_WRAPPER_CXX_EXTRA_COMPILER_FLAGS) AC_SUBST(VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS) AC_SUBST(VT_WRAPPER_CXX_EXTRA_LIBS) + AC_SUBST(VT_WRAPPER_CXX_CPP) + AC_SUBST(VT_WRAPPER_CXX_EXTRA_CPPFLAGS) AC_SUBST(VT_WRAPPER_CXX_DYNINST_COMPILER_FLAGS) AC_SUBST(VT_WRAPPER_CXX_TAUINST_OPTS) AC_SUBST(VT_WRAPPER_CXX_TAUINST_PARSE_BIN) @@ -360,6 +432,8 @@ AC_DEFUN([ACVT_COMPWRAP], AC_SUBST(VT_WRAPPER_F77_EXTRA_COMPILER_FLAGS) AC_SUBST(VT_WRAPPER_F77_EXTRA_LINKER_FLAGS) AC_SUBST(VT_WRAPPER_F77_EXTRA_LIBS) + AC_SUBST(VT_WRAPPER_F77_CPP) + AC_SUBST(VT_WRAPPER_F77_EXTRA_CPPFLAGS) AC_SUBST(VT_WRAPPER_F77_DYNINST_COMPILER_FLAGS) AC_SUBST(VT_WRAPPER_F77_TAUINST_OPTS) AC_SUBST(VT_WRAPPER_F77_TAUINST_PARSE_BIN) @@ -371,6 +445,8 @@ AC_DEFUN([ACVT_COMPWRAP], AC_SUBST(VT_WRAPPER_FC_EXTRA_COMPILER_FLAGS) AC_SUBST(VT_WRAPPER_FC_EXTRA_LINKER_FLAGS) AC_SUBST(VT_WRAPPER_FC_EXTRA_LIBS) + AC_SUBST(VT_WRAPPER_FC_CPP) + AC_SUBST(VT_WRAPPER_FC_EXTRA_CPPFLAGS) AC_SUBST(VT_WRAPPER_FC_DYNINST_COMPILER_FLAGS) AC_SUBST(VT_WRAPPER_FC_TAUINST_OPTS) AC_SUBST(VT_WRAPPER_FC_TAUINST_PARSE_BIN) @@ -382,6 +458,8 @@ AC_DEFUN([ACVT_COMPWRAP], AC_SUBST(VT_WRAPPER_NVCC_EXTRA_COMPILER_FLAGS) AC_SUBST(VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS) AC_SUBST(VT_WRAPPER_NVCC_EXTRA_LIBS) + AC_SUBST(VT_WRAPPER_NVCC_CPP) + AC_SUBST(VT_WRAPPER_NVCC_EXTRA_CPPFLAGS) AC_SUBST(VT_WRAPPER_NVCC_DYNINST_COMPILER_FLAGS) 
AC_SUBST(VT_WRAPPER_NVCC_TAUINST_OPTS) AC_SUBST(VT_WRAPPER_NVCC_TAUINST_PARSE_BIN) diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.cuda.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.cuda.m4 new file mode 100644 index 0000000000..e5c8504ead --- /dev/null +++ b/ompi/contrib/vt/vt/config/m4/acinclude.cuda.m4 @@ -0,0 +1,159 @@ +AC_DEFUN([ACVT_CUDA], +[ + cuda_error="no" + cudart_error="no" + have_cuda="no" + have_cudart="no" + + CUDATKDIR= + CUDATKINCDIR= + CUDATKLIBDIR= + CUDALIB= + CUDARTLIB= + + AC_ARG_VAR(NVCC, [NVIDIA CUDA compiler command]) + + AC_ARG_WITH(cuda-dir, + AC_HELP_STRING([--with-cuda-dir=CUDATKDIR], + [give the path for CUDA Toolkit, default: /usr/local/cuda]), + [CUDATKDIR="$withval/"], [CUDATKDIR="/usr/local/cuda/"]) + + AC_ARG_WITH(cuda-inc-dir, + AC_HELP_STRING([--with-cuda-inc-dir=CUDATKINCDIR], + [give the path for CUDA-Toolkit-include files, default: CUDATKDIR/include]), + [CUDATKINCDIR="-I$withval/"], + [AS_IF([test x"$CUDATKDIR" != x], [CUDATKINCDIR="-I$CUDATKDIR"include/])]) + + AC_ARG_WITH(cuda-lib-dir, + AC_HELP_STRING([--with-cuda-lib-dir=CUDATKLIBDIR], + [give the path for CUDA-Toolkit-libraries, default: CUDATKDIR/lib64]), + [CUDATKLIBDIR="-L$withval/"], + [AS_IF([test x"$CUDATKDIR" != x], [CUDATKLIBDIR="-L$CUDATKDIR"lib64/])]) + + AC_ARG_WITH(cuda-lib, + AC_HELP_STRING([--with-cuda-lib=CUDALIB], [use given CUDA driver library, default: -lcuda]), + [CUDALIB="$withval"]) + + AC_ARG_WITH(cudart-lib, + AC_HELP_STRING([--with-cudart-lib=CUDARTLIB], [use given CUDA runtime library, default: -lcudart]), + [CUDARTLIB="$withval"]) + + AS_IF([test x"$cuda_error" = "xno"], + [ + sav_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $CUDATKINCDIR" + AC_CHECK_HEADER([cuda.h], [], + [ + AC_MSG_NOTICE([error: no cuda.h found; check path for CUDA Toolkit first...]) + cuda_error="yes" + ]) + CPPFLAGS=$sav_CPPFLAGS + ]) + + AS_IF([test x"$CUDALIB" = x -a x"$cuda_error" = "xno"], + [ + sav_LIBS=$LIBS + LIBS="$LIBS $CUDATKLIBDIR -lcuda" + 
AC_MSG_CHECKING([whether linking with -lcuda works]) + AC_TRY_LINK([],[], + [AC_MSG_RESULT([yes]); CUDALIB=-lcuda],[AC_MSG_RESULT([no])]) + LIBS=$sav_LIBS + ]) + + AS_IF([test x"$CUDALIB" = x -a x"$cuda_error" = "xno"], + [ + AC_MSG_NOTICE([error: no libcuda found; check path for CUDA Toolkit first...]) + cuda_error="yes" + ]) + + AS_IF([test x"$cuda_error" = "xno"], + [ + AC_MSG_CHECKING([whether CUDA driver version >= 3.0]) + + sav_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $CUDATKINCDIR" + AC_TRY_COMPILE([#include "cuda.h"], + [ +#ifndef CUDA_VERSION +# error "CUDA_VERSION not defined" +#elif CUDA_VERSION < 3000 +# error "CUDA_VERSION < 3000" +#endif + ], + [AC_MSG_RESULT([yes])], + [ + AC_MSG_RESULT([no]) + AC_MSG_NOTICE([error: CUDA driver version could not be determined and/or is incompatible (< 3.0) + See \`config.log' for more details.]) + cuda_error="yes" + ]) + CPPFLAGS=$sav_CPPFLAGS + ]) + + AS_IF([test x"$cudart_error" = "xno"], + [ + sav_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $CUDATKINCDIR" + AC_CHECK_HEADER([cuda_runtime_api.h], [], + [ + AC_MSG_NOTICE([error: no cuda_runtime_api.h found; check path for CUDA Toolkit first...]) + cudart_error="yes" + ]) + CPPFLAGS=$sav_CPPFLAGS + ]) + + AS_IF([test x"$CUDARTLIB" = x -a x"$cudart_error" = "xno"], + [ + sav_LIBS=$LIBS + LIBS="$LIBS $CUDATKLIBDIR -lcudart" + AC_MSG_CHECKING([whether linking with -lcudart works]) + AC_TRY_LINK([],[], + [AC_MSG_RESULT([yes]); CUDARTLIB=-lcudart],[AC_MSG_RESULT([no])]) + LIBS=$sav_LIBS + ]) + + AS_IF([test x"$CUDARTLIB" = x -a x"$cudart_error" = "xno"], + [ + AC_MSG_NOTICE([error: no libcudart found; check path for CUDA Toolkit first...]) + cudart_error="yes" + ]) + + AS_IF([test x"$cudart_error" = "xno"], + [ + AC_MSG_CHECKING([whether CUDA runtime version >= 3.0]) + + sav_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $CUDATKINCDIR" + AC_TRY_COMPILE([#include "cuda_runtime_api.h"], + [ +#ifndef CUDART_VERSION +# error "CUDART_VERSION not defined" +#elif CUDART_VERSION < 3000 +# 
error "CUDART_VERSION < 3000" +#endif + ], + [AC_MSG_RESULT([yes])], + [ + AC_MSG_RESULT([no]) + AC_MSG_NOTICE([error: CUDA runtime version could not be determined and/or is incompatible (< 3.0) +See \`config.log' for more details.]) + cudart_error="yes" + ]) + CPPFLAGS=$sav_CPPFLAGS + ]) + + AS_IF([test x"$cudart_error" = "xno"], + [ + AC_CHECK_PROG(NVCC, nvcc, nvcc, , [$PATH$PATH_SEPARATOR$CUDATKDIR"bin/"]) + have_cudart="yes" + ]) + + AS_IF([test x"$cuda_error" = "xno"], + [ + have_cuda="yes" + ]) + + AC_SUBST(CUDATKINCDIR) + AC_SUBST(CUDATKLIBDIR) + AC_SUBST(CUDATKLIB) +]) diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.cudawrap.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.cudawrap.m4 index 24f0004816..ce023c4dba 100644 --- a/ompi/contrib/vt/vt/config/m4/acinclude.cudawrap.m4 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.cudawrap.m4 @@ -1,209 +1,25 @@ AC_DEFUN([ACVT_CUDAWRAP], [ - cudawrap_error="no" - cudartwrap_error="no" have_cudawrap="no" have_cudartwrap="no" - CUDATKDIR= - CUDATKINCDIR= - CUDATKLIBDIR= - CUDALIB= - CUDARTLIB= - cudalib_pathname= cudartlib_pathname= - AC_ARG_VAR(NVCC, [NVIDIA CUDA compiler command]) + AC_REQUIRE([ACVT_CUDA]) - AC_ARG_WITH(cuda-dir, - AC_HELP_STRING([--with-cuda-dir=CUDATKDIR], - [give the path for CUDA Toolkit, default: /usr/local/cuda]), - [CUDATKDIR="$withval/"], [CUDATKDIR="/usr/local/cuda/"]) - - AC_ARG_WITH(cuda-inc-dir, - AC_HELP_STRING([--with-cuda-inc-dir=CUDATKINCDIR], - [give the path for CUDA-Toolkit-include files, default: CUDATKDIR/include]), - [CUDATKINCDIR="-I$withval/"], - [AS_IF([test x"$CUDATKDIR" != x], [CUDATKINCDIR="-I$CUDATKDIR"include/])]) - - AC_ARG_WITH(cuda-lib-dir, - AC_HELP_STRING([--with-cuda-lib-dir=CUDATKLIBDIR], - [give the path for CUDA-Toolkit-libraries, default: CUDATKDIR/lib64]), - [CUDATKLIBDIR="-L$withval/"], - [AS_IF([test x"$CUDATKDIR" != x], [CUDATKLIBDIR="-L$CUDATKDIR"lib64/])]) - - AC_ARG_WITH(cuda-lib, - AC_HELP_STRING([--with-cuda-lib=CUDALIB], [use given CUDA driver library, 
default: -lcuda]), - [CUDALIB="$withval"]) - - AC_ARG_WITH(cudart-lib, - AC_HELP_STRING([--with-cudart-lib=CUDARTLIB], [use given CUDA runtime library, default: -lcudart]), - [CUDARTLIB="$withval"]) - - AC_ARG_WITH(cuda-shlib, - AC_HELP_STRING([--with-cuda-shlib=CUDASHLIB], [give the pathname for the shared CUDA driver library, default: automatically by configure]), + AS_IF([test x"$cudart_error" = "xno"], [ - AS_IF([test x"$withval" = "xyes" -o x"$withval" = "xno"], - [AC_MSG_ERROR([value of '--with-cuda-shlib' not properly set])]) - cudalib_pathname=$withval - ]) - - AC_ARG_WITH(cudart-shlib, - AC_HELP_STRING([--with-cudart-shlib=CUDARTSHLIB], [give the pathname for the shared CUDA runtime library, default: automatically by configure]), - [ - AS_IF([test x"$withval" = "xyes" -o x"$withval" = "xno"], - [AC_MSG_ERROR([value of '--with-cudart-shlib' not properly set])]) - cudartlib_pathname=$withval - ]) - - AS_IF([test x"$cudawrap_error" = "xno"], - [ - sav_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$CPPFLAGS $CUDATKINCDIR" - AC_CHECK_HEADER([cuda.h], [], + AC_ARG_WITH(cudart-shlib, + AC_HELP_STRING([--with-cudart-shlib=CUDARTSHLIB], [give the pathname for the shared CUDA runtime library, default: automatically by configure]), [ - AC_MSG_NOTICE([error: no cuda.h found; check path for CUDA Toolkit first...]) - cudawrap_error="yes" - ]) - CPPFLAGS=$sav_CPPFLAGS - ]) - - AS_IF([test x"$CUDALIB" = x -a x"$cudawrap_error" = "xno"], - [ - sav_LIBS=$LIBS - LIBS="$LIBS $CUDATKLIBDIR -lcuda" - AC_MSG_CHECKING([whether linking with -lcuda works]) - AC_TRY_LINK([],[], - [AC_MSG_RESULT([yes]); CUDALIB=-lcuda],[AC_MSG_RESULT([no])]) - LIBS=$sav_LIBS - ]) - - AS_IF([test x"$CUDALIB" = x -a x"$cudawrap_error" = "xno"], - [ - AC_MSG_NOTICE([error: no libcuda found; check path for CUDA Toolkit first...]) - cudawrap_error="yes" - ]) - - AS_IF([test x"$cudawrap_error" = "xno"], - [ - AC_MSG_CHECKING([whether CUDA driver version >= 3.0]) - - sav_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$CPPFLAGS 
$CUDATKINCDIR" - AC_TRY_COMPILE([#include "cuda.h"], - [ -#ifndef CUDA_VERSION -# error "CUDA_VERSION not defined" -#elif CUDA_VERSION < 3000 -# error "CUDA_VERSION < 3000" -#endif - ], - [AC_MSG_RESULT([yes])], - [ - AC_MSG_RESULT([no]) - AC_MSG_NOTICE([error: CUDA driver version could not be determined and/or is incompatible (< 3.0) - See \`config.log' for more details.]) - cudawrap_error="yes" - ]) - CPPFLAGS=$sav_CPPFLAGS - ]) - - AS_IF([test x"$cudawrap_error" = "xno"], - [ - AC_MSG_CHECKING([for pathname of CUDA driver library]) - - AS_IF([test x"$cudalib_pathname" != x], - [ - AC_MSG_RESULT([skipped (--with-cuda-shlib=$cudalib_pathname)]) - ], - [ - AS_IF([test x"$have_rtld_next" = "xyes"], - [ - AC_MSG_RESULT([not needed]) - ], - [ - AS_IF([test x"$CUDATKLIBDIR" != x], - [cudalib_dir=`echo $CUDATKLIBDIR | sed s/\-L//`]) - cudalib_pathname=$cudalib_dir`echo $CUDALIB | sed s/\-l/lib/`".so" - - AS_IF([! test -f $cudalib_pathname], - [ - AC_MSG_RESULT([unknown]) - AC_MSG_NOTICE([error: could not determine pathname of CUDA driver library]) - cudawrap_error="yes" - ], - [ - AC_MSG_RESULT([$cudalib_pathname]) - ]) - ]) + AS_IF([test x"$withval" = "xyes" -o x"$withval" = "xno"], + [AC_MSG_ERROR([value of '--with-cudart-shlib' not properly set])]) + cudartlib_pathname=$withval ]) ]) - AS_IF([test x"$cudawrap_error" = "xno"], - [ - AS_IF([test x"$cudalib_pathname" != x], - [ - AC_DEFINE_UNQUOTED([DEFAULT_CUDALIB_PATHNAME], - ["$cudalib_pathname"], [pathname of CUDA driver library]) - ]) - - have_cudawrap="yes" - ]) - - AS_IF([test x"$cudartwrap_error" = "xno"], - [ - sav_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$CPPFLAGS $CUDATKINCDIR" - AC_CHECK_HEADER([cuda_runtime_api.h], [], - [ - AC_MSG_NOTICE([error: no cuda_runtime_api.h found; check path for CUDA Toolkit first...]) - cudartwrap_error="yes" - ]) - CPPFLAGS=$sav_CPPFLAGS - ]) - - AS_IF([test x"$CUDARTLIB" = x -a x"$cudartwrap_error" = "xno"], - [ - sav_LIBS=$LIBS - LIBS="$LIBS $CUDATKLIBDIR -lcudart" - 
AC_MSG_CHECKING([whether linking with -lcudart works]) - AC_TRY_LINK([],[], - [AC_MSG_RESULT([yes]); CUDARTLIB=-lcudart],[AC_MSG_RESULT([no])]) - LIBS=$sav_LIBS - ]) - - AS_IF([test x"$CUDARTLIB" = x -a x"$cudartwrap_error" = "xno"], - [ - AC_MSG_NOTICE([error: no libcudart found; check path for CUDA Toolkit first...]) - cudartwrap_error="yes" - ]) - - AS_IF([test x"$cudartwrap_error" = "xno"], - [ - AC_MSG_CHECKING([whether CUDA runtime version >= 3.0]) - - sav_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$CPPFLAGS $CUDATKINCDIR" - AC_TRY_COMPILE([#include "cuda_runtime_api.h"], - [ -#ifndef CUDART_VERSION -# error "CUDART_VERSION not defined" -#elif CUDART_VERSION < 3000 -# error "CUDART_VERSION < 3000" -#endif - ], - [AC_MSG_RESULT([yes])], - [ - AC_MSG_RESULT([no]) - AC_MSG_NOTICE([error: CUDA runtime version could not be determined and/or is incompatible (< 3.0) -See \`config.log' for more details.]) - cudartwrap_error="yes" - ]) - CPPFLAGS=$sav_CPPFLAGS - ]) - - AS_IF([test x"$cudartwrap_error" = "xno"], + AS_IF([test x"$cudart_error" = "xno"], [ AC_MSG_CHECKING([for pathname of CUDA runtime library]) @@ -225,7 +41,7 @@ See \`config.log' for more details.]) [ AC_MSG_RESULT([unknown]) AC_MSG_NOTICE([error: could not determine pathname of CUDA runtime library]) - cudartwrap_error="yes" + cudart_error="yes" ], [ AC_MSG_RESULT([$cudartlib_pathname]) @@ -234,25 +50,67 @@ See \`config.log' for more details.]) ]) ]) - AS_IF([test x"$cudartwrap_error" = "xno"], + AS_IF([test x"$cudart_error" = "xno"], [ - AC_CHECK_PROG(NVCC, nvcc, nvcc, , [$PATH$PATH_SEPARATOR$CUDATKDIR"bin/"]) - AS_IF([test x"$cudartlib_pathname" != x], [ AC_DEFINE_UNQUOTED([DEFAULT_CUDARTLIB_PATHNAME], ["$cudartlib_pathname"], [pathname of CUDA runtime library]) ]) - have_cudartwrap="yes" ]) - AS_IF([test x"$cudawrap_error" = "xno" -a x"$cudartwrap_error" = "xno"], + + AS_IF([test x"$cuda_error" = "xno"], [ - ACVT_CUPTI + AC_ARG_WITH(cuda-shlib, + AC_HELP_STRING([--with-cuda-shlib=CUDASHLIB], [give the pathname 
for the shared CUDA driver library, default: automatically by configure]), + [ + AS_IF([test x"$withval" = "xyes" -o x"$withval" = "xno"], + [AC_MSG_ERROR([value of '--with-cuda-shlib' not properly set])]) + cudalib_pathname=$withval + ]) + ]) + + AS_IF([test x"$cuda_error" = "xno"], + [ + AC_MSG_CHECKING([for pathname of CUDA driver library]) + + AS_IF([test x"$cudalib_pathname" != x], + [ + AC_MSG_RESULT([skipped (--with-cuda-shlib=$cudalib_pathname)]) + ], + [ + AS_IF([test x"$have_rtld_next" = "xyes"], + [ + AC_MSG_RESULT([not needed]) + ], + [ + AS_IF([test x"$CUDATKLIBDIR" != x], + [cudalib_dir=`echo $CUDATKLIBDIR | sed s/\-L//`]) + cudalib_pathname=$cudalib_dir`echo $CUDALIB | sed s/\-l/lib/`".so" + + AS_IF([! test -f $cudalib_pathname], + [ + AC_MSG_RESULT([unknown]) + AC_MSG_NOTICE([error: could not determine pathname of CUDA driver library]) + cuda_error="yes" + ], + [ + AC_MSG_RESULT([$cudalib_pathname]) + ]) + ]) + ]) + ]) + + AS_IF([test x"$cuda_error" = "xno"], + [ + AS_IF([test x"$cudalib_pathname" != x], + [ + AC_DEFINE_UNQUOTED([DEFAULT_CUDALIB_PATHNAME], + ["$cudalib_pathname"], [pathname of CUDA driver library]) + ]) + have_cudawrap="yes" ]) - AC_SUBST(CUDATKINCDIR) - AC_SUBST(CUDATKLIBDIR) - AC_SUBST(CUDATKLIB) ]) diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.cupti.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.cupti.m4 index 1aa336f721..f9819c9953 100644 --- a/ompi/contrib/vt/vt/config/m4/acinclude.cupti.m4 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.cupti.m4 @@ -8,6 +8,8 @@ AC_DEFUN([ACVT_CUPTI], CUPTILIBDIR= CUPTILIB= + AC_REQUIRE([ACVT_CUDA]) + AC_ARG_WITH(cupti-dir, AC_HELP_STRING([--with-cupti-dir=CUPTIDIR], [give the path for CUPTI, default: /usr]), @@ -31,9 +33,9 @@ AC_DEFUN([ACVT_CUPTI], sav_CPPFLAGS=$CPPFLAGS CPPFLAGS="$CPPFLAGS $CUPTIINCDIR $CUDATKINCDIR" - AC_CHECK_HEADER([cupti_events.h], [], + AC_CHECK_HEADER([cupti.h], [], [ - AC_MSG_NOTICE([error: no cupti_events.h found; check path for CUPTI package first...]) + 
AC_MSG_NOTICE([error: no cupti.h found; check path for CUPTI package first...]) cupti_error="yes" ]) CPPFLAGS=$sav_CPPFLAGS @@ -54,11 +56,13 @@ AC_DEFUN([ACVT_CUPTI], cupti_error="yes" ]) - AC_MSG_CHECKING([whether CUDA runtime version >= 4.0]) + AS_IF([test x"$cupti_error" = "xno"], + [ + AC_MSG_CHECKING([whether CUDA runtime version >= 4.0]) - sav_CPPFLAGS=$CPPFLAGS - CPPFLAGS="$CPPFLAGS $CUDATKINCDIR" - AC_TRY_COMPILE([#include "cuda_runtime_api.h"], + sav_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$CPPFLAGS $CUDATKINCDIR" + AC_TRY_COMPILE([#include "cuda_runtime_api.h"], [ #ifndef CUDART_VERSION # error "CUDART_VERSION not defined" @@ -66,14 +70,15 @@ AC_DEFUN([ACVT_CUPTI], # error "CUDART_VERSION < 4000" #endif ], - [AC_MSG_RESULT([yes])], - [ - AC_MSG_RESULT([no]) - AC_MSG_NOTICE([error: CUDA runtime version could not be determined and/or is incompatible (< 4.0) + [AC_MSG_RESULT([yes])], + [ + AC_MSG_RESULT([no]) + AC_MSG_NOTICE([error: CUDA runtime version could not be determined and/or is incompatible (< 4.0) See \`config.log' for more details.]) - cupti_error="yes" + cupti_error="yes" + ]) + CPPFLAGS=$sav_CPPFLAGS ]) - CPPFLAGS=$sav_CPPFLAGS AS_IF([test x"$cupti_error" = "xno"], [have_cupti="yes"]) diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.libwrap.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.libwrap.m4 index e86bb291c9..b1d78ed032 100644 --- a/ompi/contrib/vt/vt/config/m4/acinclude.libwrap.m4 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.libwrap.m4 @@ -96,7 +96,7 @@ AC_DEFUN([ACVT_LIBWRAP], cuda) ACVT_CONF_SUBTITLE([CUDA]) ACVT_CUDAWRAP - AS_IF([test x"$have_cudawrap" = "xyes"], [have_libwrap="yes"], + AS_IF([test x"$have_cudartwrap" = "xyes"], [have_libwrap="yes"], [ AS_IF([test x"$force_cudawrap" = "xyes"], [libwrap_error="yes"; break]) diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.mpi.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.mpi.m4 index 8b570db396..e7ebda9045 100644 --- a/ompi/contrib/vt/vt/config/m4/acinclude.mpi.m4 +++ 
b/ompi/contrib/vt/vt/config/m4/acinclude.mpi.m4 @@ -48,7 +48,8 @@ AC_DEFUN([ACVT_MPI], MPIINCDIR="-I$top_vt_srcdir/../../../include -I$top_vt_builddir/../../../include" FMPIINCDIR="$MPIINCDIR" - MPILIBDIR="-L$top_vt_builddir/../../../.libs" + # MPILIBDIR is used in the compiler wrapper configuration files; set LDFLAGS instead + LDFLAGS="$LDFLAGS -L$top_vt_builddir/../../../.libs" enable_mpi="yes" with_openmpi="yes" @@ -431,21 +432,24 @@ AC_DEFUN([ACVT_MPI], AC_CHECK_PROGS(MPICC, mpicc hcc mpcc_r mpcc mpxlc_r mpxlc mpixlc_r mpixlc cmpicc mpiicc) AS_IF([test x"$MPICC" != x], [ - mpicc=`echo $MPICC | cut -d ' ' -f 1` - which_mpicc=`which $mpicc 2>/dev/null` - AS_IF([test x"$which_mpicc" = x], [AC_MSG_ERROR([$mpicc not found])]) - - mpi_bin_dir=`dirname $which_mpicc` - AS_IF([test "$mpi_bin_dir" != "/usr/bin" -a "$mpi_bin_dir" != "/SX/usr/bin"], + AS_IF([test x"$inside_openmpi" = "xno"], [ - AS_IF([test x"$MPIDIR" = x], - [MPIDIR=`echo $mpi_bin_dir | sed -e 's/bin//'`]) - AS_IF([test x"$MPIINCDIR" = x], - [MPIINCDIR=-I`echo $mpi_bin_dir | sed -e 's/bin/include/'`]) - AS_IF([test x"$FMPIINCDIR" = x], - [FMPIINCDIR=$MPIINCDIR]) - AS_IF([test x"$MPILIBDIR" = x], - [MPILIBDIR=-L`echo $mpi_bin_dir | sed -e 's/bin/lib/'`]) + mpicc=`echo $MPICC | cut -d ' ' -f 1` + which_mpicc=`which $mpicc 2>/dev/null` + AS_IF([test x"$which_mpicc" = x], [AC_MSG_ERROR([$mpicc not found])]) + + mpi_bin_dir=`dirname $which_mpicc` + AS_IF([test "$mpi_bin_dir" != "/usr/bin" -a "$mpi_bin_dir" != "/SX/usr/bin"], + [ + AS_IF([test x"$MPIDIR" = x], + [MPIDIR=`echo $mpi_bin_dir | sed -e 's/bin//'`]) + AS_IF([test x"$MPIINCDIR" = x], + [MPIINCDIR=-I`echo $mpi_bin_dir | sed -e 's/bin/include/'`]) + AS_IF([test x"$FMPIINCDIR" = x], + [FMPIINCDIR=$MPIINCDIR]) + AS_IF([test x"$MPILIBDIR" = x], + [MPILIBDIR=-L`echo $mpi_bin_dir | sed -e 's/bin/lib/'`]) + ]) ]) ], [ @@ -612,75 +616,78 @@ dnl check for PMPILIB dnl check for FMPILIB - AS_IF([test x"$FMPILIB" = x -a x"$mpi_error" = "xno"], + AS_IF([test 
x"$F77" != x], [ - sav_LIBS=$LIBS - LIBS="$LIBS $MPILIBDIR -lmpi_f77 $MPILIB" - AC_MSG_CHECKING([whether linking with -lmpi_f77 works]) - AC_TRY_LINK([],[], - [AC_MSG_RESULT([yes]); FMPILIB=-lmpi_f77],[AC_MSG_RESULT([no])]) - LIBS=$sav_LIBS - ]) - - AS_IF([test x"$FMPILIB" = x -a x"$mpi_error" = "xno"], - [ - sav_LIBS=$LIBS - LIBS="$LIBS $MPILIBDIR -lmpibinding_f77" - AC_MSG_CHECKING([whether linking with -lmpibinding_f77 works]) - AC_TRY_LINK([],[], - [AC_MSG_RESULT([yes]); FMPILIB=-lmpibinding_f77],[AC_MSG_RESULT([no])]) - LIBS=$sav_LIBS - ]) - - AS_IF([test x"$FMPILIB" = x -a x"$mpi_error" = "xno"], - [ - sav_LIBS=$LIBS - LIBS="$LIBS $MPILIBDIR -lfmpich" - AC_MSG_CHECKING([whether linking with -lfmpich works]) - AC_TRY_LINK([],[], - [AC_MSG_RESULT([yes]); FMPILIB=-lfmpich],[AC_MSG_RESULT([no])]) - LIBS=$sav_LIBS - ]) - - AS_IF([test x"$FMPILIB" = x -a x"$mpi_error" = "xno"], - [ - sav_LIBS=$LIBS - LIBS="$LIBS $MPILIBDIR -llamf77mpi" - AC_MSG_CHECKING([whether linking with -llamf77mpi works]) - AC_TRY_LINK([],[], - [AC_MSG_RESULT([yes]); FMPILIB=-llamf77mpi],[AC_MSG_RESULT([no])]) - LIBS=$sav_LIBS - ]) - - AS_IF([test x"$FMPILIB" = x -a x"$mpi_error" = "xno"], - [ - sav_LIBS=$LIBS - LIBS="$LIBS $MPILIBDIR -lfmpi" - AC_MSG_CHECKING([whether linking with -lfmpi works]) - AC_TRY_LINK([],[], - [AC_MSG_RESULT([yes]); FMPILIB=-lfmpi],[AC_MSG_RESULT([no])]) - LIBS=$sav_LIBS - ]) - - AS_IF([test x"$mpi_error" = "xno"], - [ - AS_IF([test x"$FMPILIB" = x], + AS_IF([test x"$FMPILIB" = x -a x"$mpi_error" = "xno"], [ - AS_IF([test x"$check_fmpiwraplib" = "xyes"], - [ - AC_MSG_WARN([no libmpi_f77, libmpibinding_f77, libfmpich, liblamf77mpi, or libfmpi found; build libvt-fmpi]) - FMPILIB="-lvt-fmpi" - ]) - ], + sav_LIBS=$LIBS + LIBS="$LIBS $MPILIBDIR -lmpi_f77 $MPILIB" + AC_MSG_CHECKING([whether linking with -lmpi_f77 works]) + AC_TRY_LINK([],[], + [AC_MSG_RESULT([yes]); FMPILIB=-lmpi_f77],[AC_MSG_RESULT([no])]) + LIBS=$sav_LIBS + ]) + + AS_IF([test x"$FMPILIB" = x -a 
x"$mpi_error" = "xno"], [ - AS_IF([test x"$FMPILIB" = "x-lvt-fmpi"], + sav_LIBS=$LIBS + LIBS="$LIBS $MPILIBDIR -lmpibinding_f77" + AC_MSG_CHECKING([whether linking with -lmpibinding_f77 works]) + AC_TRY_LINK([],[], + [AC_MSG_RESULT([yes]); FMPILIB=-lmpibinding_f77],[AC_MSG_RESULT([no])]) + LIBS=$sav_LIBS + ]) + + AS_IF([test x"$FMPILIB" = x -a x"$mpi_error" = "xno"], + [ + sav_LIBS=$LIBS + LIBS="$LIBS $MPILIBDIR -lfmpich" + AC_MSG_CHECKING([whether linking with -lfmpich works]) + AC_TRY_LINK([],[], + [AC_MSG_RESULT([yes]); FMPILIB=-lfmpich],[AC_MSG_RESULT([no])]) + LIBS=$sav_LIBS + ]) + + AS_IF([test x"$FMPILIB" = x -a x"$mpi_error" = "xno"], + [ + sav_LIBS=$LIBS + LIBS="$LIBS $MPILIBDIR -llamf77mpi" + AC_MSG_CHECKING([whether linking with -llamf77mpi works]) + AC_TRY_LINK([],[], + [AC_MSG_RESULT([yes]); FMPILIB=-llamf77mpi],[AC_MSG_RESULT([no])]) + LIBS=$sav_LIBS + ]) + + AS_IF([test x"$FMPILIB" = x -a x"$mpi_error" = "xno"], + [ + sav_LIBS=$LIBS + LIBS="$LIBS $MPILIBDIR -lfmpi" + AC_MSG_CHECKING([whether linking with -lfmpi works]) + AC_TRY_LINK([],[], + [AC_MSG_RESULT([yes]); FMPILIB=-lfmpi],[AC_MSG_RESULT([no])]) + LIBS=$sav_LIBS + ]) + + AS_IF([test x"$mpi_error" = "xno"], + [ + AS_IF([test x"$FMPILIB" = x], [ - AS_IF([test x"$check_fmpiwraplib" = "xno"], - [FMPILIB=]) + AS_IF([test x"$check_fmpiwraplib" = "xyes"], + [ + AC_MSG_WARN([no libmpi_f77, libmpibinding_f77, libfmpich, liblamf77mpi, or libfmpi found; build libvt-fmpi]) + FMPILIB="-lvt-fmpi" + ]) ], [ - AS_IF([test x"$force_fmpiwraplib" = "xyes"], - [FMPILIB="-lvt-fmpi"], [check_fmpiwraplib="no"]) + AS_IF([test x"$FMPILIB" = "x-lvt-fmpi"], + [ + AS_IF([test x"$check_fmpiwraplib" = "xno"], + [FMPILIB=]) + ], + [ + AS_IF([test x"$force_fmpiwraplib" = "xyes"], + [FMPILIB="-lvt-fmpi"], [check_fmpiwraplib="no"]) + ]) ]) ]) ]) @@ -877,16 +884,19 @@ AC_DEFUN([ACVT_FMPIWRAPLIB], AC_CHECK_PROGS(MPIF77, mpif77 hf77 mpxlf_r mpxlf mpf77 cmpifc mpif90 mpxlf95_r mpxlf90_r mpxlf95 mpxlf90 mpf90 cmpif90c) AS_IF([test 
x"$MPIF77" != x], [ - mpif77=`echo $MPIF77 | cut -d ' ' -f 1` - which_mpif77=`which $mpif77 2>/dev/null` - AS_IF([test x"$which_mpif77" = x], [AC_MSG_ERROR([$mpif77 not found])]) - - mpi_bin_dir=`dirname $which_mpif77` - AS_IF([test "$mpi_bin_dir" != "/usr/bin" -a "$mpi_bin_dir" != "/SX/usr/bin" -a x"$FMPIINCDIR" != x-I"$mpi_inc_dir"], + AS_IF([test x"$inside_openmpi" = "xno"], [ - mpi_inc_dir=-I`echo $mpi_bin_dir | sed -e 's/bin/include/'` - AS_IF([test x"$FMPIINCDIR" != x"$mpi_inc_dir"], - [FMPIINCDIR="$FMPIINCDIR -I`echo $mpi_bin_dir | sed -e 's/bin/include/'`"]) + mpif77=`echo $MPIF77 | cut -d ' ' -f 1` + which_mpif77=`which $mpif77 2>/dev/null` + AS_IF([test x"$which_mpif77" = x], [AC_MSG_ERROR([$mpif77 not found])]) + + mpi_bin_dir=`dirname $which_mpif77` + AS_IF([test "$mpi_bin_dir" != "/usr/bin" -a "$mpi_bin_dir" != "/SX/usr/bin" -a x"$FMPIINCDIR" != x-I"$mpi_inc_dir"], + [ + mpi_inc_dir=-I`echo $mpi_bin_dir | sed -e 's/bin/include/'` + AS_IF([test x"$FMPIINCDIR" != x"$mpi_inc_dir"], + [FMPIINCDIR="$FMPIINCDIR -I`echo $mpi_bin_dir | sed -e 's/bin/include/'`"]) + ]) ]) ], [ diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.rusage.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.rusage.m4 index e7f0e0a450..ec7bbdd476 100644 --- a/ompi/contrib/vt/vt/config/m4/acinclude.rusage.m4 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.rusage.m4 @@ -22,7 +22,15 @@ AC_DEFUN([ACVT_RUSAGE], AC_CHECK_FUNC([getrusage], [], [rusage_error="yes"]) ]) - AS_IF([test x"$rusage_error" = "xno"], [have_rusage="yes"]) + AS_IF([test x"$rusage_error" = "xno"], + [ + sav_CPPFLAGS=$CPPFLAGS + CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + AC_CHECK_DECLS([RUSAGE_THREAD], [], [], [#include <sys/resource.h>]) + CPPFLAGS=$sav_CPPFLAGS + + have_rusage="yes" + ]) ]) ]) diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.tauinst.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.tauinst.m4 index 23042323b0..01332b14ca 100644 --- a/ompi/contrib/vt/vt/config/m4/acinclude.tauinst.m4 +++
b/ompi/contrib/vt/vt/config/m4/acinclude.tauinst.m4 @@ -68,9 +68,15 @@ AC_DEFUN([ACVT_TAUINST], AC_CHECK_PROG(tauinst_cxxparse_cmd, cxxparse, cxxparse) AS_IF([test x"$tauinst_cxxparse_cmd" = x], [AC_MSG_WARN([no cxxparse found; C++ source code cannot be instrumented by TAU])]) - AC_CHECK_PROGS(tauinst_fparse_cmd, f95parse f90parse gfparse) - AS_IF([test x"$tauinst_fparse_cmd" = x], - [AC_MSG_WARN([no f95parse, f90parse, or gfparse found; Fortran source code cannot be instrumented by TAU])]) + AS_IF([test x"$F77" != x], + [ + AC_CHECK_PROGS(tauinst_fparse_cmd, f95parse f90parse gfparse) + AS_IF([test x"$tauinst_fparse_cmd" = x], + [AC_MSG_WARN([no f95parse, f90parse, or gfparse found; Fortran source code cannot be instrumented by TAU])]) + ], + [ + tauinst_fparse_cmd= + ]) AS_IF([test x"$tauinst_cparse_cmd$tauinst_cxxparse_cmd$tauinst_fparse_cmd" = x], [ diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.timer.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.timer.m4 index 6564ea6743..47a3be904e 100644 --- a/ompi/contrib/vt/vt/config/m4/acinclude.timer.m4 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.timer.m4 @@ -129,7 +129,7 @@ AC_DEFUN([ACVT_TIMER], AC_MSG_NOTICE([selected timer: $timer]) case $timer in - TIMER_BGP_GET_TIMEBASE | TIMER_BGP_GET_TIMEBASE | TIMER_GETTIMEOFDAY) + TIMER_RTS_GET_TIMEBASE | TIMER_BGP_GET_TIMEBASE | TIMER_SYSSX_HGTIME | TIMER_GETTIMEOFDAY) timer_is_global=yes timer_is_global_def=1 ;; diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.vtrun.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.vtrun.m4 index 08080e3994..82a1a68750 100644 --- a/ompi/contrib/vt/vt/config/m4/acinclude.vtrun.m4 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.vtrun.m4 @@ -30,9 +30,18 @@ AC_DEFUN([ACVT_RUN], AS_IF([test x"$check_vtrun" = "xyes"], [ - AC_MSG_CHECKING([whether we can build shared libraries]) - AS_IF([test x"$enable_shared" = "xyes"], - [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]); vtrun_error="yes"]) + AS_IF([test "$PLATFORM" = "bgl" -o "$PLATFORM" = "bgp"], + [ + 
AC_MSG_NOTICE([error: application execution wrapper not supported on this platform]) + vtrun_error="yes" + ]) + + AS_IF([test x"$vtrun_error" = "xno"], + [ + AC_MSG_CHECKING([whether we can build shared libraries]) + AS_IF([test x"$enable_shared" = "xyes"], + [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]); vtrun_error="yes"]) + ]) AS_IF([test x"$vtrun_error" = "xno"], [ diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.vtsetup.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.vtsetup.m4 index 5a43d00cfa..522c282286 100644 --- a/ompi/contrib/vt/vt/config/m4/acinclude.vtsetup.m4 +++ b/ompi/contrib/vt/vt/config/m4/acinclude.vtsetup.m4 @@ -33,7 +33,6 @@ AC_DEFUN([ACVT_SETUP], AC_CHECK_PROG(JAVA, java, java) AS_IF([test x"$JAVA" = x], [ - AC_MSG_RESULT([no]) AC_MSG_NOTICE([error: no java found; check path for JAVA package first...]) vtsetup_error="yes" ]) diff --git a/ompi/contrib/vt/vt/config/mpigen/c_dont_generate.txt b/ompi/contrib/vt/vt/config/mpigen/c_dont_generate.txt index 589b05f863..632f563352 100644 --- a/ompi/contrib/vt/vt/config/mpigen/c_dont_generate.txt +++ b/ompi/contrib/vt/vt/config/mpigen/c_dont_generate.txt @@ -1,6 +1,8 @@ # This file lists all calls, which should not be generated # automatically by mk_c_wrapper.sh +MPI_Address +MPI_Get_address MPI_Pcontrol MPI_Wtick MPI_Wtime @@ -38,6 +40,8 @@ MPI_Cart_sub MPI_Comm_create MPI_Comm_dup MPI_Comm_free +MPI_Comm_group +MPI_Comm_remote_group MPI_Comm_split MPI_Exscan MPI_Gather @@ -97,6 +101,7 @@ MPI_Win_complete MPI_Win_create MPI_Win_fence MPI_Win_free +MPI_Win_get_group MPI_Win_lock MPI_Win_post MPI_Win_start diff --git a/ompi/contrib/vt/vt/config/mpigen/mk_fortran_wrapper.sh.in b/ompi/contrib/vt/vt/config/mpigen/mk_fortran_wrapper.sh.in index 15c0505060..7c49a37c53 100644 --- a/ompi/contrib/vt/vt/config/mpigen/mk_fortran_wrapper.sh.in +++ b/ompi/contrib/vt/vt/config/mpigen/mk_fortran_wrapper.sh.in @@ -275,8 +275,6 @@ cat <$tmp.awk # by MPI_Fint. 
# ------------------------------------------------- function converttype(type) { - gsub("MPI_Aint","MPI_Fint",type) - if (ENVIRON["COMM_CONVERT"] == 1) gsub("MPI_Comm","MPI_Fint",type) @@ -326,9 +324,6 @@ function print_convert_function(type,para,decl) { if (type == "char*") { if (decl) printf" char* %sC = NULL;\n", para else printf" vt_string_f2c(%s, %s_len, &%sC);\n", para, para, para - } else if (index(type,"MPI_Aint") > 0) { - if (decl) printf" MPI_Aint %sC;\n",para - else printf" %sC = (MPI_Aint)%s%s;\n",para,pointer,para } else if ((ENVIRON["COMM_CONVERT"] == 1) && (index(type,"MPI_Comm") > 0)) { if (decl) printf" MPI_Comm %sC;\n", para else printf" %sC = MPI_Comm_f2c(%s%s);\n",para,pointer,para @@ -380,8 +375,6 @@ function print_convert_function(type,para,decl) { if (type == "char*") { if (decl) printf " char %sC[1024];\n", para - } else if (index(type,"MPI_Aint") > 0) { - if (decl) printf" MPI_Aint %sC;\n",para } else if ((ENVIRON["COMM_CONVERT"] == 1) && (index(type,"MPI_Comm") > 0)) { if (decl) printf" MPI_Comm %sC;\n",para else printf" %sC = MPI_COMM_NULL;\n",para @@ -440,13 +433,7 @@ function print_convert_function(type,para,decl) { len_parameter = lentemp[2] #now we have to handle all the imaginable handles - if (index(type,"MPI_Aint") > 0) { - if (decl) { - printf" MPI_Aint* %sC;\n",para - } else { - printf" %sC = (MPI_Aint*)malloc(sizeof(MPI_Aint)*(*%s));\n",para,len_parameter - } - } else if ((ENVIRON["COMM_CONVERT"] == 1) && (index(type,"MPI_Comm") > 0)) { + if ((ENVIRON["COMM_CONVERT"] == 1) && (index(type,"MPI_Comm") > 0)) { if (decl) { printf" MPI_Comm* %sC;\n",para } else { @@ -551,12 +538,7 @@ function print_convert_function(type,para,decl) { len_parameter = lentemp[2] #now we have to handle all the imaginable handles - if (index(type,"MPI_Aint") > 0) { - if (!decl) { - printf" for (i = 0; i < *%s; i++) \n",len_parameter - printf" %sC[i] = (MPI_Aint)%s[i];\n",para,para - } - } else if ((ENVIRON["COMM_CONVERT"] == 1) && (index(type,"MPI_Comm") 
> 0)) { + if ((ENVIRON["COMM_CONVERT"] == 1) && (index(type,"MPI_Comm") > 0)) { if (!decl) { printf" for (i = 0; i < *%s; i++) \n",len_parameter printf" %sC[i] = MPI_Comm_f2c(%s[i]);\n",para,para @@ -635,9 +617,6 @@ function print_back_convert_function(type,para) { if (type == "char*") printf" vt_string_c2f(%sC, %s, %s_len);\n", para, para, para - if (index(type,"MPI_Aint") > 0) - printf" *%s = (MPI_Fint)%sC;\n",para,para - if ((ENVIRON["COMM_CONVERT"] == 1) && (index(type,"MPI_Comm") > 0)) printf" *%s = MPI_Comm_c2f(%sC);\n",para,para @@ -686,13 +665,6 @@ function print_back_convert_function(type,para) { len_parameter = lentemp[2] #now we have to handle all the imaginable handles - if (index(type,"MPI_Aint") > 0) { - printf" if ( *%s != MPI_UNDEFINED )\n {\n",len_parameter - printf" for (i = 0; i < *%s; i++) \n",len_parameter - printf" %s[i] = (MPI_Fint)%sC[i];\n",para,para - printf" }\n" - } - if ((ENVIRON["COMM_CONVERT"] == 1) && (index(type,"MPI_Comm") > 0)) { printf" if ( *%s != MPI_UNDEFINED )\n {\n",len_parameter printf" for (i = 0; i < *%s; i++) \n",len_parameter @@ -768,8 +740,7 @@ function print_back_convert_function(type,para) { if ( (index(para,"_CLASS_ARRAY_") != 0) && (index(type,"MPI_Status") == 0) ) { - if ( (index(type,"MPI_Aint") != 0) || - ((index(type,"MPI_Comm") != 0) && (ENVIRON["COMM_CONVERT"] == 1)) || + if ( ((index(type,"MPI_Comm") != 0) && (ENVIRON["COMM_CONVERT"] == 1)) || ((index(type,"MPI_Errhandler") != 0) && (ENVIRON["ERRHANDLER_CONVERT"] == 1)) || ((index(type,"MPI_File") != 0) && (ENVIRON["FILE_CONVERT"] == 1)) || ((index(type,"MPI_Group") != 0) && (ENVIRON["GROUP_CONVERT"] == 1)) || @@ -830,17 +801,16 @@ function get_converted_name(para,type) { address="&" #handle conversion - if ((index(type,"MPI_Aint") > 0) || - ((ENVIRON["COMM_CONVERT"] == 1)&&(index(type,"MPI_Comm") > 0)) || - ((ENVIRON["ERRH_CONVERT"] == 1)&&(index(type,"MPI_Errhandler") > 0)) || - ((ENVIRON["FILE_CONVERT"] == 1)&&(index(type,"MPI_File") > 0)) || - 
((ENVIRON["GROUP_CONVERT"] == 1)&&(index(type,"MPI_Group") > 0)) || - ((ENVIRON["INFO_CONVERT"] == 1)&&(index(type,"MPI_Info") > 0)) || - ((ENVIRON["OP_CONVERT"] == 1)&&(index(type,"MPI_Op") > 0)) || - ((ENVIRON["TYPE_CONVERT"] == 1)&&(index(type,"MPI_Datatype") > 0)) || - ((ENVIRON["WIN_CONVERT"] == 1)&&(index(type,"MPI_Win") > 0)) || - ((ENVIRON["REQUEST_CONVERT"] == 1)&&(index(type,"MPI_Request") > 0)) || - (index(type,"MPI_Status") > 0) ) + if ( ((ENVIRON["COMM_CONVERT"] == 1)&&(index(type,"MPI_Comm") > 0)) || + ((ENVIRON["ERRH_CONVERT"] == 1)&&(index(type,"MPI_Errhandler") > 0)) || + ((ENVIRON["FILE_CONVERT"] == 1)&&(index(type,"MPI_File") > 0)) || + ((ENVIRON["GROUP_CONVERT"] == 1)&&(index(type,"MPI_Group") > 0)) || + ((ENVIRON["INFO_CONVERT"] == 1)&&(index(type,"MPI_Info") > 0)) || + ((ENVIRON["OP_CONVERT"] == 1)&&(index(type,"MPI_Op") > 0)) || + ((ENVIRON["TYPE_CONVERT"] == 1)&&(index(type,"MPI_Datatype") > 0)) || + ((ENVIRON["WIN_CONVERT"] == 1)&&(index(type,"MPI_Win") > 0)) || + ((ENVIRON["REQUEST_CONVERT"] == 1)&&(index(type,"MPI_Request") > 0)) || + (index(type,"MPI_Status") > 0) ) return address para "C" #it is not a handle type so normal handling diff --git a/ompi/contrib/vt/vt/config/mpigen/mk_registry.sh.in b/ompi/contrib/vt/vt/config/mpigen/mk_registry.sh.in index 8a59b5149a..ee555302ae 100644 --- a/ompi/contrib/vt/vt/config/mpigen/mk_registry.sh.in +++ b/ompi/contrib/vt/vt/config/mpigen/mk_registry.sh.in @@ -136,8 +136,6 @@ cat <$tmp.awk type="VT_MPI_COLL_ALL2ONE" else if ( lastparam[3] == "/*COLL_ALL2ALL*/" ) type="VT_MPI_COLL_ALL2ALL" - else if ( lastparam[3] == "/*COLL_OTHER*/" ) - type="VT_MPI_COLL_OTHER" else type="VT_MPI_FUNCTION" diff --git a/ompi/contrib/vt/vt/config/mpigen/mpi2_1sided.h b/ompi/contrib/vt/vt/config/mpigen/mpi2_1sided.h index 080ffd8785..a7c1fe630e 100644 --- a/ompi/contrib/vt/vt/config/mpigen/mpi2_1sided.h +++ b/ompi/contrib/vt/vt/config/mpigen/mpi2_1sided.h @@ -42,6 +42,7 @@ VT_MPI_INT MPI_Win_complete(MPI_Win win); 
VT_MPI_INT MPI_Win_create(void* base, MPI_Aint size, VT_MPI_INT disp_unit, MPI_Info info, MPI_Comm comm, MPI_Win* win_CLASS_SINGLE_OUT); VT_MPI_INT MPI_Win_fence(VT_MPI_INT assert, MPI_Win win); VT_MPI_INT MPI_Win_free(MPI_Win* win_CLASS_SINGLE_IO); +VT_MPI_INT MPI_Win_get_group(MPI_Win win, MPI_Group* group_CLASS_SINGLE_OUT); VT_MPI_INT MPI_Win_lock(VT_MPI_INT lock_type, VT_MPI_INT rank, VT_MPI_INT assert, MPI_Win win); VT_MPI_INT MPI_Win_post(MPI_Group group, VT_MPI_INT assert, MPI_Win win); VT_MPI_INT MPI_Win_start(MPI_Group group, VT_MPI_INT assert, MPI_Win win); diff --git a/ompi/contrib/vt/vt/config/mpigen/mpi2_extcoll.h b/ompi/contrib/vt/vt/config/mpigen/mpi2_extcoll.h index c37ef61420..ff947ebefc 100644 --- a/ompi/contrib/vt/vt/config/mpigen/mpi2_extcoll.h +++ b/ompi/contrib/vt/vt/config/mpigen/mpi2_extcoll.h @@ -36,4 +36,4 @@ **/ VT_MPI_INT MPI_Alltoallw(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* sdispls, MPI_Datatype* sendtypes_CLASS_ARRAY_IN_sendcounts, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* rdispls, MPI_Datatype* recvtypes_CLASS_ARRAY_IN_recvcounts, MPI_Comm comm); /*COLL_ALL2ALL*/ -VT_MPI_INT MPI_Exscan(void* sendbuf_CLASS_BUFFER, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_OTHER*/ +VT_MPI_INT MPI_Exscan(void* sendbuf_CLASS_BUFFER, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/ diff --git a/ompi/contrib/vt/vt/config/mpigen/mpi_standard.h b/ompi/contrib/vt/vt/config/mpigen/mpi_standard.h index 728a5b6cea..cc391da99e 100644 --- a/ompi/contrib/vt/vt/config/mpigen/mpi_standard.h +++ b/ompi/contrib/vt/vt/config/mpigen/mpi_standard.h @@ -128,7 +128,7 @@ VT_MPI_INT MPI_Reduce_scatter(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf VT_MPI_INT MPI_Request_free(MPI_Request* request_CLASS_SINGLE_IO); VT_MPI_INT MPI_Rsend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, 
VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm); VT_MPI_INT MPI_Rsend_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT); -VT_MPI_INT MPI_Scan(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_OTHER*/ +VT_MPI_INT MPI_Scan(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/ VT_MPI_INT MPI_Scatter(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/ VT_MPI_INT MPI_Scatterv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* displs, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/ VT_MPI_INT MPI_Send(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm); diff --git a/ompi/contrib/vt/vt/configure.in b/ompi/contrib/vt/vt/configure.in index 9c27de00e4..14bf70afdc 100644 --- a/ompi/contrib/vt/vt/configure.in +++ b/ompi/contrib/vt/vt/configure.in @@ -80,19 +80,54 @@ AS_IF([test x"$inside_openmpi" = "xyes" -o "$datarootdir" = "\${prefix}/share"], AS_IF([test "$docdir" = "\${datarootdir}/doc/\${PACKAGE_TARNAME}"], [docdir="\${datarootdir}/doc"]) +# Declare additional variables +AC_ARG_VAR(AR, [archiver command]) +AC_ARG_VAR(NM, [command to list symbols from object files]) +AC_ARG_VAR(JAVA, [Java application launcher command]) + ACVT_CONF_TITLE([Compiler, preprocessor, and programs]) # Check for compiler and preprocessor -AS_IF([test x"$F77" = x -a x"$FC" != x], [F77=$FC]) AC_PROG_CC AM_PROG_CC_C_O AC_PROG_CXX AC_PROG_CPP AC_PROG_CXXCPP -AC_PROG_F77 -AC_PROG_FC 
AM_PROG_AS +# Do we want to support Fortran +check_fortran="yes" +force_fortran="no" +AC_ARG_ENABLE(fortran, + AC_HELP_STRING([--enable-fortran], + [enable Fortran support, default: enable if Fortran compiler found by configure]), + [ + AS_IF([test x"$enableval" = "xyes"], + [ + force_fortran="yes" + ], + [ + check_fortran="no" + F77= + FC= + ]) + ]) + +# Check for Fortran compiler +AS_IF([test x"$check_fortran" = "xyes"], +[ + AS_IF([test x"$F77" = x -a x"$FC" != x], [F77=$FC]) + + AC_PROG_F77 + AS_IF([test x"$F77" = x], + [ + AC_MSG_NOTICE([error: no Fortran 77 compiler found]) + AS_IF([test x"$force_fortran" = "xyes"], [exit 1]) + ]) + + AC_PROG_FC +]) + AC_LANG([C]) AS_IF([test x"$F77" != x], @@ -113,11 +148,6 @@ AC_ARG_ENABLE(binaries, [enable_binaries="$enableval"], [enable_binaries="yes"]) AM_CONDITIONAL(AMBUILDBINARIES, test x"$enable_binaries" = "xyes") -# Declare additional variables -AC_ARG_VAR(AR, [archiver command]) -AC_ARG_VAR(NM, [command to list symbols from object files]) -AC_ARG_VAR(JAVA, [Java application launcher command]) - # Check for programs #AC_PROG_AR AC_CHECK_PROGS(AR, ar, ar) @@ -351,6 +381,19 @@ AS_IF([test x"$enable_config_titles" = "xyes" -a x"$check_getcpu" = "xno"], AS_IF([test x"$force_getcpu" = "xyes" -a x"$getcpu_error" = "xyes"], [exit 1]) AM_CONDITIONAL(AMHAVEGETCPU, test x"$have_getcpu" = "xyes") +#Check for CUDA Toolkit +ACVT_CONF_TITLE([CUDA Toolkit]) +ACVT_CUDA +AS_IF([test x"$enable_config_titles" = "xyes" -a x"$check_cuda" = "xno"], +[AC_MSG_NOTICE([disabled via command line switch])]) +AS_IF([test x"$force_cuda" = "xyes" -a x"$cuda_error" = "xyes"], [exit 1]) +AM_CONDITIONAL(AMBUILDCUDA, test x"$have_cuda" = "xyes") +AM_CONDITIONAL(AMBUILDCUDART, test x"$have_cudart" = "xyes") + +# Check for CUPTI support +ACVT_CUPTI +AM_CONDITIONAL(AMBUILDCUPTI, test x"$have_cupti" = "xyes") + # Check for library tracing support ACVT_CONF_TITLE([Library tracing]) ACVT_LIBWRAP @@ -365,9 +408,11 @@ AM_CONDITIONAL(AMHAVEFORK, test 
x"$have_libcwrap" = "xyes" -a x"$have_fork" = "x AM_CONDITIONAL(AMBUILDIOWRAP, test x"$have_iowrap" = "xyes") AM_CONDITIONAL(AMBUILDCUDAWRAP, test x"$have_cudawrap" = "xyes") AM_CONDITIONAL(AMBUILDCUDARTWRAP, test x"$have_cudartwrap" = "xyes") -AM_CONDITIONAL(AMBUILDCUPTI, test x"$have_cupti" = "xyes") AM_CONDITIONAL(AMBUILDVTNVCC, test x"$have_cudartwrap" = "xyes" -a x"$NVCC" != x) +# Check for GPU support +AM_CONDITIONAL(AMHAVEGPU, test x"$have_cupti" = "xyes" -o x"$have_cudartwrap" = "xyes") + # Check path for node-local temporary directory ACVT_LTMPDIR diff --git a/ompi/contrib/vt/vt/doc/UserManual.html b/ompi/contrib/vt/vt/doc/UserManual.html index 046685af42..b0e3a17da3 100644 --- a/ompi/contrib/vt/vt/doc/UserManual.html +++ b/ompi/contrib/vt/vt/doc/UserManual.html @@ -66,7 +66,7 @@ BODY { font-family: sans-serif; }

-VampirTrace 5.11.2 User Manual +VampirTrace 5.12 User Manual


@@ -80,10 +80,10 @@ High Performance Computing (ZIH)
Germany
-
http://www.tu-dresden.de/zih -
http://www.tu-dresden.de/zih/vampirtrace +
+http://www.tu-dresden.de/zih +
+http://www.tu-dresden.de/zih/vampirtrace

Contact: mailto:vampirsupport@zih.tu-dresden.devampirsupport@zih.tu-dresden.de @@ -97,125 +97,164 @@ Contents

    -
  • Introduction -
  • Instrumentation -

    @@ -250,22 +289,19 @@ This includes function enter and leave events, MPI communication, OpenMP events, and performance counters.

    -After a successful tracing run, VampirTrace writes all collected data to a -trace file in the Open Trace Format (OTF)[*][*]. As a result, the information is available for post-mortem analysis and visualization by various tools. -Most notably, VampirTrace provides the input data for the Vampir analysis -and visualization tool[*][*].

    VampirTrace is included in OpenMPI 1.3 and later versions. -If not disabled explicitly, VampirTrace is built automatically when installing -OpenMPI[*][*].

    @@ -300,7 +336,7 @@ Note that not all features are supported on all platforms.

  • Record name and source code location (file name, line)
  • -
  • Various kinds of instrumentation ⇒ Section 2.2 +
  • Various kinds of instrumentation ⇒ Section 2.2
    • Automatic with many compilers ⇒ Section 2.3 @@ -653,9 +689,7 @@ your program uses by using the switches hybrid programs, respectively. Note that these switches do not change the underlying compiler or compiler flags. Use the option -vt:verbose to see the command line that the compiler wrapper -executes. See Section B.1 for a list of all compiler wrapper options. - -

      +executes. The default settings of the compiler wrappers can be modified in the files share/vampirtrace/vtcc-wrapper-data.txt (and similar for the other languages) in the installation directory of VampirTrace. @@ -713,10 +747,7 @@ To determine which instrumentation type will be used by default and which instru types are available on your system have a look at the entry inst_avail in the wrapper's configuration file (e.g. share/vampirtrace/vtcc-wrapper-data.txt in the installation directory of VampirTrace for the C compiler wrapper). - -

      -See Section B.1 or type vtcc -vt:help for other -options that can be passed to VampirTrace's compiler wrapper. + Type vtcc -vt:help for other options that can be passed to VampirTrace's compiler wrapper.

      @@ -773,9 +804,7 @@ Notes for Using the GNU, Intel, or PathScale Compiler For these compilers the command nm is required to get symbol information of the running application executable. For example on Linux systems, this program is a -part of the GNU Binutils, which is downloadable from -http://www.gnu.org/software/binutils. +part of the GNU Binutils, which is downloadable from http://www.gnu.org/software/binutils.

      To get the application executable for nm during runtime, VampirTrace uses the @@ -942,25 +971,33 @@ Measurement Controls

      -

      -Switching tracing on/off: +

      + +
      +Switching tracing on/off:

      In addition to instrumenting arbitrary blocks of code, one can use the VT_ON/ VT_OFF instrumentation calls to start and stop the recording of events. These constructs can be used to stop recording of events for a part of the application and later resume recording. For - example, one could not collect trace events during the initialization phase - of an application and turn on tracing for the computation part. - -

      -Furthermore the "on/off" functionality can be used to control the + example, as is demonstrated in the following C/C++ code snippet, one could + not collect trace events during the initialization phase of an application + and turn on tracing for the computation part. +

      +           int main() {
      +             ...
      +             VT_OFF();
      +             initialize();
      +             VT_ON();
      +             compute();
      +             ...
      +           }
      +
      + Furthermore the "on/off" functionality can be used to control the tracing behavior of VampirTrace and allows to trace only parts of interests. Therefore the amount of trace data can be reduced essentially. To check whether if tracing is enabled or not use the call VT_IS_ON. -

      -For further information about limitations have a look at the FAQ D.5. -

      @@ -982,7 +1019,7 @@ Use the instrumentation call VT_SET_REWIND_MARK at the beginning of All recorded trace data between the mark and the rewind call will be dropped. Note, that only one mark can be set at a time. The last call to VT_SET_REWIND_MARK will be considered when rewinding the trace buffer. - This simplified Fortran code example sketches show the rewind approach can be used: + This simplified Fortran code example sketches how the rewind approach can be used:
                  do step=1,number_of_time_steps
                    VT_SET_REWIND_MARK()
      @@ -990,10 +1027,6 @@ Use the instrumentation call VT_SET_REWIND_MARK at the beginning of
                    if(finished_as_expected) VT_REWIND()
                  end do
       
      - Refer to FAQ
      D.5 for limitations associated with this method. - -

      -

      Intermediate buffer flush:

      @@ -1057,17 +1090,12 @@ Source Instrumentation Using PDT/TAU % vtcc -vt:inst tauinst hello.c -o hello

      - -

      -There is a known issue with the TAU instrumentation in the ⇒ FAQ D.9 - +

      Requirements for TAU instrumentation:

      To work with TAU instrumenation you need the Program Database Toolkit. - You have to make sure, to have cparse and tau_instrumentor in your $PATH. - The PDToolkit can be downloaded from http://www.cs.uoregon.edu/research/pdt/home.php. + You have to make sure, to have cparse and tau_instrumentor in your $PATH. + The PDToolkit can be downloaded from http://www.cs.uoregon.edu/research/pdt/home.php.

      Include/Exclude Lists: @@ -1077,18 +1105,17 @@ Include/Exclude Lists:
      in a single file, that is announced to tau_instrumentor via the option -
      -f=<filename>. +
      -f <filename>. This file contains up to four lists which begin with
      BEGIN[_FILE]_<INCLUDE|EXCLUDE>_LIST. The names in between may contain wildcards as ``?'', ``*', and ``#'', each entry gets a new line. - The lists end with END[_FILE]_<INCLUDE|EXCLUDE>_LIST. For further information on selective - profiling have a look at the TAU documentation[*] + The lists end with END[_FILE]_<INCLUDE|EXCLUDE>_LIST. For further information on selective profiling have a look at the TAU documentation[*]. To announce the file through the compiler wrapper use the option -vt:tau:
          % vtcc -vt:inst tauinst hello.c -o hello \
      -     -vt:tau -f=<filename>
      +     -vt:tau '-f <filename>'
       

      @@ -1098,9 +1125,8 @@ Binary Instrumentation Using Dyninst

      The option -vt:inst dyninst is used with the compiler wrapper to -instrument the application during runtime (binary instrumentation), by using -Dyninst[*][*]. Recompiling is not necessary for this kind of instrumentation, but relinking: @@ -1171,13 +1197,9 @@ Runtime Instrumentation Using VTRun Besides the already described instrumentation at compile-time, VampirTrace also supports runtime instrumention using the vtrun command. Prepending the actual call to the application will transparently add instrumentation support and launch the -application. This includes support for function instrumentation as well as MPI -communication tracing. By default, vtrun tries to apply MPI instrumentation first. -If the application does not utilize MPI, the tool tries to use Dyninst -(Section 2.6) for instrumenting function calls which requires -VampirTrace being built with Dyninst support. In order to enable instrumentation for -both MPI and user functions the user has to specify the -dyninst -command line switch. +application. This includes support for function instrumentation by Dyninst (Section 2.6) +as well as MPI communication tracing. In order to enable instrumentation for user +functions the user has to specify the -dyninst command line switch.

      @@ -1307,12 +1329,7 @@ This builds the shared library libSDLwrap.so which can be linked to the % LD_PRELOAD=$PWD/libSDLwrap.so <executable>

      - -

      -For more information about the tool vtlibwrapgen see Section B.5. - -

      - +


      @@ -1402,18 +1419,11 @@ of a VampirTrace instrumented executable: VT_BUFFER_SIZE Size of internal event trace buffer. This is the place where - event records are stored, before being written to a file. + event records are stored, before being written to OTF.
      ⇒ Section 3.3 32M -VT_THREAD_BUFFER_SIZE -Size of internal event trace buffer for threads. If not defined, the - size is set to 10% of VT_BUFFER_SIZE. -
      - ⇒ Section 3.3 -0 - VT_CLEAN Remove temporary trace files? yes @@ -1422,6 +1432,10 @@ of a VampirTrace instrumented executable: Write compressed trace files? yes +VT_COMPRESSION_BSIZE +Size of the compression buffer in OTF. +OTF default + VT_FILE_PREFIX Prefix used for trace filenames. ⇒Sect.3.1 @@ -1443,13 +1457,10 @@ of a VampirTrace instrumented executable: Maximum number of threads per process that VampirTrace reserves resources for. 65536 -VT_MAX_MPI_COMMS -Maximum number of MPI communicators used in a MPI program. -100 - -VT_MAX_MPI_WINS -Maximum number of MPI windows used in a MPI program. -100 +VT_OTF_BUFFER_SIZE +Size of internal OTF buffer. This buffer contains OTF-encoded + trace data that is written to file at once. +OTF default VT_PFORM_GDIR Name of global directory to store final trace file in. @@ -1459,6 +1470,13 @@ of a VampirTrace instrumented executable: Name of node-local directory which can be used to store temporary trace files. /tmp/ +VT_THREAD_BUFFER_SIZE +Size of internal event trace buffer for threads. If not defined, the + size is set to 10% of VT_BUFFER_SIZE. +
      + ⇒ Section 3.3 +0 + VT_UNIFY Unify local trace files afterwards? yes @@ -1613,6 +1631,10 @@ of a VampirTrace instrumented executable: Disable instrumentation of functions which have no debug information? no +VT_DYN_DETACH +Detach Dyninst mutator-program vtdyn from application process? +yes + VT_FILTER_SPEC Name of function/region filter file.
      @@ -1635,6 +1657,12 @@ of a VampirTrace instrumented executable: Create a group for each Java class automatically? yes +VT_ONOFF_CHECK_STACK_BALANCE +Check stack level balance when switching tracing on/off. +
      + ⇒ Section 2.4.2 +yes + VT_MAX_STACK_DEPTH Maximum number of stack level to be traced.
      @@ -1836,11 +1864,10 @@ adjusted with VT_ETIMESYNC_INTV. The following LAPACK libraries provide a C-LAPACK API that can be used by VampirTrace for the enhanced timer synchronization: -
        -
      • CLAPACK[*][*] -
      • +
        • AMD ACML
        • IBM ESSL @@ -1928,8 +1955,7 @@ Hardware Performance Counters

      -If VampirTrace has been built with hardware counter support (⇒ - Appendix A), it is capable of recording hardware counter +If VampirTrace has been built with hardware counter support, it is capable of recording hardware counter information as part of the event records. To request the measurement of certain counters, the user is required to set the environment variable VT_METRICS. The variable should contain a colon-separated list of counter names @@ -2055,8 +2081,7 @@ The GNU LIBC implementation provides a special hook mechanism that allows underlying system library.

      -If VampirTrace has been built with memory-tracing support (⇒ - Appendix A), VampirTrace is capable of recording memory allocation +If VampirTrace has been built with memory-tracing support, VampirTrace is capable of recording memory allocation information as part of the event records. To request the measurement of the application's allocated memory, the user must set the environment variable VT_MEMTRACE to yes. @@ -2138,54 +2163,59 @@ Several new region groups have been introduced:

      - - - - - - + + + + + +
      CUDA_KERNELCUDA kernels/functions can only appear on - ``CUDA-Threads''
      CUDART_API CUDA runtime API calls
      VT_CUDAVampirTrace overhead (write CUDA events, - check current device, etc.)
      CUDA_SYNC CUDA synchronization
      CUDA_KERNELCUDA kernels/functions can only appear on + ``CUDA-Threads''
      CUDA_IDLE GPU idle time - the CUDA device does not run any kernel currently (can only appear in one stream of the device)
      VT_CUDAVampirTrace overhead (write CUDA events, + check current device, etc.)

      -Additional feature switches (environment variables): +Additional feature switches (environment variables) to customize + CUDA runtime tracing:

      VT_CUDATRACE_KERNEL (default: yes)
      Tracing of CUDA kernels is enabled/disabled. - + +

      VT_CUDATRACE_MEMCPYASYNC (default: yes)
      Tracing of asynchronous CUDA memory copies is enabled/disabled. - + +

      VT_CUDATRACE_IDLE (default: no)
      Show the GPU idle time on a CUDA stream, if set to yes. - + +

      VT_CUDATRACE_GPUMEMUSAGE (default: no)
      Visualize GPU memory usage as counter ``gpu_mem_usage``, if set to yes. - + +

      VT_CUDATRACE_SYNC (default: yes or 3) @@ -2207,15 +2237,8 @@ Controls how VampirTrace handles synchronizing CUDA API calls, especially buffer and perform a timer synchronization between GPU and host. This introduces a minimal overhead but increases timer precision and prevents flushes elsewhere in the trace. - -
      -
      -
      VT_CUDATRACE_ERROR (default: no) -
      -Print out an error message and exit the program, if a CUDA wrapper call - does not return 'cudaSuccess'. The default is just a warning message - without program exit. - + +

      VT_CUPTI_METRICS (default: "") @@ -2224,7 +2247,39 @@ Capture CUDA CUPTI counters. Metrics are separated by default with '':`` or user specified by VT_METRICS_SEP.
      Example: VT_CUPTI_METRICS=local_store:local_load - + +

      +

      +
      +
      VT_CUPTI_SAMPLING (default: no) +
      +Poll for CUPTI counter values during kernel execution, if set to yes. + +

      +

      +
      +
      VT_CUPTI_API_CALLBACK (default: no) +
      +Use CUPTI callback API to intercept CUDA runtime calls. + +

      +

      +
      +
      VT_GPUTRACE_ERROR (default: no) +
      +Print out an error message and exit the program, if a function call to a + GPU library does not return successfully. The default is just a warning message + without program exit. + +

      +

      +
      +
      VT_GPUTRACE_DEBUG (default: no) +
      +Do not clean up all GPU resources (profiling events, contexts, event groups), + as they might have been already implicitly cleaned up by the GPU runtime. + +

      @@ -2261,15 +2316,34 @@ CUDA Performance Counters - CUPTI Events specified with the environment variable VT_CUPTI_METRICS. Metrics are separated by default with '':`` or user specified by VT_METRICS_SEP. The CUPTI User's Guide provides - information about the available counters. - Due to the use of asynchronous CUDA events in a GPU stream, VampirTrace - currently writes CUPTI counters directly on the process stream, which created - the GPU stream. Future implementations will write these counter information - on the GPU stream, where the corresponding CUDA kernel runs on. + information about the available counters. Alternatively set + VT_CUPTI_METRICS=help to show a list of available counters + (help_long to print the counter description as well).

      +Tracing CUDA runtime API via CUPTI Callbacks +

      + As there are systems that do not support dynamic libraries, the CUDA + runtime API can be traced via the CUPTI callback interface, implemented in + VampirTrace. + +

      +If tracing via CUPTI callbacks is enabled (VT_CUPTI_API_CALLBACK=yes) + and the CUDA runtime wrapper has been configured into the VampirTrace + libraries, the CUDA runtime library should be preloaded to reduce tracing + overhead (LD_PRELOAD=libcudart.so). + +

      +Currently CUPTI does not support tracing of asynchronous tasks. If tracing of + kernels or asynchronous memory copies is enabled, they will be synchronized + directly after the call to retrieve their runtime. This may be improved in + future releases. + +

      + +

      Compile and Link CUDA applications

      Use the VampirTrace compiler wrapper vtnvcc instead of nvcc @@ -2326,7 +2400,7 @@ With the library tracing mechanism described in section -

      +

      Tracing the NVIDIA CUDA SDK 3.x and 4.0

      To get some example traces, replace the compiler commands in the common @@ -2352,7 +2426,7 @@ Tracing the NVIDIA CUDA SDK 3.x and 4.0

      -

      +

      Multithreaded CUDA applications

      If threads are used to invoke asynchronous CUDA tasks, make sure to call a @@ -2364,7 +2438,7 @@ Multithreaded CUDA applications

      -

      +

      Mixed Use of CUDA runtime and driver API

      As CUDA runtime API may implicitly create and destroy CUDA contexts, there @@ -2383,10 +2457,9 @@ Mixed Use of CUDA runtime and driver API Note:
      For 32-bit systems VampirTrace has to be configured with the 32-bit - version of cuda runtime library. If the link test fails, use the - following configure option (⇒A.2): + version of cuda runtime library. If the link test fails, use the following configure option:
      -  --with-cudart-lib-dir=$CUDA_INSTALL_PATH/lib
      +  --with-cuda-lib-dir=$CUDA_INSTALL_PATH/lib
       

      @@ -2445,9 +2518,8 @@ export VT_PLUGIN_CNTR_METRICS=<library_name>_<event_name>

       export VT_PLUGIN_CNTR_METRICS=KswEvents_page_faults
       
      - Visit http://www.tu-dresden.de/zih/vampirtrace/plugin_counter for - documentation and examples. +Visit http://www.tu-dresden.de/zih/vampirtrace/plugin_counter for +documentation and examples.

      Note: @@ -2559,9 +2631,7 @@ If you'd like to experiment with some other I/O library, set the environment var
      fork/system/exec Calls

      - -

      -If VampirTrace has been built with LIBC trace support (⇒ Appendix A), +If VampirTrace has been built with LIBC trace support, it is capable of tracing programs which call functions from the LIBC exec family (execl, execlp, execle, execv, execvp, execve), system, and fork. @@ -2607,17 +2677,13 @@ To install VampirTrace with correctness checking support it is necessary to have

      1. Marmot -
        (see http://www.hlrs.de/organization/av/amt/research/marmot) +
        (see http://www.hlrs.de/organization/av/amt/research/marmot)
      2. UniMCI -
        (see http://www.tu-dresden.de/zih/unimci) +
        (see http://www.tu-dresden.de/zih/unimci)
      3. VampirTrace -
        (see http://www.tu-dresden.de/zih/vampirtrace) - +
        (see http://www.tu-dresden.de/zih/vampirtrace)
      @@ -2950,11 +3016,7 @@ The remaining functions * will be recorded at most 3000000 times.

      Besides creating filter files manually, you can also use the vtfilter tool to generate them automatically. This tool reads a provided trace -and decides whether a function should be filtered or not, based on the evaluation of -certain parameters. For more information see Section B.4. - -

      - +and decides whether a function should be filtered or not, based on the evaluation of certain parameters.

      Rank Specific Filtering

      @@ -3100,13 +3162,1302 @@ with the prefix app_ are associated with group ``USER''.

      +. + +

      + +

      + +
      +Configure Options +

      + +

      +Compilers and Options +

      + +

      +Some systems require unusual options for compiling or linking which + the configure script does not know. Run ./configure -help + for details on some of the pertinent environment variables. + +

      +You can pass initial values for configuration parameters to configure + by setting variables in the command line or in the environment. Here + is an example: + +

      +

      +% ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix
      +
      + +

      + +

      +Installation Names +

      + +

      +By default, make install will install the package's files in + /usr/local/bin, /usr/local/include, etc. You can specify an + installation prefix other than /usr/local by giving configure the + option -prefix=PATH. + +

      + +

      +Optional Features +

      + +

      +This is a summary of the most important optional features. For a full list of all available + features run ./configure -help. + +

      +

      +
      -enable-compinst=TYPE
      +
        +
      +enable support for compiler instrumentation, + e.g. gnu,pgi,pgi9,sun +
      +default: automatically by configure. + Note: Use pgi9 for PGI compiler version 9.0 or higher. + +

      +

      +
      -enable-dyninst
      +
        +
      +enable support for Dyninst instrumentation, + default: enable if found by configure. Dyninst[*]version 6.1 or higher! + +

      +

      +
      -enable-dyninst-attlib
      +
        +
      +build shared library which attaches Dyninst to + the running application, + default: enable if Dyninst found + by configure and system supports shared libraries + +

      +

      +
      -enable-tauinst
      +
        +
      +enable support for automatic source code + instrumentation by using TAU, default: enable if + found by configure. TAU[*]! + +

      +

      +
      -enable-memtrace
      +
        +
      +enable memory tracing support, default: enable if + found by configure + +

      +

      +
      -enable-cpuidtrace
      +
        +
      +enable CPU ID tracing support, default: enable if + found by configure + +

      +

      +
      -enable-libtrace=LIST
      +
        +
      +enable library tracing support (gen,libc,io), + default: automatically by configure + +

      +

      +
      -enable-rutrace
      +
        +
      +enable resource usage tracing support, default: + enable if found by configure + +

      +

      +
      -enable-metrics=TYPE
      +
        +
      +enable support for hardware performance counter + (papi,cpc,necsx), default: automatically + by configure + +

      +

      +
      -enable-zlib
      +
        +
      +enable ZLIB trace compression support, default: + enable if found by configure + +

      +

      +
      -enable-mpi
      +
        +
      +enable MPI support, default: enable if + MPI found by configure + +

      +

      +
      -enable-fmpi-lib
      +
        +
      +build the MPI Fortran support library, in case your + system does not have a MPI Fortran library. + default: enable if no MPI Fortran library + found by configure + +

      +

      +
      -enable-fmpi-handle-convert
      +
        +
      +do convert MPI handles, default: enable if MPI + conversion functions found by configure + +

      +

      +
      -enable-mpi2-thread
      +
        +
      +enable MPI-2 Thread support, default: enable if + found by configure + +

      +

      +
      -enable-mpi2-1sided
      +
        +
      +enable MPI-2 One-Sided Communication support, + default: enable if found by configure + +

      +

      +
      -enable-mpi2-extcoll
      +
        +
      +enable MPI-2 Extended Collective Operation support, + default: enable if found by configure + +

      +

      +
      -enable-mpi2-io
      +
        +
      +enable MPI-2 I/O support, default: enable if found + by configure + +

      +

      +
      -enable-mpicheck
      +
        +
      +enable support for Universal MPI Correctness + Interface (UniMCI), default: enable if + unimci-config found by configure + +

      +

      +
      -enable-etimesync
      +
        +
      +enable enhanced timer synchronization support, + default: enable if C-LAPACK found by configure + +

      +

      +
      -enable-threads=LIST
      +
        +
      +enable support for threads (pthread, omp), + default: automatically by configure + +

      +

      +
      -enable-java
      +
        +
      +enable Java support, default: enable if JVMTI + found by configure + +

      +

      +
      + +

      + +

      +Important Optional Packages +

      + +

      +This is a summary of the most important optional features. For a full list of all available + features run ./configure -help. + +

      +

      +
      -with-platform=PLATFORM
      +
        +
      +configure for given platform + (altix,bgl,bgp,crayt3e,crayx1,crayxt, +
      +ibm,linux,macos,necsx,origin,sicortex,sun,generic
      ), + default: automatically by configure + +

      +

      +
      -with-bitmode=32|64
      +
        +
      +specify bit mode + +

      +

      +
      -with-options=FILE
      +
        +
      +load options from FILE, default: configure + searches for a config file in config/defaults + based on given platform and bitmode + +

      +

      +
      -with-local-tmp-dir=DIR
      +
        +
      +give the path for node-local temporary directory + to store local traces to, default: /tmp +
      +
      + +

      +If you would like to use an external version of OTF library, set: +

      +
      -with-extern-otf
      +
        +
      +use external OTF library, default: not set +
      +
      -with-extern-otf-dir=OTFDIR
      +
        +
      +give the path for OTF, default: /usr + +

      +

      +
      -with-otf-flags=FLAGS
      +
        +
      +pass FLAGS to the OTF distribution configuration + (only for internal OTF version) + +

      +

      +
      -with-otf-lib=OTFLIB
      +
        +
      +use given otf lib, default: -lotf -lz + +

      +

      +
      + +

      +If the supplied OTF library was built without zlib support then OTFLIB will +be set to -lotf. + +

      +

      +
      -with-dyninst-dir=DYNIDIR
      +
        +
      +give the path for DYNINST, default: /usr + +

      +

      +
      -with-dyninst-inc-dir=DYNIINCDIR
      +
        +
      +give the path for Dyninst-include files, default: DYNIDIR/include + +

      +

      +
      -with-dyninst-lib-dir=DYNILIBDIR
      +
        +
      +give the path for Dyninst-libraries, default: DYNIDIR/lib + +

      +

      +
      -with-dyninst-lib=DYNILIB
      +
        +
      +use given Dyninst lib, default: -ldyninstAPI + +

      +

      +
      -with-tau-instrumentor=TAUINSTRUMENTOR
      +
        +
      +give the command for the TAU instrumentor, default: tau_instrumentor + +

      +

      +
      -with-pdt-cparse=PDTCPARSE
      +
        +
      +give the command for PDT C source code parser, default: cparse + +

      +

      +
      -with-pdt-cxxparse=PDTCXXPARSE
      +
        +
      +give the command for PDT C++ source code parser, default: cxxparse + +

      +

      +
      -with-pdt-fparse=PDTFPARSE
      +
        +
      +give the command for PDT Fortran source code parser, default: f95parse, f90parse, or gfparse + +

      +

      +
      -with-papi-dir=PAPIDIR
      +
        +
      +give the path for PAPI, default: /usr + +

      +

      +
      -with-cpc-dir=CPCDIR
      +
        +
      +give the path for CPC, default: /usr + +

      +

      +
      + +

      +If you have not specified the environment variable MPICC +(MPI compiler command) use the following options to set the location +of your MPI installation: + +

      +

      +
      -with-mpi-dir=MPIDIR
      +
        +
      +give the path for MPI, default: /usr/ + +

      +

      +
      -with-mpi-inc-dir=MPIINCDIR
      +
        +
      +give the path for MPI-include files, +
      +default: MPIDIR/include/ + +

      +

      +
      -with-mpi-lib-dir=MPILIBDIR
      +
        +
      +give the path for MPI-libraries, default: MPIDIR/lib/ + +

      +

      +
      -with-mpi-lib
      +
        +
      +use given mpi lib + +

      +

      +
      -with-pmpi-lib
      +
        +
      +use given pmpi lib +
      +
      + +

      +If your system does not have an MPI Fortran library +set -enable-fmpi-lib (see above), otherwise set: + +

      +

      +
      -with-fmpi-lib
      +
        +
      +use given fmpi lib + +

      +

      +
      + +

      +Use the following options to specify your MPI-implementation + +

      +

      +
      -with-hpmpi
      +
        +
      +set MPI-libs for HP MPI + +

      +

      +
      -with-intelmpi
      +
        +
      +set MPI-libs for Intel MPI + +

      +

      +
      -with-intelmpi2
      +
        +
      +set MPI-libs for Intel MPI2 + +

      +

      +
      -with-lam
      +
        +
      +set MPI-libs for LAM/MPI + +

      +

      +
      -with-mpibgl
      +
        +
      +set MPI-libs for IBM BG/L + +

      +

      +
      -with-mpibgp
      +
        +
      +set MPI-libs for IBM BG/P + +

      +

      +
      -with-mpich
      +
        +
      +set MPI-libs for MPICH + +

      +

      +
      -with-mpich2
      +
        +
      +set MPI-libs for MPICH2 + +

      +

      +
      -with-mvapich
      +
        +
      +set MPI-libs for MVAPICH + +

      +

      +
      -with-mvapich2
      +
        +
      +set MPI-libs for MVAPICH2 + +

      +

      +
      -with-mpisx
      +
        +
      +set MPI-libs for NEC MPI/SX + +

      +

      +
      -with-mpisx-ew
      +
        +
      +set MPI-libs for NEC MPI/SX with 8 Byte Fortran Integer + +

      +

      +
      -with-openmpi
      +
        +
      +set MPI-libs for Open MPI + +

      +

      +
      -with-sgimpt
      +
        +
      +set MPI-libs for SGI MPT + +

      +

      +
      -with-sunmpi
      +
        +
      +set MPI-libs for SUN MPI + +

      +

      +
      -with-sunmpi-mt
      +
        +
      +set MPI-libs for SUN MPI-MT +
      +
      + +

      +To enable enhanced timer synchronization a LAPACK library with C wrapper support is needed: + +

      +

      +
      -with-clapack-dir=LAPACKDIR
      +
        +
      +set the path for CLAPACK, default: /usr + +

      +

      +
      -with-clapack-lib
      +
        +
      +set CLAPACK-libs, default: -lclapack -lcblas -lf2c + +

      +

      +
      -with-clapack-acml
      +
        +
      +set CLAPACK-libs for ACML + +

      +

      +
      -with-clapack-essl
      +
        +
      +set CLAPACK-libs for ESSL + +

      +

      +
      -with-clapack-mkl
      +
        +
      +set CLAPACK-libs for MKL + +

      +

      +
      -with-clapack-sunperf
      +
        +
      +set CLAPACK-libs for SUN Performance Library + +

      +

      +
      + +

      +To enable Java support the JVM Tool Interface (JVMTI) version 1.0 or higher is required: + +

      +

      +
      -with-jvmti-dir=JVMTIDIR
      +
        +
      +give the path for JVMTI, default: $JAVA_HOME + +

      +

      +
      -with-jvmti-inc-dir=JVMTIINCDIR
      +
        +
      +give the path for JVMTI-include files, default: + JVMTI/include + +

      +

      +
      + +

      +To enable support for generating wrappers for 3rd-party libraries the C code parser CTool is needed: + +

      +

      +
      -with-ctool-dir=CTOOLDIR
      +
        +
      +give the path for CTool, default: /usr + +

      +

      +
      -with-ctool-inc-dir=CTOOLINCDIR
      +
        +
      +give the path for CTool-include files, default: + CTOOLDIR/include + +

      +

      +
      -with-ctool-lib-dir=CTOOLLIBDIR
      +
        +
      +give the path for CTool-libraries, default: + CTOOLDIR/lib + +

      +

      +
      -with-ctool-lib=CTOOLLIB
      +
        +
      +use given CTool lib, default: automatically by configure + +

      +

      +
      + +

      +To enable support for CUDA runtime API wrapping, the CUDA-Toolkit +install path is needed: + +

      +

      +
      -with-cuda-dir=CUDATKDIR
      +
        +
      +give the path for CUDA Toolkit, default: /usr/local/cuda +
      +
      -with-cuda-inc-dir=CUDATKINCDIR
      +
        +
      +give the path for CUDA Toolkit-include files, default: + CUDATKDIR/include +
      +
      -with-cuda-lib-dir=CUDATKLIBDIR
      +
        +
      +give the path for CUDA Toolkit-libraries, default: + CUDATKDIR/lib64 +
      +
      -with-cudart-lib=CUDARTLIB
      +
        +
      +use given cudart lib, default: -lcudart +
      +
      -with-cudart-shlib=CUDARTSHLIB
      +
        +
      +give the pathname for the shared CUDA runtime library, default: + automatically by configure + +

      +

      +
      + +

      +To enable support for CUPTI counter capturing during CUDA runtime +tracing, the CUPTI install path is needed: + +

      +

      +
      -with-cupti-dir=CUPTIDIR
      +
        +
      +give the path for CUPTI, default: /usr +
      +
      -with-cupti-inc-dir=CUPTIINCDIR
      +
        +
      +give the path for CUPTI-include files, default: + CUPTIDIR/include +
      +
      -with-cupti-lib-dir=CUPTILIBDIR
      +
        +
      +give the path for CUPTI-libraries, default: + CUPTIDIR/lib +
      +
      -with-cupti-lib=CUPTILIB
      +
        +
      +use given cupti lib, default: -lcupti + +

      +

      +
      + +

      + +

      +Cross Compilation +

      + +

      +Building VampirTrace on cross compilation platforms needs some special attention. +The compiler wrappers, OPARI, and the Library Wrapper Generator are built for the front-end (build system) +whereas the VampirTrace libraries, vtdyn, vtunify, and vtfilter are built +for the back-end (host system). Some configure options which are of interest for cross compilation +are shown below: + +

        +
      • Set CC, CXX, F77, and FC to the cross compilers installed on the front-end. +
      • +
      • Set CC_FOR_BUILD and CXX_FOR_BUILD to the native compilers of the front-end. +
      • +
      • Set -host= to the output of config.guess on the back-end. +
      • +
      • Set -with-cross-prefix= to a prefix which will be prepended to the executables of the compiler wrappers and OPARI (default: ``cross-'') +
      • +
      • Maybe you also need to set additional commands and flags for the back-end (e.g. RANLIB, AR, MPICC, CXXFLAGS). +
      • +
      +For example, this configure command line works for an NEC SX6 system with an X86_64 based front-end: + +

      +

      +% ./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 MPICC=sxmpicc
      +              AR=sxar RANLIB="sxar st" CC_FOR_BUILD=cc CXX_FOR_BUILD=c++
      +              --host=sx6-nec-superux14.1
      +              --with-cross-prefix=sx
      +              --with-otf-lib=-lotf
      +
      + +

      + +

      +Environment Set-Up +

      + +

      +Add the bin subdirectory of the installation directory to your + $PATH environment variable. To use VampirTrace with Dyninst, + you will also need to add the lib subdirectory to your + LD_LIBRARY_PATH environment variable: +
      +
      +
      +for csh and tcsh: +

      +> setenv PATH <vt-install>/bin:$PATH
      +> setenv LD_LIBRARY_PATH <vt-install>/lib:$LD_LIBRARY_PATH
      +
      + for bash and sh: +
      +% export PATH=<vt-install>/bin:$PATH
      +% export LD_LIBRARY_PATH=<vt-install>/lib:$LD_LIBRARY_PATH
      +
      + +

      + +

      +Notes for Developers +

      + +

      + +

      +Build from SVN +

      + +

      +If you have checked out a developer's copy of VampirTrace (i.e. + checked out from SVN), you should first run: + +

      +

      +% ./bootstrap [--otf-package <package>]
      +              [--version <version>]
      +
      +Note that GNU Autoconf ≥2.60 and GNU Automake ≥1.9.6 are required. +You can download them from http://www.gnu.org/software/autoconf +and http://www.gnu.org/software/automake. +

      +Command Reference +

      + +

      + +
      +Compiler Wrappers (vtcc,vtcxx,vtf77,vtf90) +

      + +

      +

      +vtcc,vtcxx,vtf77,vtf90 - compiler wrappers for C, C++, 
      +                         Fortran 77, Fortran 90
      +
      +Syntax: vt<cc|cxx|f77|f90> [options] ...
      +
      +options:
      +  -vt:help            Show this help message.
      +  -vt:version         Show VampirTrace version.
      +  -vt:<cc|cxx|f77|f90> <cmd>
      +                      Set the underlying compiler command.
      +
      +  -vt:inst <insttype> Set the instrumentation type.
      +
      +   possible values:
      +
      +    compinst          fully-automatic by compiler
      +    manual            manual by using VampirTrace's API
      +    dyninst           binary by using Dyninst (www.dyninst.org)
      +    tauinst           automatic source code instrumentation by
      +                      using PDT/TAU
      +
      +  -vt:opari <!args>   Set options for OPARI command. (see
      +                      share/vampirtrace/doc/opari/Readme.html)
      +
      +  -vt:noopari         Disable instrumentation of OpenMP constructs
      +                      by OPARI.
      +
      +  -vt:<seq|mpi|mt|hyb>
      +                      Enforce application's parallelization type.
      +                      It's only necessary if it could not be determined
      +                      automatically based on underlying compiler and flags.
      +                      seq = sequential
      +                      mpi = parallel (uses MPI)
      +                      mt = parallel (uses OpenMP/POSIX threads)
      +                      hyb = hybrid parallel (MPI + Threads)
      +                      (default: automatically)
      +
      +  -vt:tau <!args>     Set options for the TAU instrumentor 
      +                      command.
      +
      +  -vt:pdt <!args>     Set options for the PDT parse command.
      +
      +  -vt:preprocess      Preprocess the source files before parsing
      +                      by OPARI and/or PDT.
      +
      +  -vt:cpp <cmd>       Set C preprocessor command.
      +
      +  -vt:cppflags <[!]flags>
      +                      Set/add flags for the C preprocessor.
      +
      +  -vt:verbose         Enable verbose mode.
      +
      +  -vt:show[me]        Do not invoke the underlying compiler.
      +                      Instead, show the command line that would be
      +                      executed to compile and link the program.
      +
      +  -vt:showme-compile  Do not invoke the underlying compiler.
      +                      Instead, show the compiler flags that would be
      +                      supplied to the compiler.
      +
      +  -vt:showme-link     Do not invoke the underlying compiler.
      +                      Instead, show the linker flags that would be
      +                      supplied to the compiler.
      +
      +  See the man page for your underlying compiler for other 
      +  options that can be passed through 'vt<cc|cxx|f77|f90>'.
      +
      +Environment variables:
      +  VT_INST             Equivalent to '-vt:inst'
      +  VT_CC               Equivalent to '-vt:cc '
      +  VT_CXX              Equivalent to '-vt:cxx '
      +  VT_F77              Equivalent to '-vt:f77'
      +  VT_F90              Equivalent to '-vt:f90'
      +  VT_CFLAGS           C compiler flags
      +  VT_CXXFLAGS         C++ compiler flags
      +  VT_F77FLAGS         Fortran 77 compiler flags
      +  VT_FCFLAGS          Fortran 90 compiler flags
      +  VT_LDFLAGS          Linker flags
      +  VT_LIBS             Libraries to pass to the linker
      +
      +  The corresponding command line options overwrite the 
      +  environment variables setting.
      +
      +Examples:
      +  automatic instrumentation by compiler:
      +
      +     vtcc -vt:cc gcc -vt:inst compinst -c foo.c -o foo.o
      +     vtcc -vt:cc gcc -vt:inst compinst -c bar.c -o bar.o
      +     vtcc -vt:cc gcc -vt:inst compinst foo.o bar.o -o foo
      +
      +  manual instrumentation by using VT's API:
      +
      +     vtf90 -vt:inst manual foobar.F90 -o foobar -DVTRACE
      +
      +  IMPORTANT: Fortran source files instrumented by VT's API
      +             have to be preprocessed by CPP.
      +
      + +

      + +

      + +
      +Local Trace Unifier (vtunify) +

      + +

      +

      +vtunify[-mpi] - local trace unifier for VampirTrace.
      +
      +Syntax: vtunify[-mpi] <input trace prefix> [options]
      +
      +options:
      +  -h, --help          Show this help message.
      +
      +  -V, --version       Show VampirTrace version.
      +
      +  -o PREFIX           Prefix of output trace filename.
      +
      +  -f FILE             Function profile output filename.
      +                      (default=PREFIX.prof.txt)
      +
      +  -k, --keeplocal     Don't remove input trace files.
      +
      +  -p, --progress      Show progress.
      +
      +  -v, --verbose       Increase output verbosity.
      +                      (can be used more than once)
      +
      +  -q, --quiet         Enable quiet mode.
      +                      (only emergency output)
      +
      +  --nocompress        Don't compress output trace files.
      +
      +  --nomsgmatch        Don't match messages.
      +
      +  --droprecvs         Drop message receive events, if msg. matching
      +                      is enabled.
      +
      + +

      + +

      + +
      +Binary Instrumentor (vtdyn) +

      + +

      +

      +vtdyn - binary instrumentor (Dyninst mutator) for VampirTrace.
      +
      +Syntax: vtdyn [options] <executable> [arguments ...]
      +
      +options:
      +  -h, --help          Show this help message.
      +
      +  -V, --version       Show VampirTrace version.
      +
      +  -v, --verbose       Increase output verbosity.
      +                      (can be used more than once)
      +
      +  -q, --quiet         Enable quiet mode.
      +                      (only emergency output)
      +
      +  -o, --output FILE   Rewrite instrumented executable to specified pathname.
      +
      +  -s, --shlibs SHLIBS[,...]
      +                      Comma-separated list of shared libraries which shall
      +                      also be instrumented.
      +
      +  -f, --filter FILE   Pathname of input filter file.
      +
      +  --ignore-nodbg      Don't instrument functions which have no debug
      +                      information.
      +
      + +

      + +

      + +
      +Trace Filter Tool (vtfilter) +

      + +

      +

      +vtfilter[-mpi] - filter tool for VampirTrace.
      +
      +Syntax: 
      +  Generate a filter file:
      +    vtfilter[-mpi] --gen [gen-options] <input trace file>
      +
      +  Filter a trace using an already existing filter file:
      +    vtfilter[-mpi] [--filt] [filt-options]
      +      --filter=<input filter file> <input trace file>
      +
      +options:
      +  --gen               Generate a filter file.
      +                      See 'gen-options' below for valid options.
      +
      +  --filt              Filter a trace using an already existing
      +                      filter file. (default)
      +                      See 'filt-options' below for valid options.
      +
      +  -h, --help          Show this help message.
      +
      +  -V, --version       Show VampirTrace version.
      +
      +  -p, --progress      Show progress.
      +
      +  -v, --verbose       Increase output verbosity.
      +                      (can be used more than once)
      +
      +gen-options:
      +  -o, --output=FILE   Pathname of output filter file.
      +
      +  -r, --reduce=N      Reduce the trace size to N percent of the
      +                      original size. The program relies on the
      +                      fact that the major part of the trace are
      +                      function calls. The approximation of size
      +                      will get worse with a rising percentage of
      +                      communication and other non function
      +                      calling or performance counter records.                           
      +
      +  -l, --limit=N       Limit the number of calls for filtered
      +                      function to N.
      +                      (default: 0)                                         
      +
      +  -s, --stats         Prints out the desired and the expected
      +                      percentage of file size.                                     
      +
      +  -e, --exclude=FUNC[;FUNC;...]
      +                      Exclude certain functions from filtering.
      +                      A function name may contain wildcards.   
      +
      +  --exclude-file=FILE Pathname of file containing a list of
      +                      functions to be excluded from filtering.                             
      +
      +  -i, --include=FUNC[;FUNC;...]
      +                      Force to include certain functions into
      +                      the filter. A function name may contain
      +                      wildcards.             
      +
      +  --include-file=FILE Pathname of file containing a list of
      +                      functions to be included into the filter.
      +
      +  --include-callees   Automatically include callees of included
      +                      functions as well into the filter.                           
      +
      +filt-options:
      +  -o, --output=FILE   Pathname of output trace file.
      +
      +  -f, --filter=FILE   Pathname of input filter file.
      +
      +  -s, --max-streams=N Maximum number of output streams.
      +                      (default: 0)
      +            vtfilter: Set this to 0 to get the same number of
      +                      output streams as input streams.                                     
      +        vtfilter-mpi: Set this to 0 to get the same number of
      +                      output streams as MPI processes used, but
      +                      at least the number of input streams.
      +
      +  --max-file-handles=N
      +                      Maximum number of files that are allowed
      +                      to be open simultaneously.
      +                      (default: 256)
      +
      +  --nocompress        Don't compress output trace files.
      +
      + +

      + +

      + +
      +Library Wrapper Generator (vtlibwrapgen) +

      + +

      +

      +vtlibwrapgen - library wrapper generator for VampirTrace.
      +
      +Syntax: 
      +  Generate a library wrapper source file:
      +    vtlibwrapgen [gen-options] <input header file> 
      +                 [input header file...]
      +
      +  Build a wrapper library from a generated source file:
      +    vtlibwrapgen --build [build-options] 
      +                 <input lib. wrapper source file>
      +
      +options:
      +  --gen              Generate a library wrapper source file. 
      +                     This is the default behavior. See 
      +                     'gen-options' below for valid options.
      +
      +  --build            Build a wrapper library from a generated 
      +                     source file. See 'build-options' below 
      +                     for valid options.
      +
      +  -h, --help         Show this help message.
      +
      +  -V, --version      Show VampirTrace version.
      +
      +  -q, --quiet        Enable quiet mode. 
      +                     (only emergency output)
      +
      +  -v, --verbose      Increase output verbosity.
      +                     (can be used more than once)
      +
      +gen-options:
      +  -o, --output=FILE  Pathname of output wrapper source file.
      +                     (default: wrap.c)                      
      +
      +  -l, --shlib=SHLIB  Pathname of shared library that contains 
      +                     the actual library functions.
      +                     (can be used more than once)
      +
      +  -f, --filter=FILE  Pathname of input filter file.
      +
      +  -g, --group=NAME   Separate function group name for wrapped 
      +                     functions.
      +
      +  -s, --sysheader=FILE
      +                     Header file to be included additionally.
      +
      +  --nocpp            Don't use preprocessor.
      +
      +  --keepcppfile      Don't remove preprocessed header files.
      +
      +  --cpp=CPP          C preprocessor command
      +                     (default: gcc -E)     
      +
      +  --cppflags=CPPFLAGS 
      +                     C preprocessor flags, e.g. 
      +                     -I<include dir>
      +
      +  --cppdir=DIR       Change to this preprocessing directory.
      +
      +environment variables:
      +  VT_CPP             C preprocessor command 
      +                     (equivalent to '--cpp')
      +  VT_CPPFLAGS        C preprocessor flags 
      +                     (equivalent to '--cppflags')
      +
      +build-options:
      +  -o, --output=PREFIX
      +                     Prefix of output wrapper library.
      +                     (default: libwrap)               
      +
      +  --shared           Do only build shared wrapper library.
      +
      +  --static           Do only build static wrapper library.
      +
      +  --libtool=LT       Libtool command
      +
      +  --cc=CC            C compiler command (default: gcc)
      +
      +  --cflags=CFLAGS    C compiler flags
      +
      +  --ld=LD            linker command (default: CC)
      +
      +  --ldflags=LDFLAGS  linker flags, e.g. -L<lib dir>
      +                     (default: CFLAGS)
      +
      +  --libs=LIBS        libraries to pass to the linker, 
      +                     e.g. -l<library>
      +
      +environment variables:
      +  VT_CC              C compiler command 
      +                     (equivalent to '--cc')
      +  VT_CFLAGS          C compiler flags 
      +                     (equivalent to '--cflags')
      +  VT_LD              linker command 
      +                     (equivalent to '--ld')
      +  VT_LDFLAGS         linker flags 
      +                     (equivalent to '--ldflags')
      +  VT_LIBS            libraries to pass to the linker
      +                     (equivalent to '--libs')
      +
      +examples:
      +  Generating wrapper library 'libm_wrap' for the Math library
      +  'libm.so':
      +
      +    vtlibwrapgen -l libm.so -g MATH -o mwrap.c \
      +    /usr/include/math.h
      +    vtlibwrapgen --build -o libm_wrap mwrap.c
      +    export LD_PRELOAD=$PWD/libm_wrap.so:libvt.so
      +
      + +

      + +

      + +
      +Application Execution Wrapper (vtrun) +

      + +

      +

      + vtrun - application execution wrapper for VampirTrace.
      +
      + Syntax: vtrun [options] <executable> [arguments]
      +
      +   options:
      +     -h, --help          Show this help message.
      +
      +     -V, --version       Show VampirTrace version.
      +
      +     -v, --verbose       Increase output verbosity.
      +                         (can be used more than once)
      +
      +     -q, --quiet         Enable quiet mode.
      +                         (only emergency output)
      +
      +     -<seq|mpi|mt|hyb>   Set application's parallelization type.
      +                         It's only necessary if it could not 
      +                         be determined automatically.
      +                         seq = sequential
      +                         mpi = parallel (uses MPI)
      +                         mt  = parallel (uses OpenMP/POSIX threads)
      +                         hyb = hybrid parallel (MPI + Threads)
      +                         (default: automatically)
      +
      +     --fortran           Set application's language to Fortran.
      +                         It's only necessary for MPI-applications 
      +                         and if it could not be determined 
      +                         automatically.
      +
      +     --dyninst           Instrument user functions by Dyninst.
      +
      +     --extra-libs=LIBS   Extra libraries to preload.
      +
      +   example:
      +     original:
      +        mpirun -np 4 ./a.out
      +     with VampirTrace:
      +        mpirun -np 4 vtrun ./a.out
      +
      + +

      + +

      Counter Specifications

      -

      +


      PAPI @@ -3195,7 +4546,7 @@ PAPI_HW_INT Hardware interrupts

      -

      +


      CPC @@ -3261,7 +4612,7 @@ http://www.sun.com/processors/manuals

      -

      +


      NEC SX Hardware Performance Counter @@ -3291,7 +4642,7 @@ SX_CTR_BPFC Branch prediction failure counter

      -

      +


      Resource Usage @@ -3397,50 +4748,199 @@ by the Linux 2.6 kernel are shown in the table.

      -


      Footnotes

      +to learn more about using vtunify. +

      + +

      + +
      +What limitations are associated with "on/off" and buffer rewind? +

      + +

      +Starting and stopping tracing by using the VT_ON/VT_OFF calls +as well as the buffer rewind method are considered +advanced usage of VampirTrace and should be performed with care. When restarting +the recording of events, the call stack of the application has to have the same depth +as when the recording was stopped. The same applies for the rewind call, which +has to be at the same stack level as the rewind mark. If this is not the case, an error +message will be printed during runtime and VampirTrace will abort execution. +A safe method is to call VT_OFF and VT_ON in the same function. + +

      +It is allowed to use "on/off" in a section between a rewind mark and a buffer rewind call. +But it is not allowed to call VT_SET_REWIND_MARK or VT_REWIND +during a section deactivated by the "on/off" functionality. + +

      +Buffer flushes interfere with the rewind method: If the trace buffer is flushed +after the call to VT_SET_REWIND_MARK, the mark is removed and a subsequent +call to VT_REWIND will not work and issue a warning message. + +

      +In addition, stopping or rewinding tracing while waiting for MPI messages can cause those MPI messages not to +be recorded in the trace. This can cause problems when analyzing the OTF trace afterwards, e.g., with Vampir. + +

      + +

      + +
      +VampirTrace warns that it "cannot lock file a.lock", what's wrong? +

      + +

      +For unique naming of multiple trace files in the same directory, a file *.lock +is created and locked for exclusive access if VT_FILE_UNIQUE +is set to yes (⇒ Section 3.1). +Some file systems do not implement file locking. +In this case, VampirTrace still tries to name the trace files uniquely, but this may fail +in certain cases. +Alternatively, you can manually control the unique file naming by setting +VT_FILE_UNIQUE to a different numerical ID for each program run. + +

      + +

      + +
      +Can I relocate my VampirTrace installation without rebuilding from source? +

      + +

      +VampirTrace hard-codes some directory paths in its executables and libraries based on installation +paths specified by the configure script. However, it's possible to move an existing VampirTrace +installation to another location and use it without rebuilding from source. +To do so, it's necessary to set the environment variable VT_PREFIX to the new installation prefix +before using VampirTrace's Compiler Wrappers (⇒ Section 2.1) or launching an +instrumented application. For example: + +

      +

      +./configure --prefix=/opt/vampirtrace
      +make install
      +mv /opt/vampirtrace $HOME/vampirtrace
      +export VT_PREFIX=$HOME/vampirtrace
      +
      + +

      + +

      + +
      +What are the byte counts in collective communication records? +

      + +

      +The byte counts in collective communication records changed with version 5.10. + +

      +From 5.10 on, the byte counts of collective communication records show the +bytes per rank given to the MPI call or returned by the MPI call. +This is the MPI API perspective. It is next to impossible to find out how many +bytes are actually sent or received during a collective operation by any other +MPI implementation. + +

      +In the past (until VampirTrace version 5.9), the byte count in collective +operation records was defined differently. It used a simple and naive +hypothetical implementation of collectives based on point-to-point messages +and derived the byte counts from that. This might have been more confusing than +helpful and was therefore changed. + +

      +Thanks to Eugene Loh for pointing this out! + +

      + +

      + +
      +I get "error: unknown asm constraint letter" +

      + +

      +It is a known issue with the tau_instrumentor that it doesn't support inline assembler code. +At the moment there is no other solution than using another kind of instrumentation like +compiler instrumentation (⇒ Section 2.3) or manual instrumentation (⇒ Section 2.4). + +

      + +

      + +
      +I have a question that is not answered in this document! +

      + +

      +You may contact us at vampirsupport@zih.tu-dresden.de +for support on installing and using VampirTrace. + +

      + +

      + +
      +I need support for additional features so I can trace application xyz. +

      + +

      +Suggestions are always welcome (contact: vampirsupport@zih.tu-dresden.de) +but there is a chance that we cannot implement all your wishes as our resources +are limited. + +

      +Anyway, the source code of VampirTrace is open to everybody so you may +implement support for new stuff yourself. +If you provide us with your additions afterwards we will consider merging them +into the official VampirTrace package.


      Footnotes

      -
      ... (OTF)[*]... (OTF)[*]
      -
      http://www.tu-dresden.de/zih/otf +
      http://www.tu-dresden.de/zih/otf
      -
      ... tool [*]... tool [*]
      -
      http://www.vampir.eu +
      http://www.vampir.eu
      -
      ... -Open MPI [*]... Open MPI [*]
      -
      http://www.open-mpi.org/faq/?category=vampirtrace +
      http://www.open-mpi.org/faq/?category=vampirtrace
      -
      ... documentation [*]... documentation [*]
      -
      http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling +
      http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling
      -
      ... -Dyninst [*]... Dyninst [*]
      -
      http://www.dyninst.org +
      http://www.dyninst.org
      -
      ... CLAPACK[*]... CLAPACK[*]
      -
      www.netlib.org/clapack +
      www.netlib.org/clapack + +
      +
      ... Dyninst [*]
      +
      http://www.dyninst.org + +
      +
      ... TAU [*]
      +
      http://tau.uoregon.edu
      diff --git a/ompi/contrib/vt/vt/doc/UserManual.pdf b/ompi/contrib/vt/vt/doc/UserManual.pdf index 2f607f86d5..ed01470e9e 100644 Binary files a/ompi/contrib/vt/vt/doc/UserManual.pdf and b/ompi/contrib/vt/vt/doc/UserManual.pdf differ diff --git a/ompi/contrib/vt/vt/etc/vt-setup-config.xml.in b/ompi/contrib/vt/vt/etc/vt-setup-config.xml.in index 826f59c49e..949296094d 100644 --- a/ompi/contrib/vt/vt/etc/vt-setup-config.xml.in +++ b/ompi/contrib/vt/vt/etc/vt-setup-config.xml.in @@ -1,4 +1,4 @@ - + @VT_SETUP_NM@ diff --git a/ompi/contrib/vt/vt/extlib/otf/AUTHORS b/ompi/contrib/vt/vt/extlib/otf/AUTHORS index 2471c31661..2a3504d2fc 100644 --- a/ompi/contrib/vt/vt/extlib/otf/AUTHORS +++ b/ompi/contrib/vt/vt/extlib/otf/AUTHORS @@ -4,6 +4,7 @@ Johannes Spazier Matthias Jurenz Bert Wesarg Robert Dietrich +Andre Groetzsch Michael Heyde Michael Kluge Holger Mickler diff --git a/ompi/contrib/vt/vt/extlib/otf/ChangeLog b/ompi/contrib/vt/vt/extlib/otf/ChangeLog index 408332e5d9..1b369c59e9 100644 --- a/ompi/contrib/vt/vt/extlib/otf/ChangeLog +++ b/ompi/contrib/vt/vt/extlib/otf/ChangeLog @@ -1,3 +1,43 @@ +1.10openmpi + - added process substitute record + - added process group attribute 'OTF_ATTR_IsCommunicator' + - renamed OTF_fprintf to OTF_Error which sets the error variables + otf_errno and otf_strerr + - added OTF_Warning which prints warning messages if verbosity + (--with-verbose) is enabled + - fixed detection of C compiler flags for compiling the python interface + - corrected return value of OTF_WStream_write* functions + - fixed parsing of process[group] attributes record + - otfmerge-mpi: + - build sequential version; replaces the old otfmerge + - otfprofile-mpi: + - added process clustering based on feature vectors from every + process trace + (see docu/tools/otfprofile_clustering.pdf for more details) + - added CSV output + - changeable number of process groups for LaTeX output + - build sequential version; replaces the old otfprofile + - fixed problem with 
pgfplots 1.5 + - improved y axis labeling for message length charts + - otfshrink: + - added mapping mode where all but one members per group are + removed with multiple groups + - reworked parameter parsing, - ranges no longer + supported + - fixed creation of symbolic links when non-contiguous + stream IDs are used + - disable 'parent' in a remaining process definition in case + the parent is removed + +1.9.2sawfish + - otfprofile-mpi: + - ignore "backward-running" P2P-messages when calculating + durations + - removed '0' in message length charts and added '1' as + extra label + - added thousands separator for large integer numbers in + latex output + 1.9.1sawfish - improved zlib compression - added progress display to otfprofile-mpi diff --git a/ompi/contrib/vt/vt/extlib/otf/VERSION b/ompi/contrib/vt/vt/extlib/otf/VERSION index 051ec43ece..e9cd762679 100644 --- a/ompi/contrib/vt/vt/extlib/otf/VERSION +++ b/ompi/contrib/vt/vt/extlib/otf/VERSION @@ -6,8 +6,8 @@ # ... If sub is zero, then it is omitted. major=1 -minor=9 -sub=1 +minor=10 +sub=0 # string is used for alpha, beta, or release tags. If it is non-empty, it will # be appended to the version number. @@ -23,6 +23,7 @@ sub=1 # 1.7.* catfish # 1.8.* sturgeon # 1.9.* sawfish +# 1.a.* coelacanth # string=openmpi @@ -47,5 +48,5 @@ string=openmpi # release, age must be incremented. Otherwise, reset age # to '0'. 
-library=3:1:2 +library=4:0:3 diff --git a/ompi/contrib/vt/vt/extlib/otf/config/m4/acinclude.swig_python.m4 b/ompi/contrib/vt/vt/extlib/otf/config/m4/acinclude.swig_python.m4 index 1e79417f70..d6742ecab1 100644 --- a/ompi/contrib/vt/vt/extlib/otf/config/m4/acinclude.swig_python.m4 +++ b/ompi/contrib/vt/vt/extlib/otf/config/m4/acinclude.swig_python.m4 @@ -34,7 +34,10 @@ canonical name.]) if test x"$python_config" != x; then AC_MSG_CHECKING([for python compiler flags]) - SWIG_PYTHON_CFLAGS=`$python_config --cflags` + # cannot use '--cflags' here; the resulting flags might contain GNU specific flags + # which are not compatible with other compilers; use '--includes' instead of '--cflags' + #SWIG_PYTHON_CFLAGS=`$python_config --cflags + SWIG_PYTHON_CFLAGS=`$python_config --includes` AC_MSG_RESULT([$SWIG_PYTHON_CFLAGS]) fi fi diff --git a/ompi/contrib/vt/vt/extlib/otf/configure.in b/ompi/contrib/vt/vt/extlib/otf/configure.in index cfafe463e3..836e8d08b0 100644 --- a/ompi/contrib/vt/vt/extlib/otf/configure.in +++ b/ompi/contrib/vt/vt/extlib/otf/configure.in @@ -71,7 +71,7 @@ CHECK_SWIG_PYTHON if test x"$force_swig_python" = "xyes" -a x"$swig_python_error" = "xyes"; then exit 1; fi AM_CONDITIONAL(AMHAVESWIGPYTHON, test x"$have_swig_python" = xyes) -# Checks for pdflatex and PGFPLOTS needed for otfprofile-mpi to convert TeX output to PDF +# Checks for pdflatex and PGFPLOTS needed for otfprofile to convert TeX output to PDF CHECK_PDFLATEX_PGFPLOTS @@ -134,9 +134,9 @@ AC_CONFIG_FILES([Makefile tools/otfdump/Makefile tools/otfinfo/Makefile tools/otfmerge/Makefile - tools/otfmerge-mpi/Makefile + tools/otfmerge/mpi/Makefile tools/otfprofile/Makefile - tools/otfprofile-mpi/Makefile + tools/otfprofile/mpi/Makefile tools/otfshrink/Makefile tools/otf2vtf/Makefile tools/vtf2otf/Makefile @@ -146,6 +146,7 @@ AC_CONFIG_FILES([Makefile tests/generic_streams-mpi/Makefile tests/progress/Makefile tests/read_from_buffer/Makefile + tests/thumbnail/Makefile docu/Makefile ]) diff --git 
a/ompi/contrib/vt/vt/extlib/otf/docu/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/docu/Makefile.am index 65a68e9570..85ddce70a9 100644 --- a/ompi/contrib/vt/vt/extlib/otf/docu/Makefile.am +++ b/ompi/contrib/vt/vt/extlib/otf/docu/Makefile.am @@ -3,7 +3,8 @@ otfdoc_DATA = \ $(top_srcdir)/LICENSE \ api/specification.pdf \ tools/otftools.pdf \ - tools/otfprofile.pdf + tools/otfprofile.pdf \ + tools/otfprofile_clustering.pdf otfdocdir = $(docdir) diff --git a/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf b/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf index c51c8cf290..33b553a281 100644 Binary files a/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf and b/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf differ diff --git a/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile_clustering.pdf b/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile_clustering.pdf new file mode 100644 index 0000000000..27de65f320 Binary files /dev/null and b/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile_clustering.pdf differ diff --git a/ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf b/ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf index 477b242f25..bc8076b96f 100644 Binary files a/ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf and b/ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf differ diff --git a/ompi/contrib/vt/vt/extlib/otf/otfauxlib/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/Makefile.am index a033fe4ec2..1853bc2ae7 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otfauxlib/Makefile.am +++ b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/Makefile.am @@ -1,5 +1,6 @@ include_HEADERS = otfaux.h \ - OTFAUX_MsgMatching.h + OTFAUX_MsgMatching.h \ + OTFAUX_Thumbnail.h noinst_HEADERS = @@ -14,6 +15,7 @@ libotfaux_la_CFLAGS = -prefer-pic #libotfaux_la_LDFLAGS = -version-info @OTF_VERSION_LIBRARY@ libotfaux_la_LIBADD = $(top_builddir)/vendor/jenkins_hash/libjenkins_hash.la libotfaux_la_SOURCES = \ - OTFAUX_MsgMatching.c + 
OTFAUX_MsgMatching.c \ + OTFAUX_Thumbnail.c EXTRA_DIST = diff --git a/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Thumbnail.c b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Thumbnail.c new file mode 100644 index 0000000000..49235fb1b1 --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Thumbnail.c @@ -0,0 +1,574 @@ +#include + +#include +#include +#include + +#include + +/* for OTF_Error */ +#include + +#include + +#include "otfaux.h" + +#define FUNCTION_STACK_INCREMENT 16 + +typedef struct OTFAUX_Thumbail_Process { + /** next in hash chain */ + struct OTFAUX_Thumbail_Process* next; + + /** the id of this process */ + uint64_t token; + + /** current function stack */ + uint32_t* function_stack; + uint32_t stack_pos; + uint32_t stack_size; + + uint32_t pos, alloc; + uint32_t* start_pixel; + uint32_t* function; +} OTFAUX_Thumbail_Process; + +#define PROCESSES_HASH_SHIFT 10 +#define PROCESSES_HASH_SIZE (1 << PROCESSES_HASH_SHIFT) +#define PROCESSES_HASH_MASK (PROCESSES_HASH_SIZE - 1) + +struct OTFAUX_Thumbnail_Context { + /** The processes */ + OTFAUX_Thumbail_Process* processes[ PROCESSES_HASH_SIZE ]; + + /* timestamps */ + uint32_t* timestamps; +}; + +OTFAUX_Thumbnail_Context* +OTFAUX_Thumbnail_create( uint64_t minTime, + uint64_t maxTime, + uint32_t width ) +{ + OTFAUX_Thumbnail_Context* new_context = calloc( 1, sizeof( *new_context ) ); + + /* TODO: callculate sample time stamps */ + + return new_context; +} + +void +OTFAUX_Thumbnail_destroy( OTFAUX_Thumbnail_Context* tn_context ) +{ + int i; + for ( i = 0; i < PROCESSES_HASH_SIZE; i++ ) { + while ( tn_context->processes[ i ] ) { + OTFAUX_Thumbail_Process* next = tn_context->processes[ i ]->next; + free( tn_context->processes[ i ]->function_stack ); + free( tn_context->processes[ i ] ); + tn_context->processes[ i ] = next; + } + } + + free( tn_context ); +} + +static OTFAUX_Thumbail_Process* +get_process( OTFAUX_Thumbnail_Context* tn_context, + uint64_t process_token, + int create ) +{ + 
uint32_t process_hash = hash( &process_token, sizeof( process_token ), 0 ); + OTFAUX_Thumbail_Process** process_bucket = &tn_context->processes[ process_hash & PROCESSES_HASH_MASK ]; + OTFAUX_Thumbail_Process* process = *process_bucket; + + /* search in hash chain */ + while ( process ) { + if ( process->token == process_token ) { + /* found, is this an error? */ + return process; + } + + process = process->next; + } + + if ( !create ) { + return process; + } + + /* create new process */ + process = calloc( 1, sizeof( *process ) ); + if ( !process ) { + return NULL; + } + + process->token = process_token; + process->function_stack = calloc( FUNCTION_STACK_INCREMENT, + sizeof( *process->function_stack ) ); + if ( !process->function_stack ) { + free( process ); + return NULL; + } + process->stack_size = FUNCTION_STACK_INCREMENT; + + /* TODO: init arrays */ + + /* chain into hash table */ + process->next = *process_bucket; + *process_bucket = process; + + return process; +} + +void +OTFAUX_Thumbnail_declareProcess( OTFAUX_Thumbnail_Context* tn_context, + uint64_t process_token ) +{ + OTFAUX_Thumbail_Process* process = get_process( tn_context, + process_token, + 1 ); + + if ( !process ) { + return; + } + + /* TODO: enter the invalid */ +} + +void +OTFAUX_Thumbnail_handleEnter( OTFAUX_Thumbnail_Context* tn_context, + uint64_t timestamp, + uint64_t process_token, + uint32_t function_token ) +{ + OTFAUX_Thumbail_Process* process; + + process = get_process( tn_context, process_token, 0 ); + + if ( !process ) { + return; + } + + /* need to increase stack size? 
*/ + if ( process->stack_pos == process->stack_size ) { + uint32_t new_stack_size = process->stack_size + FUNCTION_STACK_INCREMENT; + uint32_t* new_function_stack = realloc( process->function_stack, + new_stack_size * sizeof( *process->function_stack ) ); + if ( !new_function_stack ) { + return; + } + process->function_stack = new_function_stack; + process->stack_size = new_stack_size; + } + + process->function_stack[ process->stack_pos++ ] = function_token; + + /* TODO: check for pipxel */ +} + +void +OTFAUX_Thumbnail_handleLeave( OTFAUX_Thumbnail_Context* tn_context, + uint64_t timestamp, + uint64_t process_token ) +{ + OTFAUX_Thumbail_Process* process; + + process = get_process( tn_context, process_token, 0 ); + + if ( !process || process->stack_pos == 0 ) { + return; + } + + /* pop from function stack */ + process->stack_pos--; + + /* TODO: check for pipxel */ +} + +uint32_t +OTFAUX_Thumbnail_getSize( OTFAUX_Thumbnail_Context* tn_context, + uint64_t process_token ) +{ + OTFAUX_Thumbail_Process* process; + + process = get_process( tn_context, process_token, 0 ); + + if ( !process ) { + return 0; + } + + /* TODO */ + return 0; +} + +int +OTFAUX_Thumbnail_getData( OTFAUX_Thumbnail_Context* tn_context, + uint64_t process_token, + OTFAUX_Thumbnail_Data* data ) +{ + OTFAUX_Thumbail_Process* process; + + process = get_process( tn_context, process_token, 0 ); + + if ( !process ) { + return 0; + } + + /* TODO */ + return 0; +} + +char* +OTFAUX_Thumbnail_getFilename( const char* namestub, + size_t length, + char* name_buffer ) +{ + if ( !namestub ) { + return NULL; + } + + if ( ( NULL == name_buffer ) || ( 0 == length ) ) { + length = strlen( namestub ) + strlen( ".thumb" ) + 1; + name_buffer = (char*)malloc( length * sizeof( char ) ); + } + + strcpy( name_buffer, namestub ); + strcat( name_buffer, ".thumb" ); + + return name_buffer; +} + +struct OTFAUX_ThumbnailWriter { + char* namestub; + OTF_FileManager* manager; + + uint32_t height, width; + + OTF_WBuffer* buffer; 
+}; + + +OTFAUX_ThumbnailWriter* +OTFAUX_ThumbnailWriter_create( const char* filename, + uint32_t height, + uint32_t width, + OTF_FileManager* manager ) +{ + OTFAUX_ThumbnailWriter* new_writer; + + if ( !filename || !manager ) { + return NULL; + } + + new_writer = calloc( 1, sizeof( *new_writer) ); + if ( !new_writer ) { + return NULL; + } + + new_writer->namestub = OTF_stripFilename( filename ); + if ( !new_writer->namestub ) { + free( new_writer ); + return NULL; + } + + new_writer->height = height; + new_writer->width = width; + new_writer->manager = manager; + + return new_writer; +} + +int +OTFAUX_ThumbnailWriter_destroy( OTFAUX_ThumbnailWriter* tn_writer ) +{ + int ret; + + if ( !tn_writer ) { + return 0; + } + + ret = OTFAUX_ThumbnailWriter_close( tn_writer ); + + free( tn_writer->namestub ); + free( tn_writer ); + + return ret; +} + +int +OTFAUX_ThumbnailWriter_close( OTFAUX_ThumbnailWriter* tn_writer ) +{ + if ( !tn_writer ) { + return 0; + } + + if ( tn_writer->buffer ) { + OTF_WBuffer_close( tn_writer->buffer ); + } + tn_writer->buffer = NULL; + + return 1; +} + +int +OTFAUX_ThumbnailWriter_writeProcess( OTFAUX_ThumbnailWriter* tn_writer, + uint64_t process, + OTFAUX_Thumbnail_Data* data ) +{ + size_t i; + char sep = ':'; + + if ( !tn_writer || !data ) { + return 0; + } + + if ( !tn_writer->buffer ) { + char* filename = OTFAUX_Thumbnail_getFilename( tn_writer->namestub, + 0, NULL ); + + if ( !filename ) { + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "OTFAUX_Thumbnail_getFilename() failed.\n", + __FUNCTION__, __FILE__, __LINE__ ); + + return 0; + } + + tn_writer->buffer = OTF_WBuffer_open( filename, tn_writer->manager ); + if ( !tn_writer->buffer ) { + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "OTF_WBuffer_open( %s ) failed.\n", + __FUNCTION__, __FILE__, __LINE__, filename ); + + free( filename ); + + return 0; + } + + OTF_WBuffer_setSize( tn_writer->buffer, tn_writer->width * 16 ); + + free( filename ); + + /* write 
header */ + OTF_WBuffer_writeUint32( tn_writer->buffer, 0 ); + OTF_WBuffer_writeChar( tn_writer->buffer, ':' ); + OTF_WBuffer_writeUint32( tn_writer->buffer, tn_writer->height ); + OTF_WBuffer_writeChar( tn_writer->buffer, ',' ); + OTF_WBuffer_writeUint32( tn_writer->buffer, tn_writer->width ); + OTF_WBuffer_writeNewline( tn_writer->buffer ); + } + + OTF_WBuffer_writeUint64( tn_writer->buffer, process ); + + for ( i = 0; i < data->size; i++ ) { + OTF_WBuffer_writeChar( tn_writer->buffer, sep ); + sep = ';'; + + OTF_WBuffer_writeUint32( tn_writer->buffer, data->start_pixel[ i ] ); + OTF_WBuffer_writeChar( tn_writer->buffer, ',' ); + OTF_WBuffer_writeUint32( tn_writer->buffer, data->function[ i ] ); + } + + OTF_WBuffer_writeNewline( tn_writer->buffer ); + + return 1; +} + +struct OTFAUX_ThumbnailReader +{ + char* namestub; + OTF_FileManager* manager; + + uint32_t height, width; + + OTF_RBuffer* buffer; +}; + +OTFAUX_ThumbnailReader* +OTFAUX_ThumbnailReader_create( const char* filename, + OTF_FileManager* manager ) +{ + OTFAUX_ThumbnailReader* new_reader; + + if ( !filename || !manager ) { + return NULL; + } + + new_reader = calloc( 1, sizeof( *new_reader) ); + if ( !new_reader ) { + return NULL; + } + + new_reader->namestub = OTF_stripFilename( filename ); + if ( !new_reader->namestub ) { + free( new_reader ); + return NULL; + } + + new_reader->manager = manager; + + return new_reader; +} + +int +OTFAUX_ThumbnailReader_destroy( OTFAUX_ThumbnailReader* tn_reader ) +{ + int ret; + + if ( !tn_reader ) { + return 0; + } + + ret = OTFAUX_ThumbnailReader_close( tn_reader ); + + free( tn_reader->namestub ); + free( tn_reader ); + + return ret; +} + +int +OTFAUX_ThumbnailReader_close( OTFAUX_ThumbnailReader* tn_reader ) +{ + if ( !tn_reader ) { + return 0; + } + + if ( tn_reader->buffer ) { + OTF_RBuffer_close( tn_reader->buffer ); + } + tn_reader->buffer = NULL; + + return 1; +} + +int +OTFAUX_ThumbnailReader_getDimension( OTFAUX_ThumbnailReader* tn_reader, + uint32_t* 
height, + uint32_t* width ) +{ + if ( !tn_reader ) { + return 0; + } + + if ( !tn_reader->buffer ) { + uint32_t val; + + char* filename = OTFAUX_Thumbnail_getFilename( tn_reader->namestub, + 0, NULL ); + if ( !filename ) { + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "OTF_getFilename() failed.\n", + __FUNCTION__, __FILE__, __LINE__ ); + + return 0; + } + + tn_reader->buffer = OTF_RBuffer_open( filename, tn_reader->manager ); + if ( !tn_reader->buffer ) { + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "OTF_RBuffer_open( %s ) failed.\n", + __FUNCTION__, __FILE__, __LINE__, filename ); + + free( filename ); + + return 0; + } + + OTF_RBuffer_setSize( tn_reader->buffer, 1024 ); + + free( filename ); + + /* read header */ + + if ( !OTF_RBuffer_guaranteeRecord( tn_reader->buffer ) ) { + OTF_RBuffer_close( tn_reader->buffer ); + tn_reader->buffer = NULL; + return 0; + } + + val = OTF_RBuffer_readUint32( tn_reader->buffer ); + if ( val != 0 || !OTF_RBuffer_testChar( tn_reader->buffer, ':' ) ) { + OTF_RBuffer_close( tn_reader->buffer ); + tn_reader->buffer = NULL; + return 0; + } + + tn_reader->height = OTF_RBuffer_readUint32( tn_reader->buffer ); + + if ( !OTF_RBuffer_testChar( tn_reader->buffer, ',' ) ) { + OTF_RBuffer_close( tn_reader->buffer ); + tn_reader->buffer = NULL; + return 0; + } + + tn_reader->width = OTF_RBuffer_readUint32( tn_reader->buffer ); + + OTF_RBuffer_readNewline( tn_reader->buffer ); + } + + if ( height ) { + *height = tn_reader->height; + } + + if ( width ) { + *width = tn_reader->width; + } + + return 1; +} + +int +OTFAUX_ThumbnailReader_read( OTFAUX_ThumbnailReader* tn_reader, + void ( *handler )( void*, + uint64_t, + uint32_t, + uint32_t ), + void* data ) +{ + uint64_t process; + uint32_t start_pixel, function; + + if ( !tn_reader ) { + return 0; + } + + if ( !tn_reader->buffer ) { + int ret; + + ret = OTFAUX_ThumbnailReader_getDimension( tn_reader, NULL, NULL ); + if ( !ret ) { + return ret; + } + } + + while ( 
OTF_RBuffer_guaranteeRecord( tn_reader->buffer ) ) { + /* read process */ + process = OTF_RBuffer_readUint64( tn_reader->buffer ); + + if ( !OTF_RBuffer_testChar( tn_reader->buffer, ':' ) ) { + OTF_RBuffer_readNewline( tn_reader->buffer ); + continue; + } + + do { + + start_pixel = OTF_RBuffer_readUint32( tn_reader->buffer ); + + if ( !OTF_RBuffer_testChar( tn_reader->buffer, ',' ) ) { + OTF_RBuffer_readNewline( tn_reader->buffer ); + break; + } + + function = OTF_RBuffer_readUint32( tn_reader->buffer ); + + if ( handler ) { + handler( data, process, start_pixel, function ); + } + + } while ( OTF_RBuffer_testChar( tn_reader->buffer, ';' ) ); + + OTF_RBuffer_readNewline( tn_reader->buffer ); + } + + return 1; +} diff --git a/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Thumbnail.h b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Thumbnail.h new file mode 100644 index 0000000000..fa382ab8aa --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/OTFAUX_Thumbnail.h @@ -0,0 +1,225 @@ +#ifndef OTFAUX_THUMBNAIL_H +#define OTFAUX_THUMBNAIL_H + +#include + +/** + * @file otfauxlib/OTFAUX_Thumbnail.h + * + * @brief Provides a module to collect data for thumbnail generation. + */ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @defgroup thumbnail Module for thumbnail generation. + * + * @usage: + * + * ctx = OTFAUX_Thumbnail_create(minTime, maxTime, 1024); + * + * announce all interesting processes: + * OTFAUX_Thumbnail_declareProcess(ctx, ...); + * + * repeatedly call for interesting processes: + * OTFAUX_Thumbnail_handleEnter(ctx, ...); + * OTFAUX_Thumbnail_handleLeave(ctx, ...); + * + * at end, for all processes: + * OTFAUX_Thumbnail_Data td; + * OTFAUX_Thumbnail_getData(ctx, process, &td); + * .. do something with td.start_pixel and td.function .. + * + * OTFAUX_Thumbnail_destroy(ctx); + * + * @{ + */ + +/** Opaque type for using the thumbnail module.
*/ +typedef struct OTFAUX_Thumbnail_Context OTFAUX_Thumbnail_Context; + +/** + * Create a context for thumbnail generation. + * + * @param minTime Minimum timestamp of the trace file. + * @param maxTime Maximum timestamp of the trace file. + * @param width The width in pixels of the thumbnail. + * + * @return The context. + */ +OTFAUX_Thumbnail_Context* +OTFAUX_Thumbnail_create( uint64_t minTime, + uint64_t maxTime, + uint32_t width ); + +/** + * Destroy a context previously created with @a OTFAUX_Thumbnail_create. + * + * @param tn_context The context. + */ +void +OTFAUX_Thumbnail_destroy( OTFAUX_Thumbnail_Context* tn_context ); + +/** + * Declares that the process @a process should be handled by this context. + * + * @param tn_context The context. + */ +void +OTFAUX_Thumbnail_declareProcess( OTFAUX_Thumbnail_Context* tn_context, + uint64_t process ); + +/** + * Declare that the process @a process has entered the function @a function + * at timestamp @a timestamp. + * + * This function needs to be called in monotonically increasing timestamp order. + * + * @param tn_context The context. + * @param timestamp The timestamp. + * @param process The process. + * @param function The function. + */ +void +OTFAUX_Thumbnail_handleEnter( OTFAUX_Thumbnail_Context* tn_context, + uint64_t timestamp, + uint64_t process, + uint32_t function ); + +/** + * Declare that the process @a process has left the current function at + * timestamp @a timestamp. + * + * This function needs to be called in monotonically increasing timestamp order. + * + * @param tn_context The context. + * @param timestamp The timestamp. + * @param process The process. + */ +void +OTFAUX_Thumbnail_handleLeave( OTFAUX_Thumbnail_Context* tn_context, + uint64_t timestamp, + uint64_t process ); + +/** + * Get the number of entries for the process @a process. + * + * @param tn_context The context. + * + * @return The size.
+ */ +uint32_t +OTFAUX_Thumbnail_getSize( OTFAUX_Thumbnail_Context* context, + uint64_t process ); + +typedef struct { + uint32_t* start_pixel; + uint32_t* function; + uint32_t size; +} OTFAUX_Thumbnail_Data; + +/** + * Get the collected thumbnail data for process @a process. + * + * @param tn_context The context. + * @param process The process. + * @param data Pointer to storage where the data will be stored into. + * @param size Pointer to storage where the size will be stored into. + * + * @return 1 on success. + */ +int +OTFAUX_Thumbnail_getData( OTFAUX_Thumbnail_Context* context, + uint64_t process, + OTFAUX_Thumbnail_Data* data ); + +/** + * @} + */ + +char* +OTFAUX_Thumbnail_getFilename( const char* namestub, + size_t length, + char* name_buffer ); + +/** + * @defgroup thumbnailwriter Module to write a thumbnail. + * + * @usage: + * + * writer = OTFAUX_ThumbnailWriter_create("foo.otf", 512, 1024, ...); + * + * for each process: + * OTFAUX_Thumbnail_Data td; + * OTFAUX_Thumbnail_getData( ctx, process, &td ); + * OTFAUX_ThumbnailWriter_writeProcess( writer, process, &td ); + * + * OTFAUX_ThumbnailWriter_destroy( writer ); + */ + +typedef struct OTFAUX_ThumbnailWriter OTFAUX_ThumbnailWriter; + +OTFAUX_ThumbnailWriter* +OTFAUX_ThumbnailWriter_create( const char* filename, + uint32_t height, + uint32_t width, + OTF_FileManager* manager ); + +int +OTFAUX_ThumbnailWriter_destroy( OTFAUX_ThumbnailWriter* tn_writer ); + +int +OTFAUX_ThumbnailWriter_close( OTFAUX_ThumbnailWriter* tn_writer ); + +int +OTFAUX_ThumbnailWriter_writeProcess( OTFAUX_ThumbnailWriter* tn_writer, + uint64_t process, + OTFAUX_Thumbnail_Data* data ); + +/** + * @} + */ + + +/** + * @defgroup thumbnailreader Module to read a thumbnail.
+ */ + +typedef struct OTFAUX_ThumbnailReader OTFAUX_ThumbnailReader; + +OTFAUX_ThumbnailReader* +OTFAUX_ThumbnailReader_create( const char* filename, + OTF_FileManager* manager ); + +int +OTFAUX_ThumbnailReader_destroy( OTFAUX_ThumbnailReader* tn_reader ); + +int +OTFAUX_ThumbnailReader_close( OTFAUX_ThumbnailReader* tn_reader ); + +int +OTFAUX_ThumbnailReader_getDimension( OTFAUX_ThumbnailReader* tn_reader, + uint32_t* height, + uint32_t* width ); + +int +OTFAUX_ThumbnailReader_read( OTFAUX_ThumbnailReader* tn_reader, + void ( *process_handler )( void*, + uint64_t, + uint32_t, + uint32_t ), + void* data ); + + +/** + * @} + */ + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* OTFAUX_THUMBNAIL_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/otfauxlib/otfaux.h b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/otfaux.h index 51aa5deee3..75586cb22c 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otfauxlib/otfaux.h +++ b/ompi/contrib/vt/vt/extlib/otf/otfauxlib/otfaux.h @@ -9,5 +9,6 @@ */ #include +#include #endif /* OTFAUX_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler.c index e46e03719b..ac0a4da7ef 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler.c @@ -175,34 +175,32 @@ int OTF_CopyHandler_DefKeyValue( void* userData, uint32_t stream, uint32_t key, type, name, description, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; } -int OTF_CopyHandler_DefTimeRange( void* userData, - uint32_t stream, - uint64_t minTime, - uint64_t maxTime, - OTF_KeyValueList* list ) { +int OTF_CopyHandler_DefTimeRange( void* userData, uint32_t stream, + uint64_t minTime, uint64_t maxTime, OTF_KeyValueList* list ) { - return ( 0 == OTF_Writer_writeDefTimeRange( (OTF_Writer*)userData, - stream, - minTime, - maxTime, - list ) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + return ( 0 == OTF_Writer_writeDefTimeRange( (OTF_Writer*)userData, + stream, minTime, maxTime, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; } -int OTF_CopyHandler_DefCounterAssignments( void* userData, - uint32_t stream, - uint32_t counter, - uint32_t number_of_members, - const uint32_t* procs_or_groups, - OTF_KeyValueList* list ) { +int OTF_CopyHandler_DefCounterAssignments( void* userData, uint32_t stream, + uint32_t counter, uint32_t number_of_members, const uint32_t* procs_or_groups, + OTF_KeyValueList* list ) { - return ( 0 == OTF_Writer_writeDefCounterAssignments( (OTF_Writer*)userData, - stream, - counter, - number_of_members, - procs_or_groups, - list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + return ( 0 == OTF_Writer_writeDefCounterAssignments( (OTF_Writer*)userData, + stream, counter, number_of_members, procs_or_groups, + list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; +} + +int OTF_CopyHandler_DefProcessSubstitutes( void* userData, uint32_t stream, + uint32_t representative, uint32_t numberOfProcs, const uint32_t* procs, + OTF_KeyValueList* list ) { + + + return ( 0 == OTF_Writer_writeDefProcessSubstitutes( (OTF_Writer*)userData, + stream, representative, numberOfProcs, procs, + list ) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; } /* *** Event handlers *** ****************************************** */ diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler.h index 5e1679ded0..a0efa05ff0 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler.h @@ -81,18 +81,16 @@ int OTF_CopyHandler_DefFileGroup( void* userData, uint32_t stream, int OTF_CopyHandler_DefKeyValue( void* userData, uint32_t stream, uint32_t key, OTF_Type type, const char* name, const char* description, OTF_KeyValueList* list ); -int OTF_CopyHandler_DefTimeRange( void* userData, - uint32_t stream, - uint64_t minTime, - uint64_t maxTime, - OTF_KeyValueList* list ); +int OTF_CopyHandler_DefTimeRange( void* userData, uint32_t stream, + uint64_t minTime, uint64_t maxTime, OTF_KeyValueList* list ); -int OTF_CopyHandler_DefCounterAssignments( void* userData, - uint32_t stream, - uint32_t counter, - uint32_t number_of_members, - const uint32_t* procs_or_groups, - OTF_KeyValueList* list ); +int OTF_CopyHandler_DefCounterAssignments( void* userData, uint32_t stream, + uint32_t counter, uint32_t number_of_members, + const uint32_t* procs_or_groups, OTF_KeyValueList* list ); + +int OTF_CopyHandler_DefProcessSubstitutes( void* userData, uint32_t stream, + uint32_t representative, uint32_t numberOfProcs, const uint32_t* procs, + OTF_KeyValueList* list ); int OTF_CopyHandler_NoOp( void* userData, uint64_t time, uint32_t process, OTF_KeyValueList* list ); diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler_stream.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler_stream.c index b054a23bc8..22f6e0db6d 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler_stream.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler_stream.c @@ -175,32 +175,32 @@ int OTF_CopyHandler_stream_DefKeyValue( void* userData, uint32_t stream, uint32_ type, name, 
description, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; } -int OTF_CopyHandler_stream_DefTimeRange( void* userData, - uint32_t stream, - uint64_t minTime, - uint64_t maxTime, - OTF_KeyValueList* list ) { +int OTF_CopyHandler_stream_DefTimeRange( void* userData, uint32_t stream, + uint64_t minTime, uint64_t maxTime, OTF_KeyValueList* list ) { - return ( 0 == OTF_WStream_writeDefTimeRange( (OTF_WStream*)userData, - minTime, - maxTime, - list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + return ( 0 == OTF_WStream_writeDefTimeRange( (OTF_WStream*)userData, + minTime, maxTime, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; } -int OTF_CopyHandler_stream_DefCounterAssignments( void* userData, - uint32_t stream, - uint32_t counter, - uint32_t number_of_members, - const uint32_t* procs_or_groups, - OTF_KeyValueList* list ) { +int OTF_CopyHandler_stream_DefCounterAssignments( void* userData, uint32_t stream, + uint32_t counter, uint32_t number_of_members, const uint32_t* procs_or_groups, + OTF_KeyValueList* list ) { - return ( 0 == OTF_WStream_writeDefCounterAssignments( (OTF_WStream*)userData, - counter, - number_of_members, - procs_or_groups, - list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + return ( 0 == OTF_WStream_writeDefCounterAssignments( (OTF_WStream*)userData, + counter, number_of_members, procs_or_groups, + list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; +} + +int OTF_CopyHandler_stream_DefProcessSubstitutes( void* userData, uint32_t stream, + uint32_t representative, uint32_t numberOfProcs, const uint32_t* procs, + OTF_KeyValueList* list ) { + + + return ( 0 == OTF_WStream_writeDefProcessSubstitutes( (OTF_WStream*)userData, + representative, numberOfProcs, procs, + list ) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; } /* *** Event handlers *** ****************************************** */ diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler_stream.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler_stream.h index 6eb02701a3..3dc5d6103c 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler_stream.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_CopyHandler_stream.h @@ -81,18 +81,16 @@ int OTF_CopyHandler_stream_DefFileGroup( void* userData, uint32_t stream, int OTF_CopyHandler_stream_DefKeyValue( void* userData, uint32_t stream, uint32_t key, OTF_Type type, const char* name, const char* description, OTF_KeyValueList* list ); -int OTF_CopyHandler_stream_DefTimeRange( void* userData, - uint32_t stream, - uint64_t minTime, - uint64_t maxTime, - OTF_KeyValueList* list ); +int OTF_CopyHandler_stream_DefTimeRange( void* userData, uint32_t stream, + uint64_t minTime, uint64_t maxTime, OTF_KeyValueList* list ); -int OTF_CopyHandler_stream_DefCounterAssignments( void* userData, - uint32_t stream, - uint32_t counter, - uint32_t number_of_members, - const uint32_t* procs_or_groups, - OTF_KeyValueList* list ); +int OTF_CopyHandler_stream_DefCounterAssignments( void* userData, uint32_t stream, + uint32_t counter, uint32_t number_of_members, const uint32_t* procs_or_groups, + OTF_KeyValueList* list ); + +int OTF_CopyHandler_stream_DefProcessSubstitutes( void* userData, uint32_t stream, + uint32_t representative, uint32_t numberOfProcs, const uint32_t* procs, + OTF_KeyValueList* list ); int OTF_CopyHandler_stream_NoOp( void* userData, uint64_t time, uint32_t process, OTF_KeyValueList* list ); diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Definitions.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Definitions.h index 27a0f6361d..2e6c5e88ed 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Definitions.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Definitions.h @@ -61,6 +61,7 @@ yet it breaks the link compatibility of 
library versions.*/ #define OTF_DEFPROCESSGROUP_RECORD 15 #define OTF_DEFATTRLIST_RECORD 55 #define OTF_DEFPROCESSORGROUPATTR_RECORD 56 +#define OTF_DEFPROCESSSUBSTITUTES_RECORD 63 #define OTF_DEFFUNCTION_RECORD 16 #define OTF_DEFFUNCTIONGROUP_RECORD 17 #define OTF_DEFCOUNTER_RECORD 18 @@ -102,7 +103,7 @@ yet it breaks the link compatibility of library versions.*/ #define OTF_MARKER_RECORD 46 /* Number of records */ -#define OTF_NRECORDS 63 +#define OTF_NRECORDS 64 /* Stream format definition */ @@ -193,8 +194,8 @@ could be added for convenience. /* File Operations - 32-bit -The bits 0-4 contain the identifier of the file operation that has happened. -The bits 5-31 are bit flags that carry additional information on the operation. +The bits 0-5 contain the identifier of the file operation that has happened. +The bits 6-31 are bit flags that carry additional information on the operation. A macro allows for accessing the file operation in a convenient way. */ #define OTF_FILEOP_BITS 0x0000001f @@ -251,7 +252,8 @@ typedef enum OTF_ATTR_TYPE_enum { OTF_ATTR_IsOMPThread = 3, /**< */ OTF_ATTR_IsCellSPUThread = 4, /**< */ OTF_ATTR_hasGroupCounters = 5, /**< */ - OTF_ATTR_hasEnterLeaveRecords = 6 /**< */ + OTF_ATTR_hasEnterLeaveRecords = 6, /**< */ + OTF_ATTR_IsCommunicator = 7 /**< */ } OTF_ATTR_TYPE; diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Errno.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Errno.c index 782f9e5591..a906c630f5 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Errno.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Errno.c @@ -1,36 +1,40 @@ #ifdef HAVE_CONFIG_H - #include "config.h" +#include "config.h" #endif + #include "OTF_Definitions.h" #include "OTF_Errno.h" +#include +#include + + char otf_strerr[OTF_ERR_LEN] = "No errors occurred."; int otf_errno = OTF_NO_ERROR; +void OTF_Error( const char* format, ... 
) { + + va_list ap; + va_start( ap, format ); + + vsnprintf( otf_strerr, OTF_ERR_LEN, format, ap ); + otf_errno = OTF_ERROR; #ifdef OTF_VERBOSE + fprintf( stderr, "%s", otf_strerr ); +#endif /* OTF_VERBOSE */ - void OTF_fprintf( FILE* stream, const char* format, ... ) { - va_list ap; - va_start(ap, format); + va_end( ap ); +} - vsnprintf( otf_strerr, OTF_ERR_LEN, format, ap ); - otf_errno = OTF_ERROR; - fprintf( stream, "%s", otf_strerr ); +void OTF_Warning( const char* format, ... ) { - va_end(ap); - } + va_list ap; + va_start( ap, format ); -#else +#ifdef OTF_VERBOSE + vfprintf( stderr, format, ap ); +#endif /* OTF_VERBOSE */ - void OTF_fprintf( FILE* stream, const char* format, ... ) { - va_list ap; - va_start(ap, format); - - vsnprintf( otf_strerr, OTF_ERR_LEN, format, ap ); - otf_errno = OTF_ERROR; - - va_end(ap); - } - -#endif + va_end( ap ); +} diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Errno.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Errno.h index 2370e2af55..e1b7bb9971 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Errno.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Errno.h @@ -32,19 +32,16 @@ #define OTF_ERRNO_H -#include -#include - - #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ /** @cond Errno.h */ -/** the following line is ignored by doxygen */ +/** the following lines are ignored by doxygen */ -void OTF_fprintf( FILE* stream, const char* format, ... ); +void OTF_Error( const char* format, ... ); +void OTF_Warning( const char* format, ...
); /** @endcond */ diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_File.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_File.c index 032c97d83c..21d0c7199b 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_File.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_File.c @@ -168,88 +168,90 @@ OTF_File* OTF_File_open( const char* filename, OTF_File* OTF_File_open_with_external_buffer( uint32_t len, const char* buffer, uint8_t is_compressed, OTF_FileMode mode ) { - OTF_File* ret; + OTF_File* ret; - ret= (OTF_File*) malloc( sizeof(OTF_File) ); - if( NULL == ret ) { + ret= (OTF_File*) malloc( sizeof(OTF_File) ); + if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "no memory left.\n", - __FUNCTION__, __FILE__, __LINE__ ); + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "no memory left.\n", + __FUNCTION__, __FILE__, __LINE__ ); - return NULL; - } + return NULL; + } - OTF_File_init( ret ); + OTF_File_init( ret ); - ret->externalbuffer= buffer; - ret->externalpos= 0; - ret->externallen= (uint64_t) len; + ret->externalbuffer= buffer; + ret->externalpos= 0; + ret->externallen= (uint64_t) len; - ret->mode = mode; + ret->mode = mode; - if ( is_compressed ) { + if ( is_compressed ) { #ifdef HAVE_ZLIB - /* alloc zlib stuff */ - ret->z= malloc( sizeof(z_stream) ); - if( NULL == ret->z ) { + /* alloc zlib stuff */ + ret->z= malloc( sizeof(z_stream) ); + if( NULL == ret->z ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "no memory left.\n", + __FUNCTION__, __FILE__, __LINE__ ); - free( ret ); - ret= NULL; - - return NULL; - } + free( ret ); + ret= NULL; - ret->z->next_in= NULL; - ret->z->avail_in= 0; - ret->z->zalloc= NULL; - ret->z->zfree= NULL; - ret->z->opaque= NULL; + return NULL; + } - inflateInit( ret->z ); + ret->z->next_in= NULL; + ret->z->avail_in= 0; + ret->z->zalloc= 
NULL; + ret->z->zfree= NULL; + ret->z->opaque= NULL; - ret->zbuffer= malloc( ret->zbuffersize ); - if( NULL == ret->zbuffer ) { + inflateInit( ret->z ); - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); + ret->zbuffer= malloc( ret->zbuffersize ); + if( NULL == ret->zbuffer ) { - free( ret->zbuffer ); - ret->zbuffer= NULL; - free( ret->z ); - ret->z= NULL; - free( ret ); - ret= NULL; + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "no memory left.\n", + __FUNCTION__, __FILE__, __LINE__ ); - return NULL; - } + free( ret->zbuffer ); + ret->zbuffer= NULL; + free( ret->z ); + ret->z= NULL; + free( ret ); + ret= NULL; + + return NULL; + } #else /* HAVE_ZLIB */ - free( ret ); - ret= NULL; + free( ret ); + ret= NULL; - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "built without HAVE_ZLIB, still trying to open with compressed buffer.\n", - __FUNCTION__, __FILE__, __LINE__ ); + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "built without HAVE_ZLIB, still trying to open with compressed buffer.\n", + __FUNCTION__, __FILE__, __LINE__ ); - return NULL; + return NULL; #endif /* HAVE_ZLIB */ - } else { + } else { - /* normal, don't need any special setup */ - } + /* normal, don't need any special setup */ + } - ret->manager= NULL; + ret->manager= NULL; - return ret; + return ret; } @@ -265,7 +267,7 @@ size_t OTF_File_write( OTF_File* file, const void* ptr, size_t size ) { if ( NULL != file->externalbuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "not yet supported in 'external buffer' mode.\n", __FUNCTION__, __FILE__, __LINE__ ); return (size_t) -1; @@ -274,7 +276,7 @@ size_t OTF_File_write( OTF_File* file, const void* ptr, size_t size ) { if( OTF_FILEMODE_WRITE != file->mode ) { - OTF_fprintf ( stderr, "ERROR in function %s, file: %s, line: %i:\n " + 
OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "current file->mode is not OTF_FILEMODE_WRITE. writing forbidden.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -282,15 +284,9 @@ size_t OTF_File_write( OTF_File* file, const void* ptr, size_t size ) { } - /* - OTF_fprintf( stderr, "OTF_File_write: %u / %u file handles\n", - OTF_FileManager_getCount( file->manager ), - OTF_FileManager_getNumber( file->manager ) ); - */ - if( 0 == OTF_File_revive( file, OTF_FILEMODE_WRITE ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_File_revive() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -301,58 +297,53 @@ size_t OTF_File_write( OTF_File* file, const void* ptr, size_t size ) { if ( NULL != file->z ) { - /* compress the data without using the ybuffer */ - file->z->avail_in = size; - file->z->next_in = (void*)ptr; - - while (file->z->avail_in > 0) - { - status = deflate(file->z, Z_FULL_FLUSH); - if (status == Z_STREAM_ERROR) - { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "error in compressing, status %i.\n", - __FUNCTION__, __FILE__, __LINE__, status ); - return byteswritten; - } - - while (file->z->avail_out == 0) - { - size_t towrite = file->zbuffersize - file->z->avail_out; - if (towrite != fwrite(file->zbuffer, 1, towrite, file->file)) - { - OTF_fprintf(stderr, "ERROR in function %s, file: %s, line %i:\n", - "Failed to write %u bytes to file!\n", - __FUNCTION__, __FILE__, __LINE__, towrite); - return byteswritten; - } - file->z->avail_out = file->zbuffersize; - file->z->next_out = file->zbuffer; - status = deflate(file->z, Z_FULL_FLUSH); - if (status == Z_STREAM_ERROR) - { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "error in compressing, status %i.\n", - __FUNCTION__, __FILE__, __LINE__, status ); - assert(status != Z_STREAM_ERROR); - return byteswritten; - } - } - byteswritten = size - file->z->avail_in; - } + /* 
compress the data without using the ybuffer */ + file->z->avail_in = size; + file->z->next_in = (void*)ptr; + + while (file->z->avail_in > 0) { + + status = deflate(file->z, Z_FULL_FLUSH); + if (status == Z_STREAM_ERROR) { + + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "error in compressing, status %i.\n", + __FUNCTION__, __FILE__, __LINE__, status ); + return byteswritten; + } + + while (file->z->avail_out == 0) { + + size_t towrite = file->zbuffersize - file->z->avail_out; + if (towrite != fwrite(file->zbuffer, 1, towrite, file->file)) { + + OTF_Error( "ERROR in function %s, file: %s, line %i:\n" + "Failed to write %lu bytes to file!\n", + __FUNCTION__, __FILE__, __LINE__, (unsigned long) towrite); + return byteswritten; + } + file->z->avail_out = file->zbuffersize; + file->z->next_out = file->zbuffer; + status = deflate(file->z, Z_FULL_FLUSH); + if (status == Z_STREAM_ERROR) { + + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "error in compressing, status %i.\n", + __FUNCTION__, __FILE__, __LINE__, status ); + assert(status != Z_STREAM_ERROR); + return byteswritten; + } + } + byteswritten = size - file->z->avail_in; + } } else { #endif /* HAVE_ZLIB */ - /* - OTF_fprintf( stderr, "OTF_File_write(): buffer %p, size %u file %p\n", ptr, - (uint32_t) size, file->file ); - */ - byteswritten= fwrite( ptr, 1, size, file->file ); if( byteswritten < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "less bytes written than expected %u < %u.\n", __FUNCTION__, __FILE__, __LINE__, (uint32_t) byteswritten, (uint32_t) size ); @@ -379,7 +370,7 @@ size_t OTF_File_read( OTF_File* file, void* ptr, size_t size ) { if( OTF_FILEMODE_WRITE == file->mode ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "current file->mode is OTF_FILEMODE_WRITE.
reading forbidden.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -388,7 +379,7 @@ size_t OTF_File_read( OTF_File* file, void* ptr, size_t size ) { if( 0 == OTF_File_revive( file, OTF_FILEMODE_READ ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_File_revive() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -422,7 +413,7 @@ size_t OTF_File_read( OTF_File* file, void* ptr, size_t size ) { status = inflate( file->z, Z_SYNC_FLUSH ); if ( status != Z_OK ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "error in uncompressing, status %u.\n", __FUNCTION__, __FILE__, __LINE__, status ); @@ -464,7 +455,7 @@ int OTF_File_seek( OTF_File* file, uint64_t pos ) { if ( NULL != file->externalbuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "not yet supported in 'external buffer' mode.\n", __FUNCTION__, __FILE__, __LINE__ ); return -1; @@ -473,7 +464,7 @@ int OTF_File_seek( OTF_File* file, uint64_t pos ) { if( OTF_FILEMODE_WRITE == file->mode ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "current file->mode is OTF_FILEMODE_WRITE. 
seeking forbidden.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -483,7 +474,7 @@ int OTF_File_seek( OTF_File* file, uint64_t pos ) { if( 0 == OTF_File_revive( file, OTF_FILEMODE_SEEK ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_File_revive() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -499,28 +490,17 @@ int OTF_File_seek( OTF_File* file, uint64_t pos ) { do { - /* - OTF_fprintf( stderr, "OTF_File_seek() with zlib: jump to %llu\n", - (unsigned long long) pos ); - */ - /* OLD: read= fread( file->zbuffer, 1, file->zbuffersize, file->file ); */ read= OTF_File_read_internal( file, file->zbuffer, file->zbuffersize ); - - /* - OTF_fprintf( stderr, "OTF_File_seek() with zlib: read %llu bytes\n", - (unsigned long long) read ); - */ - file->z->next_in= file->zbuffer; file->z->avail_in= (uInt) read; file->z->total_in= 0; /* re-initialize z object */ - inflateReset(file->z); + inflateReset(file->z); /* do not sync at very beginning of compressed stream because it would skip the first block */ @@ -542,19 +522,14 @@ int OTF_File_seek( OTF_File* file, uint64_t pos ) { if ( Z_DATA_ERROR == sync ) { - /* do not break here, this might happen with larger zlib chunks */ - /*OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "Z_DATA_ERROR.\n", - __FUNCTION__, __FILE__, __LINE__ ); - + /* do not break here, this might happen with larger zlib chunks return -1; - */ - continue; + */ } if ( Z_STREAM_ERROR == sync ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "Z_STREAM_ERROR.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -575,7 +550,7 @@ uint64_t OTF_File_tell( OTF_File* file ) { if ( NULL != file->externalbuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "not yet supported in 'external buffer' 
mode.\n", __FUNCTION__, __FILE__, __LINE__ ); return (uint64_t) -1; @@ -599,7 +574,7 @@ uint64_t OTF_File_size( OTF_File* file ) { if ( NULL != file->externalbuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "not yet supported in 'external buffer' mode.\n", __FUNCTION__, __FILE__, __LINE__ ); return (uint64_t) -1; @@ -608,10 +583,10 @@ uint64_t OTF_File_size( OTF_File* file ) { if ( stat( file->filename, &st ) == -1 ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "stat() failed: %s\n", - __FUNCTION__, __FILE__, __LINE__, - strerror(errno) ); + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "stat() failed: %s\n", + __FUNCTION__, __FILE__, __LINE__, + strerror(errno) ); return 0; } else { @@ -633,7 +608,7 @@ int OTF_File_close( OTF_File* file ) { if ( NULL == file ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "file has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -651,47 +626,52 @@ int OTF_File_close( OTF_File* file ) { } else { - size_t towrite; - /* flush buffer */ - if( 0 == OTF_File_revive( file, OTF_FILEMODE_WRITE ) ) { + size_t towrite; - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "OTF_File_revive() failed.\n", - __FUNCTION__, __FILE__, __LINE__ ); - - return 0; - } - status = deflate(file->z, Z_FULL_FLUSH); - assert(status != Z_STREAM_ERROR); - towrite = file->zbuffersize - file->z->avail_out; - byteswritten = 0; - if (towrite > 0) - byteswritten = fwrite(file->zbuffer, 1, towrite, file->file); - if (towrite != byteswritten) - { - OTF_fprintf(stderr, "ERROR in function %s, file: %s, line: %i:\n" - "Failed to write compressed buffer of size %lu\n", - __FUNCTION__, __FILE__, __LINE__, towrite); - } - while (file->z->avail_out != file->zbuffersize) - { - file->z->avail_out = file->zbuffersize; - 
file->z->next_out = file->zbuffer; - deflate(file->z, Z_FULL_FLUSH); - assert(status != Z_STREAM_ERROR); - towrite = file->zbuffersize - file->z->avail_out; - if (towrite > 0) - fwrite(file->zbuffer, 1, towrite, file->file); - } - deflateEnd( file->z ); + size_t towrite; + + /* flush buffer */ + if( 0 == OTF_File_revive( file, OTF_FILEMODE_WRITE ) ) { + + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "OTF_File_revive() failed.\n", + __FUNCTION__, __FILE__, __LINE__ ); + + return 0; + } + + status = deflate( file->z, Z_FULL_FLUSH ); + assert( status != Z_STREAM_ERROR ); + + towrite = file->zbuffersize - file->z->avail_out; + byteswritten = 0; + if (towrite > 0) + byteswritten = fwrite( file->zbuffer, 1, towrite, file->file ); + if (towrite != byteswritten) { + + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n" + "Failed to write compressed buffer of size %lu\n", + __FUNCTION__, __FILE__, __LINE__, (unsigned long) towrite ); + } + + while (file->z->avail_out != file->zbuffersize) { + + file->z->avail_out = file->zbuffersize; + file->z->next_out = file->zbuffer; + status = deflate( file->z, Z_FULL_FLUSH ); /* refresh status so the assert checks this call */ + assert(status != Z_STREAM_ERROR); + + towrite = file->zbuffersize - file->z->avail_out; + if (towrite > 0) + fwrite( file->zbuffer, 1, towrite, file->file ); + } + deflateEnd( file->z ); } free( file->z ); - file->z = NULL; + file->z = NULL; free( file->zbuffer ); - file->zbuffer = NULL; + file->zbuffer = NULL; } - + #endif /* HAVE_ZLIB */ if ( NULL != file->file ) { @@ -715,9 +695,9 @@ OTF_FileStatus OTF_File_status( OTF_File* file ) { if ( NULL != file->externalbuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "not yet supported in 'external buffer' mode.\n", - __FUNCTION__, __FILE__, __LINE__ ); + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "not yet supported in 'external buffer' mode.\n", + __FUNCTION__, __FILE__, __LINE__ ); return OTF_FILESTATUS_UNKNOWN; } @@ -743,9 +723,9 @@ void OTF_File_suspend( OTF_File* file ) { if ( NULL !=
file->externalbuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "not yet supported in 'external buffer' mode.\n", - __FUNCTION__, __FILE__, __LINE__ ); + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "not yet supported in 'external buffer' mode.\n", + __FUNCTION__, __FILE__, __LINE__ ); return; } @@ -778,12 +758,9 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { /* file currently closed, aka open or reopen */ - /* - OTF_fprintf( stderr, "OTF_File_revive() READ: ask FileManager for free handle\n" ); - */ if ( 0 == OTF_FileManager_guaranteeFile( file->manager ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_FileManager_guaranteeFile() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -816,7 +793,7 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { } else { /* show this error every time */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot open file %s for reading. Maybe the number of " "opened filehandles exceeds your system's limit\n", __FUNCTION__, __FILE__, __LINE__, file->filename ); @@ -835,7 +812,7 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { if( NULL == file->file ) { /* show this error every time */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot open file %s for reading. 
Maybe the number of " "opened filehandles exceeds your system's limit\n", __FUNCTION__, __FILE__, __LINE__, file->filename ); @@ -851,7 +828,7 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { if ( 0 == OTF_FileManager_registerFile( file->manager, file ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_FileManager_registerFile() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -861,12 +838,9 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { } else { /* file already opened */ - /* - OTF_fprintf( stderr, "OTF_File_revive() READ: update FileManagers LRU list\n" ); - */ if ( 0 == OTF_FileManager_touchFile( file->manager, file ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_FileManager_touchFile() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -884,12 +858,9 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { /* file currently closed */ - /* - OTF_fprintf( stderr, "OTF_File_revive() WRITE: ask FileManager for free handle\n" ); - */ if ( 0 == OTF_FileManager_guaranteeFile( file->manager ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_FileManager_guaranteeFile() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -904,7 +875,7 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { if( NULL == file->file ) { /* show this error every time */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot open file %s for writing. 
Maybe the number of " "opened filehandles exceeds your system's limit\n", __FUNCTION__, __FILE__, __LINE__, file->filename ); @@ -920,7 +891,7 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { if( NULL == file->file ) { /* show this error every time */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot open file %s for writing. Maybe the number of " "opened filehandles exceeds your system's limit\n", __FUNCTION__, __FILE__, __LINE__, file->filename ); @@ -929,12 +900,9 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { } } - /* - OTF_fprintf( stderr, "OTF_File_revive() WRITE: register opened file with FileManager\n" ); - */ if ( 0 == OTF_FileManager_registerFile( file->manager, file ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_FileManager_registerFile() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -944,12 +912,9 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { } else { /* file already opened */ - /* - OTF_fprintf( stderr, "OTF_File_revive() WRITE: update FileManagers LRU list\n" ); - */ if ( 0 == OTF_FileManager_touchFile( file->manager, file ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_FileManager_touchFile() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -967,12 +932,9 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { /* file currently closed */ - /* - OTF_fprintf( stderr, "OTF_File_revive() READ: ask FileManager for free handle\n" ); - */ if ( 0 == OTF_FileManager_guaranteeFile( file->manager ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_FileManager_guaranteeFile() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -987,7 +949,7 @@ int 
OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { if( NULL == file->file ) { /* show this error every time */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot open file %s for reading. Maybe the number of " "opened filehandles exceeds your system's limit\n", __FUNCTION__, __FILE__, __LINE__, file->filename ); @@ -1009,7 +971,7 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { if( NULL == file->file ) { /* show this error every time */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot open file %s for reading. Maybe the number of " "opened filehandles exceeds your system's limit\n", __FUNCTION__, __FILE__, __LINE__, file->filename ); @@ -1018,12 +980,9 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { } } - /* - OTF_fprintf( stderr, "OTF_File_revive() SEEK: register opened file with FileManager\n" ); - */ if ( 0 == OTF_FileManager_registerFile( file->manager, file ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_FileManager_registerFile() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1033,12 +992,9 @@ int OTF_File_revive( OTF_File* file, OTF_FileMode mode ) { } else { /* file already opened */ - /* - OTF_fprintf( stderr, "OTF_File_revive() READ: update FileManagers LRU list\n" ); - */ if ( 0 == OTF_FileManager_touchFile( file->manager, file ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_FileManager_touchFile() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1067,7 +1023,7 @@ void OTF_File_setZBufferSize( OTF_File* file, uint32_t size ) { void *tmp; if ( 32 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, 
file: %s, line: %i:\n " "intended zbuffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -1075,13 +1031,13 @@ void OTF_File_setZBufferSize( OTF_File* file, uint32_t size ) { } else if ( 512 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "zbuffer size %u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "zbuffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -1094,7 +1050,7 @@ void OTF_File_setZBufferSize( OTF_File* file, uint32_t size ) { tmp = realloc( file->zbuffer, size ); if (tmp == NULL) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "No memory left to reallocate zlib buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); return; @@ -1121,7 +1077,7 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager, /* Check input parameters */ if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no filename has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1129,19 +1085,17 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager, } if( NULL == manager ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "manager has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); return NULL; } - /* OTF_fprintf( stderr, "OTF_File_open_zlevel() zlevel: %u, filename: \"%s\"\n", zlevel, filename ); */ - ret= (OTF_File*) malloc( sizeof(OTF_File) ); if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR 
in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1154,7 +1108,7 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager, ret->filename= malloc( len +3 ); if( NULL == ret->filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1178,10 +1132,6 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager, strncpy( ret->filename +len, ".z", 3 ); - /* - OTF_fprintf( stderr, "try '%s'\n", ret->filename ); - */ - if ( 0 != access( ret->filename, F_OK ) ) { /* file still not found, give up */ @@ -1196,7 +1146,7 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager, ret->z= malloc( sizeof(z_stream) ); if( NULL == ret->z ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1217,10 +1167,10 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager, inflateInit( ret->z ); ret->zbuffer= malloc( ret->zbuffersize ); - + if( NULL == ret->zbuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1241,16 +1191,16 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager, #else /* HAVE_ZLIB */ if ( 0 != access( ret->filename, F_OK ) ) { - - strncpy( ret->filename +len, ".z", 3 ); - - if ( 0 == access( ret->filename, F_OK ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "cannot open %s. 
Zlib is not enabled.\n", - __FUNCTION__, __FILE__, __LINE__, ret->filename ); + strncpy( ret->filename +len, ".z", 3 ); - } + if ( 0 == access( ret->filename, F_OK ) ) { + + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "cannot open %s. Zlib is not enabled.\n", + __FUNCTION__, __FILE__, __LINE__, ret->filename ); + + } /* file still not found, give up */ free( ret->filename ); @@ -1275,7 +1225,7 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager, ret->z= malloc( sizeof(z_stream) ); if( NULL == ret->z ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1298,7 +1248,7 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager, ret->zbuffer= malloc( ret->zbuffersize ); if( NULL == ret->zbuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1337,13 +1287,6 @@ size_t OTF_File_read_internal( OTF_File* file, void* dest, size_t length ) { actual_length= file->externallen - file->externalpos; actual_length= ( length <= actual_length ) ? 
length : actual_length; -/* - OTF_fprintf( stderr, "OTF_File_read_internal from external buffer: " - "addr %x, length %llu, actual_length %llu, pos %llu, bufflen %llu\n", - file->externalbuffer, - (uint64_t)length, (uint64_t)actual_length, (uint64_t) file->externalpos, (uint64_t) file->externallen ); -*/ - memcpy( dest, file->externalbuffer + file->externalpos, actual_length ); file->externalpos += actual_length; diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_FileManager.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_FileManager.c index 3acb7e2c8d..f8de64a1f7 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_FileManager.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_FileManager.c @@ -76,7 +76,7 @@ void OTF_FileManager_finalize( OTF_FileManager* manager ) { # ifdef OTF_DEBUG if ( 0 < manager->count ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "open file remaining.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -108,7 +108,7 @@ OTF_FileManager* OTF_FileManager_open( uint32_t number ) { OTF_FileManager* ret= (OTF_FileManager*) malloc( sizeof(OTF_FileManager) ); if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -153,7 +153,7 @@ uint32_t OTF_FileManager_setNumber( OTF_FileManager* fh, uint32_t number ) { if ( 0 == number ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "illegal value 0 ignored.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -171,39 +171,23 @@ return 1 on success, 0 otherwise (which is not supposed to happen) */ int OTF_FileManager_guaranteeFile( OTF_FileManager* m ) { - /* - OTF_fprintf( stderr, "OTF_FileManager_guaranteeFile()\n" ); - */ - if ( m->count < m->number ) { /* free file handles available */ - /* - 
OTF_fprintf( stderr, " OTF_FileManager_guaranteeFile() free handles left\n" ); - */ - return 1; } - /* - OTF_fprintf( stderr, " OTF_FileManager_guaranteeFile() need new handles\n" ); - */ - /* suspend last entry in list */ if ( 0 == OTF_FileManager_suspendFile( m, m->list->file ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_FileManager_suspendFile() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); return 0; } - /* - OTF_fprintf( stderr, "post suspend %u / %u\n", m->count, m->number ); - */ - return 1; } @@ -212,13 +196,9 @@ int OTF_FileManager_guaranteeFile( OTF_FileManager* m ) { int OTF_FileManager_registerFile( OTF_FileManager* m, OTF_File* file ) { - /* - OTF_fprintf( stderr, "OTF_FileManager_registerFile()\n" ); - */ - if ( OTF_FILESTATUS_ACTIVE != OTF_File_status( file ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "file not open.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -227,9 +207,9 @@ int OTF_FileManager_registerFile( OTF_FileManager* m, OTF_File* file ) { if ( m->count >= m->number ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot register new file because limit %u exceeded, call " - "'OTF_FileManager_guaranteeFile()' before.\n", + "'OTF_FileManager_guaranteeFile()' before.\n", __FUNCTION__, __FILE__, __LINE__, m->number ); return 0; @@ -237,9 +217,6 @@ int OTF_FileManager_registerFile( OTF_FileManager* m, OTF_File* file ) { OTF_FileManager_listInsertAtHead( &(m->list), file ); - /* - OTF_fprintf( stderr, " c++ %u -> %u\n", m->count, m->count+1 ); - */ m->count++; return 1; @@ -252,10 +229,6 @@ return 1 on success or 0 for an suspended file. 
*/ int OTF_FileManager_touchFile( OTF_FileManager* m, OTF_File* file ) { - /* - OTF_fprintf( stderr, "OTF_FileManager_touchFile()\n" ); - */ - if ( OTF_FILESTATUS_ACTIVE != OTF_File_status( file ) ) { return 0; @@ -275,15 +248,11 @@ internally. return 1 on success, 0 otherwise. */ int OTF_FileManager_suspendFile( OTF_FileManager* m, OTF_File* file ) { - /* - OTF_fprintf( stderr, "OTF_FileManager_suspendFile()\n" ); - */ - if ( OTF_FILESTATUS_ACTIVE != OTF_File_status( file ) ) { /* file not open, so cannot be suspended */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "file to be suspended is not open.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -292,16 +261,13 @@ int OTF_FileManager_suspendFile( OTF_FileManager* m, OTF_File* file ) { if ( 0 == OTF_FileManager_listUnlinkAtTail( &(m->list), file ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "could not unlink this entry.\n", __FUNCTION__, __FILE__, __LINE__ ); return 0; }; - /* - OTF_fprintf( stderr, " c-- %u -> %u\n", m->count, m->count-1 ); - */ m->count--; OTF_File_suspend( file ); @@ -316,7 +282,7 @@ int OTF_FileManager_listInsertAtHead( OTF_FileList** list, OTF_File* entry ) { OTF_FileList* newentry= (OTF_FileList*) malloc( sizeof(OTF_FileList) ); if( NULL == newentry ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -325,30 +291,15 @@ int OTF_FileManager_listInsertAtHead( OTF_FileList** list, OTF_File* entry ) { if ( NULL != (*list) ) { - /* - OTF_fprintf( stderr, "pre insert : %p --> %p (%p)\n", - *list, (*list)->next, (*list)->file ); - */ - newentry->file= entry; newentry->prev= (*list)->prev; newentry->next= (*list); - /* - OTF_fprintf( stderr, "new entry : %p --> %p (%p)\n", - newentry, 
newentry->next, newentry->file ); - */ - (*list)->prev->next= newentry; (*list)->prev= newentry; *list= newentry; - /* - OTF_fprintf( stderr, "post insert: %p --> %p (%p)\n", - *list, (*list)->next, (*list)->file ); - */ - } else { /* empty list */ @@ -360,13 +311,6 @@ int OTF_FileManager_listInsertAtHead( OTF_FileList** list, OTF_File* entry ) { *list= newentry; } - /* - OTF_fprintf( stderr, "after OTF_FileManager_listInsertAtHead():\n" ); - */ - /* - OTF_FileManager_listPrint( list ); - */ - return 0; } @@ -409,23 +353,10 @@ int OTF_FileManager_listUnlinkAtHead( OTF_FileList** list, OTF_File* file ) { free( pos ); pos = NULL; - /* - OTF_fprintf( stderr, "after OTF_FileManager_listUnlinkAtHead() %p found:\n", file ); - */ - /* - OTF_FileManager_listPrint( listHead, listTail ); - */ - return 1; } /* not found */ - /* - OTF_fprintf( stderr, "after OTF_FileManager_listUnlinkAtHead() %p not found:\n", file ); - */ - /* - OTF_FileManager_listPrint( listHead, listTail ); - */ return 0; } @@ -469,23 +400,10 @@ int OTF_FileManager_listUnlinkAtTail( OTF_FileList** list, OTF_File* file ) { free( pos ); pos = NULL; - /* - OTF_fprintf( stderr, "after OTF_FileManager_listUnlinkAtHead() %p found:\n", file ); - */ - /* - OTF_FileManager_listPrint( listHead, listTail ); - */ - return 1; } /* not found */ - /* - OTF_fprintf( stderr, "after OTF_FileManager_listUnlinkAtTail():\n" ); - */ - /* - OTF_FileManager_listPrint( listHead, listTail ); - */ return 0; } @@ -499,25 +417,25 @@ void OTF_FileManager_listPrint( OTF_FileList** list ) { if ( NULL == *list ) { - OTF_fprintf( stderr, "empty list\n ----- \n" ); + fprintf( stderr, "empty list\n ----- \n" ); return; } pos= *list; - OTF_fprintf( stderr, "head: %p --> %p (%p %u)\n", (void*)pos, + fprintf( stderr, "head: %p --> %p (%p %u)\n", (void*)pos, (void*)pos->next, (void*)pos->file, OTF_File_status( pos->file ) ); while ( pos != (*list)->prev ) { pos= pos->next; - OTF_fprintf( stderr, " %p --> %p (%p %u)\n", (void*)pos, + fprintf( 
stderr, " %p --> %p (%p %u)\n", (void*)pos, (void*)pos->next, (void*)pos->file, OTF_File_status( pos->file ) ); } - OTF_fprintf( stderr, " ----- \n" ); + fprintf( stderr, " ----- \n" ); } diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Filenames.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Filenames.c index 7ca694082c..12f394c3f3 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Filenames.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Filenames.c @@ -117,7 +117,7 @@ char* OTF_stripFilename( const char* filename ) { if( NULL == p ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -132,7 +132,7 @@ char* OTF_stripFilename( const char* filename ) { /* fail if the resulting filename is empty */ if ( '\0' == *ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "empty filename base.\n", __FUNCTION__, __FILE__, __LINE__ ); diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_HandlerArray.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_HandlerArray.c index 7204e4986c..2f18ce4460 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_HandlerArray.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_HandlerArray.c @@ -63,7 +63,7 @@ OTF_HandlerArray* OTF_HandlerArray_open() { ret = (OTF_HandlerArray*) malloc( sizeof( OTF_HandlerArray ) ); if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -74,7 +74,7 @@ OTF_HandlerArray* OTF_HandlerArray_open() { OTF_NRECORDS * sizeof( OTF_FunctionPointer* ) ); if( NULL == ret->pointer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, 
__FILE__, __LINE__ ); @@ -87,7 +87,7 @@ OTF_HandlerArray* OTF_HandlerArray_open() { ret->firsthandlerarg = (void**) malloc( OTF_NRECORDS * sizeof( void* ) ); if( NULL == ret->firsthandlerarg ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -110,7 +110,7 @@ int OTF_HandlerArray_close( OTF_HandlerArray* handlers ) { if( NULL == handlers ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "handlers have not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -132,7 +132,7 @@ int OTF_HandlerArray_setHandler( OTF_HandlerArray* handlers, if( recordtype >= OTF_NRECORDS ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "unknown record type %u.\n", __FUNCTION__, __FILE__, __LINE__, recordtype ); @@ -150,7 +150,7 @@ int OTF_HandlerArray_setFirstHandlerArg( OTF_HandlerArray* handlers, if( recordtype >= OTF_NRECORDS ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "unknown record type %u.\n", __FUNCTION__, __FILE__, __LINE__, recordtype ); @@ -268,18 +268,23 @@ int OTF_HandlerArray_getCopyHandler( OTF_HandlerArray* handlers, OTF_HandlerArray_setFirstHandlerArg( handlers, writer, OTF_DEFKEYVALUE_RECORD ); - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) OTF_CopyHandler_DefTimeRange, - OTF_DEFTIMERANGE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, writer, - OTF_DEFTIMERANGE_RECORD ); + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) OTF_CopyHandler_DefTimeRange, + OTF_DEFTIMERANGE_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, writer, + OTF_DEFTIMERANGE_RECORD ); - OTF_HandlerArray_setHandler( handlers, - 
(OTF_FunctionPointer*) OTF_CopyHandler_DefCounterAssignments, - OTF_DEFCOUNTERASSIGNMENTS_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, writer, - OTF_DEFCOUNTERASSIGNMENTS_RECORD ); + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) OTF_CopyHandler_DefCounterAssignments, + OTF_DEFCOUNTERASSIGNMENTS_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, writer, + OTF_DEFCOUNTERASSIGNMENTS_RECORD ); + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) OTF_CopyHandler_DefProcessSubstitutes, + OTF_DEFPROCESSSUBSTITUTES_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, writer, + OTF_DEFPROCESSSUBSTITUTES_RECORD ); OTF_HandlerArray_setHandler( handlers, (OTF_FunctionPointer*) OTF_CopyHandler_NoOp, @@ -603,19 +608,24 @@ int OTF_HandlerArray_getCopyHandler_stream( OTF_HandlerArray* handlers, OTF_HandlerArray_setFirstHandlerArg( handlers, wstream, OTF_DEFKEYVALUE_RECORD ); - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) OTF_CopyHandler_stream_DefTimeRange, - OTF_DEFTIMERANGE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, wstream, - OTF_DEFTIMERANGE_RECORD ); + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) OTF_CopyHandler_stream_DefTimeRange, + OTF_DEFTIMERANGE_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, wstream, + OTF_DEFTIMERANGE_RECORD ); - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) OTF_CopyHandler_stream_DefCounterAssignments, - OTF_DEFCOUNTERASSIGNMENTS_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, wstream, - OTF_DEFCOUNTERASSIGNMENTS_RECORD ); + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) OTF_CopyHandler_stream_DefCounterAssignments, + OTF_DEFCOUNTERASSIGNMENTS_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, wstream, + OTF_DEFCOUNTERASSIGNMENTS_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) OTF_CopyHandler_stream_DefProcessSubstitutes, + 
OTF_DEFPROCESSSUBSTITUTES_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, wstream, + OTF_DEFPROCESSSUBSTITUTES_RECORD ); - OTF_HandlerArray_setHandler( handlers, (OTF_FunctionPointer*) OTF_CopyHandler_stream_NoOp, OTF_NOOP_RECORD ); diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_HandlerArray.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_HandlerArray.h index e96c6f86d5..34c475ec34 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_HandlerArray.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_HandlerArray.h @@ -270,6 +270,14 @@ typedef int (OTF_Handler_DefCounterAssignments) ( void* userData, const uint32_t* procs_or_groups, OTF_KeyValueList* list ); +/* # OTF_DEFPROCESSSUBSTITUTES_RECORD */ +typedef int (OTF_Handler_DefProcessSubstitutes) ( void* userData, + uint32_t stream, + uint32_t representative, + uint32_t numberOfProcs, + const uint32_t* procs, + OTF_KeyValueList* list ); + /* typedefs for OTF event records ****************************************** */ /* # OTF_NOOP_RECORD */ diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_KeyValue.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_KeyValue.c index b493b6a910..77d8f78148 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_KeyValue.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_KeyValue.c @@ -9,6 +9,7 @@ #include #include + uint8_t OTF_KeyValueList_getValue(OTF_KeyValueList *list, uint32_t key, OTF_Type otf_type, OTF_Value *otf_value); OTF_KeyValueList *OTF_KeyValueList_new() { @@ -17,7 +18,7 @@ OTF_KeyValueList *OTF_KeyValueList_new() { if (list == NULL) { /* error: not enough memory left */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); return NULL; @@ -27,7 +28,7 @@ OTF_KeyValueList *OTF_KeyValueList_new() { if (list->kvBegin == NULL) { /* error: not enough memory left */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + 
OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); free(list); @@ -46,7 +47,7 @@ OTF_KeyValueList *OTF_KeyValueList_new() { if( OTF_KeyValueList_realloc(list, 9) ) { /* an error ocurred while realloc */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -64,7 +65,7 @@ uint8_t OTF_KeyValueList_close(OTF_KeyValueList* list) { if (list == NULL) { /* error */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no list has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); return 1; @@ -92,7 +93,7 @@ uint8_t OTF_KeyValueList_reset(OTF_KeyValueList* list) { if ( list == NULL) { /* error */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no list has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); return 1; @@ -137,7 +138,7 @@ uint8_t OTF_KeyValueList_realloc(OTF_KeyValueList* list, uint32_t num) { if (list == NULL) { /* error */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no list has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); return 1; @@ -176,7 +177,7 @@ uint8_t OTF_KeyValueList_appendPair(OTF_KeyValueList* list, OTF_KeyValuePair pai if (list == NULL) { /* error */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no list has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); return 255; @@ -201,7 +202,7 @@ uint8_t OTF_KeyValueList_appendPair(OTF_KeyValueList* list, OTF_KeyValuePair pai /* an error ocurred while realloc */ if ( (list->size - list->count) < 1 ) { /* if no memory left, return with error */ - 
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -411,7 +412,7 @@ uint8_t OTF_KeyValueList_appendKeyValueList(OTF_KeyValueList *dest_list, OTF_Key for( i = 0; i < source_list->count; i++ ) { if ( 255 == OTF_KeyValueList_appendPair(dest_list, p->kvPair) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "error while appending OTF_KeyValueList.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -457,7 +458,7 @@ uint8_t OTF_KeyValueList_getValue(OTF_KeyValueList *list, uint32_t key, OTF_Type if (list == NULL) { /* error */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no list has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); return 255; @@ -482,258 +483,244 @@ uint8_t OTF_KeyValueList_getValue(OTF_KeyValueList *list, uint32_t key, OTF_Type /* no key in list matches the searched key */ return 1; - } uint8_t OTF_KeyValueList_getChar(OTF_KeyValueList *list, uint32_t key, char *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! (ret = OTF_KeyValueList_getValue(list, key, OTF_CHAR, &otf_value)) ) { *value = otf_value.otf_char; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getInt8(OTF_KeyValueList *list, uint32_t key, int8_t *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! (ret = OTF_KeyValueList_getValue(list, key, OTF_INT8, &otf_value)) ) { *value = otf_value.otf_int8; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getUint8(OTF_KeyValueList *list, uint32_t key, uint8_t *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! 
(ret = OTF_KeyValueList_getValue(list, key, OTF_UINT8, &otf_value)) ) { *value = otf_value.otf_uint8; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getInt16(OTF_KeyValueList *list, uint32_t key, int16_t *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! (ret = OTF_KeyValueList_getValue(list, key, OTF_INT16, &otf_value)) ) { *value = otf_value.otf_int16; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getUint16(OTF_KeyValueList *list, uint32_t key, uint16_t *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! (ret = OTF_KeyValueList_getValue(list, key, OTF_UINT16, &otf_value)) ) { *value = otf_value.otf_uint16; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getInt32(OTF_KeyValueList *list, uint32_t key, int32_t *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! (ret = OTF_KeyValueList_getValue(list, key, OTF_INT32, &otf_value)) ) { *value = otf_value.otf_int32; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getUint32(OTF_KeyValueList *list, uint32_t key, uint32_t *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! (ret = OTF_KeyValueList_getValue(list, key, OTF_UINT32, &otf_value)) ) { *value = otf_value.otf_uint32; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getInt64(OTF_KeyValueList *list, uint32_t key, int64_t *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! (ret = OTF_KeyValueList_getValue(list, key, OTF_INT64, &otf_value)) ) { *value = otf_value.otf_int64; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getUint64(OTF_KeyValueList *list, uint32_t key, uint64_t *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! 
(ret = OTF_KeyValueList_getValue(list, key, OTF_UINT64, &otf_value)) ) { *value = otf_value.otf_uint64; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getFloat(OTF_KeyValueList *list, uint32_t key, float *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! (ret = OTF_KeyValueList_getValue(list, key, OTF_FLOAT, &otf_value)) ) { *value = otf_value.otf_float; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getDouble(OTF_KeyValueList *list, uint32_t key, double *value) { - OTF_Value otf_value; + OTF_Value otf_value; int ret; if( ! (ret = OTF_KeyValueList_getValue(list, key, OTF_DOUBLE, &otf_value)) ) { *value = otf_value.otf_double; } - - return ret; - + + return ret; } uint8_t OTF_KeyValueList_getByteArray(OTF_KeyValueList *list, uint32_t key, uint8_t *value, uint32_t *len) { - - OTF_KeyValuePairList *p; - uint32_t i; - uint32_t max_len; - - if (list == NULL) { - /* error */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "no list has been specified.\n", - __FUNCTION__, __FILE__, __LINE__ ); - return 255; - } - p = list->kvBegin; - - max_len = *len; - *len = 0; + OTF_KeyValuePairList *p; + uint32_t i; + uint32_t max_len; - /* search key */ - for ( i=0; icount; i++) { - - if ( p->kvPair.key == key ) { - - if ( p->kvPair.type == OTF_BYTE_ARRAY) { - - if( p->kvPair.value.otf_byte_array.len <= OTF_KEYVALUE_MAX_ARRAY_LEN ) { - - if( ( *len + p->kvPair.value.otf_byte_array.len ) > max_len ) { - - /* allocated memory pointed by "value" is not big enough to store the howle byte array */ - /* fill memory until the end and exit with an error-code */ - memcpy(value, p->kvPair.value.otf_byte_array.array, max_len - *len); - - *len = max_len; - - return 255; - - } - - *len += p->kvPair.value.otf_byte_array.len; - - memcpy(value, p->kvPair.value.otf_byte_array.array, p->kvPair.value.otf_byte_array.len); - - /* end of byte array reached, all right */ - return 0; - - } else { - - if( ( *len + 
OTF_KEYVALUE_MAX_ARRAY_LEN ) > max_len ) { - - /* allocated memory pointed by "value" is not big enough to store the howle byte array */ - /* fill memory until the end and exit with an error-code */ - memcpy(value, p->kvPair.value.otf_byte_array.array, max_len - *len); - - *len = max_len; - - return 255; - - } - - *len += OTF_KEYVALUE_MAX_ARRAY_LEN; - - memcpy(value, p->kvPair.value.otf_byte_array.array, OTF_KEYVALUE_MAX_ARRAY_LEN); - - value += OTF_KEYVALUE_MAX_ARRAY_LEN; - - } - - } else { - - /* type of found key differs */ - return 2; - } - - } else { - - if( *len > 0 ) { - - /* byte-array not completed */ - return 255; - - } - - } - - p = p->kvNext; - - } - - /* no key in list matches the searched key */ - return 1; - + if (list == NULL) { + /* error */ + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "no list has been specified.\n", + __FUNCTION__, __FILE__, __LINE__ ); + return 255; + } + + p = list->kvBegin; + + max_len = *len; + *len = 0; + + /* search key */ + for ( i=0; icount; i++) { + + if ( p->kvPair.key == key ) { + + if ( p->kvPair.type == OTF_BYTE_ARRAY) { + + if( p->kvPair.value.otf_byte_array.len <= OTF_KEYVALUE_MAX_ARRAY_LEN ) { + + if( ( *len + p->kvPair.value.otf_byte_array.len ) > max_len ) { + + /* allocated memory pointed by "value" is not big enough to store the howle byte array */ + /* fill memory until the end and exit with an error-code */ + memcpy(value, p->kvPair.value.otf_byte_array.array, max_len - *len); + + *len = max_len; + + return 255; + + } + + *len += p->kvPair.value.otf_byte_array.len; + + memcpy(value, p->kvPair.value.otf_byte_array.array, p->kvPair.value.otf_byte_array.len); + + /* end of byte array reached, all right */ + return 0; + + } else { + + if( ( *len + OTF_KEYVALUE_MAX_ARRAY_LEN ) > max_len ) { + + /* allocated memory pointed by "value" is not big enough to store the howle byte array */ + /* fill memory until the end and exit with an error-code */ + memcpy(value, p->kvPair.value.otf_byte_array.array, 
max_len - *len); + + *len = max_len; + + return 255; + + } + + *len += OTF_KEYVALUE_MAX_ARRAY_LEN; + + memcpy(value, p->kvPair.value.otf_byte_array.array, OTF_KEYVALUE_MAX_ARRAY_LEN); + + value += OTF_KEYVALUE_MAX_ARRAY_LEN; + + } + + } else { + + /* type of found key differs */ + return 2; + } + + } else { + + if( *len > 0 ) { + + /* byte-array not completed */ + return 255; + + } + + } + + p = p->kvNext; + + } + + /* no key in list matches the searched key */ + return 1; } uint8_t OTF_KeyValueList_getArrayLength(OTF_KeyValueList *list, uint32_t key, uint32_t *len) { - - OTF_Value otf_value; + + OTF_Value otf_value; int ret; if( ! (ret = OTF_KeyValueList_getValue(list, key, OTF_BYTE_ARRAY, &otf_value)) ) { *len = otf_value.otf_byte_array.len; } - - return ret; - + + return ret; } OTF_Type OTF_KeyValueList_getTypeForKey(OTF_KeyValueList *list, uint32_t key) { @@ -743,7 +730,7 @@ OTF_Type OTF_KeyValueList_getTypeForKey(OTF_KeyValueList *list, uint32_t key) { if (list == NULL) { /* error */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no list has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -771,7 +758,7 @@ uint8_t OTF_KeyValueList_hasKey(OTF_KeyValueList *list, uint32_t key) { if (list == NULL) { /* error */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no list has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -800,7 +787,7 @@ uint8_t OTF_KeyValueList_removeKey(OTF_KeyValueList *list, uint32_t key) { if (list == NULL) { /* error */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no list has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -840,7 +827,7 @@ uint8_t OTF_KeyValueList_getKeyByIndex(OTF_KeyValueList *list, uint32_t index, u if (list == NULL) { /* error */ - OTF_fprintf( 
stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no list has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -884,7 +871,7 @@ uint8_t OTF_KeyValueList_getPairByIndex(OTF_KeyValueList *list, uint32_t index, if (list == NULL) { /* error */ - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no list has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Keywords.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Keywords.h index 12caa664d3..ae582e96d1 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Keywords.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Keywords.h @@ -137,6 +137,10 @@ the keywords identifying record types must follow some special rules: #define OTF_KEYWORD_S_DEFCOUNTERASSIGNMENTS "CA" #define OTF_KEYWORD_F_DEFCOUNTERASSIGNMENTS 'C' +#define OTF_KEYWORD_L_DEFPROCESSSUBSTITUTES "PROCESSSUBSTITUTES" +#define OTF_KEYWORD_S_DEFPROCESSSUBSTITUTES "PS" +#define OTF_KEYWORD_F_DEFPROCESSSUBSTITUTES 'P' + /* *** event record keywords *** */ #define OTF_KEYWORD_L_NOOP "NOOP" diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_MasterControl.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_MasterControl.c index 7f848a33fd..f9c69ecae7 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_MasterControl.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_MasterControl.c @@ -111,8 +111,10 @@ void OTF_MasterControl_finish( OTF_MasterControl* mc ) { OTF_MasterControl_close( mc ); - OTF_fprintf( stderr, "OTF_MasterControl_finish() deprecated, " - "use OTF_MasterControl_close() instead\n" ); + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " + "OTF_MasterControl_finish() is deprecated, " + "use OTF_MasterControl_close() instead.\n", + __FUNCTION__, __FILE__, __LINE__ ); } @@ -137,12 +139,9 @@ int OTF_MasterControl_read( OTF_MasterControl* mc, const 
char* namestub ) { int r; - - /* OTF_fprintf( stderr, "OTF_MasterControl_read()\n" ); */ - if( NULL == mc ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "master control has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -159,7 +158,7 @@ int OTF_MasterControl_read( OTF_MasterControl* mc, const char* namestub ) { if ( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -169,7 +168,7 @@ int OTF_MasterControl_read( OTF_MasterControl* mc, const char* namestub ) { /* if ( ! OTF_fileExists( filename ) ) { - OTF_fprintf( stderr, "ERROR in '%s'.c: " + OTF_Error( "ERROR in '%s'.c: " "Invalid input file '%s'\n", __FUNCTION__, filename ); return 0; } @@ -178,7 +177,7 @@ int OTF_MasterControl_read( OTF_MasterControl* mc, const char* namestub ) { buffer = OTF_RBuffer_open( filename, mc->manager ); if( NULL == buffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "could not open file \"%s\" for reading.\n", __FUNCTION__, __FILE__, __LINE__, filename ); @@ -190,8 +189,6 @@ int OTF_MasterControl_read( OTF_MasterControl* mc, const char* namestub ) { OTF_RBuffer_setSize( buffer, 1024 ); - /* OTF_fprintf( stderr, "filename=%s\n", filename ); */ - free( filename ); filename = NULL; @@ -210,20 +207,16 @@ int OTF_MasterControl_read( OTF_MasterControl* mc, const char* namestub ) { continue; } - /* OTF_fprintf( stderr ,"arg= %x\n", argument ); */ - do { value= OTF_RBuffer_readUint32( buffer ); - /* OTF_fprintf( stderr ," v= %x\n", value ); */ - r= OTF_MasterControl_append( mc, argument, value ); if ( 0 == r ) { - OTF_fprintf( stderr ,"OTF_MasterControl_read() " - "ERROR: appending (%u,%u)\n", - argument, value ); + OTF_Error( 
"OTF_MasterControl_read() " + "ERROR: appending (%u,%u)\n", + argument, value ); } } while ( OTF_RBuffer_testChar( buffer, ',' ) ); @@ -247,7 +240,7 @@ OTF_MasterControl* OTF_MasterControl_new( OTF_FileManager* manager ) { ret= (OTF_MasterControl*) malloc( sizeof(OTF_MasterControl) ); if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -258,7 +251,7 @@ OTF_MasterControl* OTF_MasterControl_new( OTF_FileManager* manager ) { if( NULL == manager ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "manager has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -288,7 +281,7 @@ OTF_MapEntry* OTF_MasterControl_insertMapEntry( OTF_MasterControl* mc, mc->s * sizeof(OTF_MapEntry) ); if( NULL == mc->map ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -410,7 +403,7 @@ int OTF_MapEntry_insertValue( OTF_MapEntry* mc, uint32_t value ) { mc->s * sizeof(uint32_t) ); if( NULL == mc->values ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -484,7 +477,7 @@ int OTF_MasterControl_insertRMapEntry( OTF_MasterControl* mc, mc->rs * sizeof(OTF_Pair) ); if( NULL == mc->rmap ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -542,7 +535,7 @@ int OTF_MasterControl_append( OTF_MasterControl* mc, if ( 0 == argument || ((uint32_t) -1) == argument ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + 
OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "invalid argument value %x.\n", __FUNCTION__, __FILE__, __LINE__, argument ); @@ -555,7 +548,7 @@ int OTF_MasterControl_append( OTF_MasterControl* mc, if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_insertRMapEntry() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -566,7 +559,7 @@ int OTF_MasterControl_append( OTF_MasterControl* mc, entry = OTF_MasterControl_getMapEntry( mc, argument ); if( NULL == entry ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_getMapEntry() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -607,16 +600,6 @@ uint32_t OTF_MasterControl_mapReverse( OTF_MasterControl* mc, a= 0; b= mc->rn; - /* - OTF_fprintf( stderr, "OTF_MasterControl_mapReverse\n" ); - for ( c= a; c < b; ++c ) { - - OTF_fprintf( stderr, " %u: %u\n", - mc->rmap[c].argument, - mc->rmap[c].value ); - } - */ - if ( 0 >= mc->rn ) { return 0; @@ -675,7 +658,7 @@ int OTF_MasterControl_write( OTF_MasterControl* mc, const char* namestub ) { 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -685,7 +668,7 @@ int OTF_MasterControl_write( OTF_MasterControl* mc, const char* namestub ) { buffer = OTF_WBuffer_open( filename, mc->manager ); if ( NULL == buffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_open( %s ) failed.\n", __FUNCTION__, __FILE__, __LINE__, filename ); @@ -742,7 +725,7 @@ int OTF_MasterControl_check( OTF_MasterControl* mc ) { if ( mc->map[i].argument <= mc->map[i-1].argument ) { - OTF_fprintf( stderr, "ERROR in 
function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "map not sorted at %u.\n", __FUNCTION__, __FILE__, __LINE__, i ); @@ -760,7 +743,7 @@ int OTF_MasterControl_check( OTF_MasterControl* mc ) { if ( e->values[j] <= e->values[j-1] ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "map[%u] not sorted at %u.\n", __FUNCTION__, __FILE__, __LINE__, i, j ); @@ -774,7 +757,7 @@ int OTF_MasterControl_check( OTF_MasterControl* mc ) { if ( mc->rmap[i].argument <= mc->rmap[i-1].argument ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "rmap not sorted at %u.\n", __FUNCTION__, __FILE__, __LINE__, i ); @@ -794,25 +777,25 @@ void OTF_MasterControl_print( OTF_MasterControl* mc ) { OTF_MapEntry* e; - OTF_fprintf( stderr, "map:\n" ); + fprintf( stderr, "map:\n" ); for ( i= 0; i < mc->n; i++ ) { e= &(mc->map[i]); - OTF_fprintf( stderr, "%x: ", e->argument ); + fprintf( stderr, "%x: ", e->argument ); for ( j= 0; j < e->n; j++ ) { - OTF_fprintf( stderr, "%x ", e->values[j] ); + fprintf( stderr, "%x ", e->values[j] ); } - OTF_fprintf( stderr, "\n" ); + fprintf( stderr, "\n" ); } - OTF_fprintf( stderr, "rmap:\n" ); + fprintf( stderr, "rmap:\n" ); for ( i= 0; i < mc->rn; i++ ) { - OTF_fprintf( stderr, "%x: %x\n", + fprintf( stderr, "%x: %x\n", mc->rmap[i].argument, mc->rmap[i].value ); } } diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Parse.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Parse.c index 62df84d41f..71040d525a 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Parse.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Parse.c @@ -125,11 +125,13 @@ int OTF_Reader_readDefKeyValue( OTF_RBuffer* buffer, OTF_HandlerArray* handlers, uint32_t streamid ); int OTF_Reader_readDefTimeRange( OTF_RBuffer* buffer, - OTF_HandlerArray* handlers, - uint32_t streamid ); + 
OTF_HandlerArray* handlers, uint32_t streamid ); + int OTF_Reader_readDefCounterAssignments( OTF_RBuffer* buffer, - OTF_HandlerArray* handlers, - uint32_t streamid ); + OTF_HandlerArray* handlers, uint32_t streamid ); + +int OTF_Reader_readDefProcessSubstitutes( OTF_RBuffer* buffer, + OTF_HandlerArray* handlers, uint32_t streamid ); /* *** Event records *** ****************************************** */ @@ -671,11 +673,11 @@ int OTF_Reader_parseDefRecord( OTF_RBuffer* buffer, return OTF_Reader_readDefCounter( buffer, handlers, streamid ); } - if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_DEFCOUNTERASSIGNMENTS ) || - OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_DEFCOUNTERASSIGNMENTS ) ) { + if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_DEFCOUNTERASSIGNMENTS ) || + OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_DEFCOUNTERASSIGNMENTS ) ) { - return OTF_Reader_readDefCounterAssignments( buffer, handlers, streamid ); - } + return OTF_Reader_readDefCounterAssignments( buffer, handlers, streamid ); + } break; @@ -733,6 +735,14 @@ int OTF_Reader_parseDefRecord( OTF_RBuffer* buffer, /* case OTF_KEYWORD_F_DEFPROCESS 'P' : */ case OTF_KEYWORD_F_DEFPROCESSGROUP /* 'P' */ : + if ( OTF_RBuffer_testKeyword( buffer, + OTF_KEYWORD_S_DEFPROCESSORGROUPATTR ) || + OTF_RBuffer_testKeyword( buffer, + OTF_KEYWORD_L_DEFPROCESSORGROUPATTR ) ) { + + return OTF_Reader_readDefProcessOrGroupAttributes( buffer, handlers, streamid ); + } + if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_DEFPROCESSGROUP ) || OTF_RBuffer_testKeyword( buffer, @@ -748,11 +758,11 @@ int OTF_Reader_parseDefRecord( OTF_RBuffer* buffer, } if ( OTF_RBuffer_testKeyword( buffer, - OTF_KEYWORD_S_DEFPROCESSORGROUPATTR ) || + OTF_KEYWORD_S_DEFPROCESSSUBSTITUTES ) || OTF_RBuffer_testKeyword( buffer, - OTF_KEYWORD_L_DEFPROCESSORGROUPATTR ) ) { + OTF_KEYWORD_L_DEFPROCESSSUBSTITUTES ) ) { - return OTF_Reader_readDefProcessOrGroupAttributes( buffer, handlers, streamid ); + return 
OTF_Reader_readDefProcessSubstitutes( buffer, handlers, streamid ); } break; @@ -785,11 +795,11 @@ int OTF_Reader_parseDefRecord( OTF_RBuffer* buffer, return OTF_Reader_readDefTimerResolution( buffer, handlers, streamid ); } - if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_DEFTIMERANGE ) || - OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_DEFTIMERANGE ) ) { + if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_DEFTIMERANGE ) || + OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_DEFTIMERANGE ) ) { - return OTF_Reader_readDefTimeRange( buffer, handlers, streamid ); - } + return OTF_Reader_readDefTimeRange( buffer, handlers, streamid ); + } break; @@ -1385,7 +1395,7 @@ int OTF_Reader_readDefProcessOrGroupAttributes( OTF_RBuffer* buffer, uint32_t proc_token; uint32_t attr_token; - if ( handlers->pointer[OTF_DEFATTRLIST_RECORD] == NULL ) { + if ( handlers->pointer[OTF_DEFPROCESSORGROUPATTR_RECORD] == NULL ) { return OTF_RBuffer_readNewline( buffer ); } @@ -2235,103 +2245,154 @@ int OTF_Reader_readDefKeyValue( OTF_RBuffer* buffer, } } -int OTF_Reader_readDefTimeRange( OTF_RBuffer* buffer, - OTF_HandlerArray* handlers, - uint32_t streamid ) { +int OTF_Reader_readDefTimeRange( OTF_RBuffer* buffer, + OTF_HandlerArray* handlers, uint32_t streamid ) { - uint64_t minTime; - uint64_t maxTime; - if ( handlers->pointer[OTF_DEFTIMERANGE_RECORD] == NULL ) { + uint64_t minTime; + uint64_t maxTime; - return OTF_RBuffer_readNewline( buffer ); - } + if ( handlers->pointer[OTF_DEFTIMERANGE_RECORD] == NULL ) { - minTime= OTF_RBuffer_readUint64( buffer ); + return OTF_RBuffer_readNewline( buffer ); + } - if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_LOCAL_TIME ) || - OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_LOCAL_TIME ) ) { + minTime= OTF_RBuffer_readUint64( buffer ); - maxTime= OTF_RBuffer_readUint64( buffer ); + if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_LOCAL_TIME ) || + OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_LOCAL_TIME ) ) { - } else { + maxTime= 
OTF_RBuffer_readUint64( buffer ); - PARSE_ERROR( buffer ); + } else { - return 0; - } + PARSE_ERROR( buffer ); - if ( OTF_RBuffer_readNewline( buffer ) ) { + return 0; + } - /* 0 is considered as the non-error return value of call-back handlers, - but the current function returns 0 on errors! */ + if ( OTF_RBuffer_readNewline( buffer ) ) { - return ( OTF_RETURN_OK /*0*/ == ( ( - (OTF_Handler_DefTimeRange*) - handlers->pointer[OTF_DEFTIMERANGE_RECORD] ) - ( handlers->firsthandlerarg[OTF_DEFTIMERANGE_RECORD], - streamid, - minTime, - maxTime, - buffer->list ) ) ); + /* 0 is considered as the non-error return value of call-back handlers, + but the current function returns 0 on errors! */ - } else { + return ( OTF_RETURN_OK /*0*/ == ( ( + (OTF_Handler_DefTimeRange*) + handlers->pointer[OTF_DEFTIMERANGE_RECORD] ) + ( handlers->firsthandlerarg[OTF_DEFTIMERANGE_RECORD], + streamid, + minTime, + maxTime, + buffer->list ) ) ); - PARSE_ERROR( buffer ); + } else { - return 0; - } + PARSE_ERROR( buffer ); + + return 0; + } } -int OTF_Reader_readDefCounterAssignments( OTF_RBuffer* buffer, - OTF_HandlerArray* handlers, - uint32_t streamid ) { +int OTF_Reader_readDefCounterAssignments( OTF_RBuffer* buffer, + OTF_HandlerArray* handlers, uint32_t streamid ) { - uint32_t counter; - uint32_t number_of_members; - if ( handlers->pointer[OTF_DEFCOUNTERASSIGNMENTS_RECORD] == NULL ) { + uint32_t counter; + uint32_t number_of_members; - return OTF_RBuffer_readNewline( buffer ); - } + if ( handlers->pointer[OTF_DEFCOUNTERASSIGNMENTS_RECORD] == NULL ) { - counter= OTF_RBuffer_readUint32( buffer ); + return OTF_RBuffer_readNewline( buffer ); + } - if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_LOCAL_MEMBERS ) || - OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_LOCAL_MEMBERS ) ) { + counter= OTF_RBuffer_readUint32( buffer ); - number_of_members= OTF_RBuffer_readArray( buffer, - &buffer->array, - &buffer->arraysize ); + if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_LOCAL_MEMBERS ) || 
+ OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_LOCAL_MEMBERS ) ) { - } else { + number_of_members= OTF_RBuffer_readArray( buffer, + &buffer->array, + &buffer->arraysize ); - PARSE_ERROR( buffer ); + } else { - return 0; - } + PARSE_ERROR( buffer ); - if ( OTF_RBuffer_readNewline( buffer ) ) { + return 0; + } - /* 0 is considered as the non-error return value of call-back handlers, - but the current function returns 0 on errors! */ + if ( OTF_RBuffer_readNewline( buffer ) ) { - return ( OTF_RETURN_OK /*0*/ == ( ( - (OTF_Handler_DefCounterAssignments*) - handlers->pointer[OTF_DEFCOUNTERASSIGNMENTS_RECORD] ) - ( handlers->firsthandlerarg[OTF_DEFCOUNTERASSIGNMENTS_RECORD], - streamid, - counter, - number_of_members, - buffer->array, - buffer->list ) ) ); + /* 0 is considered as the non-error return value of call-back handlers, + but the current function returns 0 on errors! */ - } else { + return ( OTF_RETURN_OK /*0*/ == ( ( + (OTF_Handler_DefCounterAssignments*) + handlers->pointer[OTF_DEFCOUNTERASSIGNMENTS_RECORD] ) + ( handlers->firsthandlerarg[OTF_DEFCOUNTERASSIGNMENTS_RECORD], + streamid, + counter, + number_of_members, + buffer->array, + buffer->list ) ) ); - PARSE_ERROR( buffer ); + } else { - return 0; - } + PARSE_ERROR( buffer ); + + return 0; + } +} + +int OTF_Reader_readDefProcessSubstitutes( OTF_RBuffer* buffer, + OTF_HandlerArray* handlers, uint32_t streamid ) { + + + uint32_t representative; + uint32_t number_of_procs; + + if ( handlers->pointer[OTF_DEFPROCESSSUBSTITUTES_RECORD] == NULL ) { + + return OTF_RBuffer_readNewline( buffer ); + } + + representative= OTF_RBuffer_readUint32( buffer ); + + if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_LOCAL_MEMBERS ) || + OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_LOCAL_MEMBERS ) ) { + + number_of_procs= OTF_RBuffer_readArray( buffer, + &buffer->array, + &buffer->arraysize ); + + } else { + + PARSE_ERROR( buffer ); + + return 0; + } + + if ( OTF_RBuffer_readNewline( buffer ) ) { + + /* 0 is considered 
as the non-error return value of call-back handlers, + but the current function returns 0 on errors! */ + + return ( OTF_RETURN_OK /*0*/ == ( ( + (OTF_Handler_DefProcessSubstitutes*) + handlers->pointer[OTF_DEFPROCESSSUBSTITUTES_RECORD] ) + ( handlers->firsthandlerarg[OTF_DEFPROCESSSUBSTITUTES_RECORD], + streamid, + representative, + number_of_procs, + buffer->array, + buffer->list ) ) ); + + } else { + + PARSE_ERROR( buffer ); + + return 0; + } } /* *** Event records *** ****************************************** */ diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.c index 2effa148c4..764e6a05a4 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.c @@ -195,7 +195,7 @@ OTF_RBuffer* OTF_RBuffer_open( const char* filename, OTF_FileManager* manager ) /* Check the input parameters */ if( NULL == manager ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "manager has not been defined.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -205,7 +205,7 @@ OTF_RBuffer* OTF_RBuffer_open( const char* filename, OTF_FileManager* manager ) ret= (OTF_RBuffer*) malloc( sizeof(OTF_RBuffer) ); if ( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -219,7 +219,7 @@ OTF_RBuffer* OTF_RBuffer_open( const char* filename, OTF_FileManager* manager ) if ( NULL == ret->file ) { /* *** commented because it can happen when defstream cannot be loaded - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_File_open() failed. 
filename '%s'\n", __FUNCTION__, __FILE__, __LINE__, filename ); */ @@ -260,7 +260,7 @@ OTF_RBuffer* OTF_RBuffer_open_with_external_buffer( uint32_t len, const char* bu ret= (OTF_RBuffer*) malloc( sizeof(OTF_RBuffer) ); if ( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -273,7 +273,7 @@ OTF_RBuffer* OTF_RBuffer_open_with_external_buffer( uint32_t len, const char* bu if ( NULL == ret->file ) { /* *** commented because it can happen when defstream cannot be loaded - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_File_open() failed. filename '%s'\n", __FUNCTION__, __FILE__, __LINE__, filename ); */ @@ -331,7 +331,7 @@ int OTF_RBuffer_setSize( OTF_RBuffer* rbuffer, size_t size ) { if ( size < 100 ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "buffer size %u too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, (uint32_t) size ); @@ -340,7 +340,7 @@ int OTF_RBuffer_setSize( OTF_RBuffer* rbuffer, size_t size ) { if ( size < rbuffer->size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot shrink buffer from %u to %u.\n", __FUNCTION__, __FILE__, __LINE__, (uint32_t) rbuffer->size, (uint32_t) size ); @@ -353,7 +353,7 @@ int OTF_RBuffer_setSize( OTF_RBuffer* rbuffer, size_t size ) { rbuffer->size * sizeof(char) ); if( NULL == rbuffer->buffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -376,7 +376,7 @@ void OTF_RBuffer_setZBufferSize( OTF_RBuffer* rbuffer, uint32_t size ) { if ( 32 > size ) { - OTF_fprintf( 
stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "intended zbuffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -384,13 +384,13 @@ void OTF_RBuffer_setZBufferSize( OTF_RBuffer* rbuffer, uint32_t size ) { } else if ( 512 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "zbuffer size %u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "zbuffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } @@ -410,7 +410,7 @@ int OTF_RBuffer_setJumpSize( OTF_RBuffer* rbuffer, size_t size ) { if ( size < 100 ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "jump buffer size %u too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, (uint32_t) size ); @@ -419,7 +419,7 @@ int OTF_RBuffer_setJumpSize( OTF_RBuffer* rbuffer, size_t size ) { if ( size > rbuffer->size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "buffer size is smaller than jump size %u.\n", __FUNCTION__, __FILE__, __LINE__, (uint32_t) size ); @@ -489,7 +489,7 @@ int OTF_RBuffer_guaranteeRecord( OTF_RBuffer* rbuffer ) { /* no complete record available! end of file. 
*/ /* - OTF_fprintf( stderr, "OTF_RBuffer_guaranteeRecord() " + OTF_Error( "OTF_RBuffer_guaranteeRecord() " "cannot read, file exceeded\n" ); */ @@ -511,16 +511,11 @@ int OTF_RBuffer_guaranteeRecord( OTF_RBuffer* rbuffer ) { while ( 1024*1024*100 > rbuffer->size ) { - /* - OTF_fprintf( stderr, "OTF_RBuffer_guaranteeRecord() double buffer size " - "%u -> %u!\n", rbuffer->size, 2* rbuffer->size ); - */ - ret= OTF_RBuffer_setSize( rbuffer, 2* rbuffer->size ); if ( 1 != ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot double buffer size.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -530,7 +525,7 @@ int OTF_RBuffer_guaranteeRecord( OTF_RBuffer* rbuffer ) { add= OTF_RBuffer_advance( rbuffer ); if ( 0 == add ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "file exceeded.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -544,7 +539,7 @@ int OTF_RBuffer_guaranteeRecord( OTF_RBuffer* rbuffer ) { } } - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "buffer is too small. 
Extending buffer has finally failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -568,7 +563,7 @@ char *OTF_RBuffer_printRecord( OTF_RBuffer* rbuffer ) { ret= (char*) realloc( ret, size + REALLOCSIZE ); if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -599,7 +594,7 @@ size_t OTF_RBuffer_advance( OTF_RBuffer* rbuffer ) { if ( 0 == rbuffer->size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "buffer size not set!\n", __FUNCTION__, __FILE__, __LINE__ ); exit(1); @@ -607,7 +602,7 @@ size_t OTF_RBuffer_advance( OTF_RBuffer* rbuffer ) { if( rbuffer->pos > rbuffer->end ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "current position exceeds the file length.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -660,13 +655,11 @@ int OTF_RBuffer_jump( OTF_RBuffer* rbuffer, uint64_t filepos ) { /* uint64_t currentPos; */ uint32_t i; - /* OTF_fprintf( stderr, "\tjump to %llu\n", (unsigned long long) filepos ); */ - ret= OTF_File_seek( rbuffer->file, filepos ); if ( 0 != ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_File_seek() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -697,7 +690,7 @@ int OTF_RBuffer_jump( OTF_RBuffer* rbuffer, uint64_t filepos ) { if ( 0 == OTF_RBuffer_guaranteeRecord( rbuffer ) ) { /* - OTF_fprintf( stderr, "OTF_RBuffer_jump() " + OTF_Error( "OTF_RBuffer_jump() " "ERROR: could not read far enough\n" ); */ @@ -715,7 +708,7 @@ int OTF_RBuffer_jump( OTF_RBuffer* rbuffer, uint64_t filepos ) { if ( 0 == OTF_RBuffer_guaranteeRecord( rbuffer ) ) { /* - OTF_fprintf( stderr, "OTF_RBuffer_jump() " + OTF_Error( "OTF_RBuffer_jump() " "ERROR: could 
not read far enough\n" ); */ @@ -736,7 +729,7 @@ int OTF_RBuffer_jump( OTF_RBuffer* rbuffer, uint64_t filepos ) { if ( 0 == OTF_RBuffer_guaranteeRecord( rbuffer ) ) { /* - OTF_fprintf( stderr, "OTF_RBuffer_jump() " + OTF_Error( "OTF_RBuffer_jump() " "ERROR: could not read far enough\n" ); */ return 0; @@ -761,7 +754,7 @@ int OTF_RBuffer_jump( OTF_RBuffer* rbuffer, uint64_t filepos ) { if ( 0 == OTF_RBuffer_guaranteeRecord( rbuffer ) ) { /* - OTF_fprintf( stderr, "OTF_RBuffer_jump() " + OTF_Error( "OTF_RBuffer_jump() " "ERROR: could not read far enough\n" ); */ @@ -821,7 +814,10 @@ uint64_t OTF_RBuffer_getCurrentTime( OTF_RBuffer* rbuffer ) { if ( rbuffer->time == (uint64_t) -1 ) { - OTF_fprintf( stderr, "Warning: Invalid time\n" ); + + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " + "Invalid time.", + __FUNCTION__, __FILE__, __LINE__ ); } return rbuffer->time; @@ -1024,10 +1020,6 @@ int OTF_RBuffer_getFileProperties( OTF_RBuffer* rbuffer ) { } - /* - OTF_fprintf( stderr, "search min/max time\n" ); - */ - /* get very first timestamp easily */ rbuffer->firstTime= rbuffer->time; @@ -1037,7 +1029,7 @@ int OTF_RBuffer_getFileProperties( OTF_RBuffer* rbuffer ) { searchRange= ( SEARCH_RANGE < rbuffer->size ) ? 
SEARCH_RANGE : rbuffer->size; if( 0 >= searchRange ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "the search range is not allowed to be '0'.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1053,7 +1045,6 @@ int OTF_RBuffer_getFileProperties( OTF_RBuffer* rbuffer ) { ( pos > searchRange ) ) { pos= pos - searchRange; - OTF_fprintf( stderr, " jumppos: %llu\n", (long long unsigned) pos ); }*/ while ( ( !OTF_RBuffer_jump( rbuffer, pos ) ) && ( pos > 0 ) ) { @@ -1102,7 +1093,7 @@ int OTF_RBuffer_searchTime( OTF_RBuffer* rbuffer, uint64_t time ) { if ( 0 == OTF_RBuffer_getFileProperties( rbuffer ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "could not determine file size (%llu) or first/last time" " (%llx/%llx)\n", __FUNCTION__, __FILE__, __LINE__, @@ -1126,7 +1117,7 @@ int OTF_RBuffer_searchTime( OTF_RBuffer* rbuffer, uint64_t time ) { ret= OTF_RBuffer_jump( rbuffer, posA ); if ( 1 != ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "unsuccessful jump to begin pos= %llu.\n", __FUNCTION__, __FILE__, __LINE__, (unsigned long long) posA ); @@ -1184,7 +1175,7 @@ int OTF_RBuffer_searchTime( OTF_RBuffer* rbuffer, uint64_t time ) { ret= OTF_RBuffer_jump( rbuffer, posA ); if ( 1 != ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "unsuccessful jump to begin pos= %llu.\n", __FUNCTION__, __FILE__, __LINE__, (unsigned long long) posA ); @@ -1197,7 +1188,7 @@ int OTF_RBuffer_searchTime( OTF_RBuffer* rbuffer, uint64_t time ) { ret2= OTF_RBuffer_getRecord( rbuffer ); if( NULL == ret2 ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_RBuffer_getRecord() 
failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1429,30 +1420,30 @@ uint32_t OTF_RBuffer_readKeyValueList(OTF_RBuffer* buffer ) { break; case OTF_BYTE_ARRAY: pair.value.otf_byte_array.len = OTF_RBuffer_readBytes( buffer, - pair.value.otf_byte_array.array, OTF_KEYVALUE_MAX_ARRAY_LEN); - - if( pair.value.otf_byte_array.len > OTF_KEYVALUE_MAX_ARRAY_LEN ) { - - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " - "byte-array exceeds the maximum length of %u bytes per line.\n", - __FUNCTION__, __FILE__, __LINE__, OTF_KEYVALUE_MAX_ARRAY_LEN ); - - PARSE_ERROR( buffer ); - - return 0; - - } - - if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_LOCAL_LENGTH ) || - OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_LOCAL_LENGTH ) ) { + pair.value.otf_byte_array.array, OTF_KEYVALUE_MAX_ARRAY_LEN); - pair.value.otf_byte_array.len = OTF_RBuffer_readUint32( buffer ); + if( pair.value.otf_byte_array.len > OTF_KEYVALUE_MAX_ARRAY_LEN ) { + + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " + "byte-array exceeds the maximum length of %u bytes per line.\n", + __FUNCTION__, __FILE__, __LINE__, OTF_KEYVALUE_MAX_ARRAY_LEN ); + + PARSE_ERROR( buffer ); + + return 0; + + } + + if ( OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_S_LOCAL_LENGTH ) || + OTF_RBuffer_testKeyword( buffer, OTF_KEYWORD_L_LOCAL_LENGTH ) ) { + + pair.value.otf_byte_array.len = OTF_RBuffer_readUint32( buffer ); + + } - } - break; default: - + /* Pasre error */ PARSE_ERROR( buffer ); @@ -1460,7 +1451,7 @@ uint32_t OTF_RBuffer_readKeyValueList(OTF_RBuffer* buffer ) { } } else { - + /* Parse error */ PARSE_ERROR( buffer ); diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.h index 8305cfca61..b3744b14d0 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RBuffer.h @@ -29,7 +29,7 @@ /* *** some macros *** ****************************************** */ #define PARSE_ERROR( 
buffer ) \ - OTF_fprintf( stderr, "parse error in %s() %s:%u : %s\n", \ + OTF_Error( "Parse error in function %s, file: %s, line: %i:\n %s\n", \ __FUNCTION__, __FILE__, __LINE__, OTF_RBuffer_printRecord( buffer ) ); diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RStream.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RStream.c index 7a5515d3ba..eec4e669fe 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RStream.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_RStream.c @@ -72,7 +72,7 @@ int OTF_RStream_finish( OTF_RStream* rstream ) { ret &= tmpret; if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot close defbuffer.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -85,7 +85,7 @@ int OTF_RStream_finish( OTF_RStream* rstream ) { ret &= tmpret; if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot close event buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -99,7 +99,7 @@ int OTF_RStream_finish( OTF_RStream* rstream ) { ret &= tmpret; if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot close snapshots buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -113,7 +113,7 @@ int OTF_RStream_finish( OTF_RStream* rstream ) { ret &= tmpret; if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot close statistics buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -127,7 +127,7 @@ int OTF_RStream_finish( OTF_RStream* rstream ) { ret &= tmpret; if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot close defbuffer.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ 
-147,7 +147,7 @@ OTF_RStream* OTF_RStream_open( const char* namestub, uint32_t id, OTF_FileManage /* Check the input parameters */ if( NULL == manager ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "manager has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -157,7 +157,7 @@ OTF_RStream* OTF_RStream_open( const char* namestub, uint32_t id, OTF_FileManage ret= (OTF_RStream*) malloc( sizeof(OTF_RStream) ); if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -184,7 +184,7 @@ int OTF_RStream_close( OTF_RStream* rstream ) { if( NULL == rstream ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "rstream has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -212,7 +212,7 @@ OTF_RBuffer* OTF_RStream_getDefBuffer( OTF_RStream* rstream ) { rstream->id, OTF_FILETYPE_DEF, 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -266,7 +266,7 @@ int OTF_RStream_closeDefBuffer( OTF_RStream* rstream ) { ret&= OTF_RBuffer_close( rstream->defBuffer ); if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "closing defbuffer failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -291,7 +291,7 @@ OTF_RBuffer* OTF_RStream_getEventBuffer( OTF_RStream* rstream ) { rstream->id, OTF_FILETYPE_EVENT, 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " 
"OTF_getFilename() failed for event file with name stub '%s' and ID %u.\n", __FUNCTION__, __FILE__, __LINE__, rstream->namestub, rstream->id ); @@ -305,7 +305,7 @@ OTF_RBuffer* OTF_RStream_getEventBuffer( OTF_RStream* rstream ) { if ( NULL == rstream->eventBuffer ) { /* *** commented because it can happen when file not exists - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_RBuffer_open() failed for filename '%s'.\n", __FUNCTION__, __FILE__, __LINE__, filename ); */ @@ -333,7 +333,7 @@ int OTF_RStream_closeEventBuffer( OTF_RStream* rstream ) { ret&= OTF_RBuffer_close( rstream->eventBuffer ); if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "closing event buffer failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -357,7 +357,7 @@ OTF_RBuffer* OTF_RStream_getSnapsBuffer( OTF_RStream* rstream ) { rstream->id, OTF_FILETYPE_SNAPS, 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -393,7 +393,7 @@ int OTF_RStream_closeSnapsBuffer( OTF_RStream* rstream ) { ret&= OTF_RBuffer_close( rstream->snapsBuffer ); if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "closing snapshots buffer failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -417,7 +417,7 @@ OTF_RBuffer* OTF_RStream_getStatsBuffer( OTF_RStream* rstream ) { rstream->id, OTF_FILETYPE_STATS, 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -454,7 +454,7 @@ int 
OTF_RStream_closeStatsBuffer( OTF_RStream* rstream ) { ret&= OTF_RBuffer_close( rstream->statsBuffer ); if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "closing statistics buffer failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -478,7 +478,7 @@ OTF_RBuffer* OTF_RStream_getMarkerBuffer( OTF_RStream* rstream ) { rstream->id, OTF_FILETYPE_MARKER, 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -514,7 +514,7 @@ int OTF_RStream_closeMarkerBuffer( OTF_RStream* rstream ) { ret&= OTF_RBuffer_close( rstream->markerBuffer ); if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "closing defbuffer failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -532,7 +532,7 @@ void OTF_RStream_setBufferSizes( OTF_RStream* rstream, uint32_t size ) { if ( 50 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "intended buffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -540,13 +540,13 @@ void OTF_RStream_setBufferSizes( OTF_RStream* rstream, uint32_t size ) { } else if ( 500 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -570,7 +570,7 @@ 
void OTF_RStream_setZBufferSizes( OTF_RStream* rstream, uint32_t size ) { if ( 32 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "intended zbuffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -578,13 +578,13 @@ void OTF_RStream_setZBufferSizes( OTF_RStream* rstream, uint32_t size ) { } else if ( 512 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "zbuffer size %u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "zbuffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -612,7 +612,7 @@ void OTF_RStream_setRecordLimit( OTF_RStream* rstream, uint64_t limit ) { if( limit == OTF_READ_ERROR ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "limit cannot be set to %llu. 
Reset to OTF_READ_MAXRECORDS.\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) limit ); @@ -651,7 +651,7 @@ uint64_t OTF_RStream_readDefinitions( OTF_RStream* rstream, OTF_HandlerArray* ha if ( NULL == rstream->defBuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "the stream has no def buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -716,7 +716,7 @@ uint64_t OTF_RStream_readEvents( OTF_RStream* rstream, OTF_HandlerArray* handler rstream->eventBuffer= OTF_RStream_getEventBuffer( rstream ); if( NULL == rstream->eventBuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "the stream has no event buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -745,7 +745,7 @@ uint64_t OTF_RStream_readEvents( OTF_RStream* rstream, OTF_HandlerArray* handler if ( oldtime > currenttime ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "Time does decrease!!! %llu < %llu.\n", __FUNCTION__, __FILE__, __LINE__, (unsigned long long) currenttime, @@ -802,7 +802,7 @@ uint64_t OTF_RStream_readSnapshots( OTF_RStream* rstream, OTF_HandlerArray* hand rstream->snapsBuffer= OTF_RStream_getSnapsBuffer( rstream ); if( NULL == rstream->snapsBuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "the stream has no snapshots buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -831,7 +831,7 @@ uint64_t OTF_RStream_readSnapshots( OTF_RStream* rstream, OTF_HandlerArray* hand if ( oldtime > currenttime ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "Time does decrease!!! 
%llu < %llu.\n", __FUNCTION__, __FILE__, __LINE__, (unsigned long long) currenttime, @@ -888,7 +888,7 @@ uint64_t OTF_RStream_readStatistics( OTF_RStream* rstream, OTF_HandlerArray* han rstream->statsBuffer= OTF_RStream_getStatsBuffer( rstream ); if( NULL == rstream->statsBuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "the stream has no statistics buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -917,7 +917,7 @@ uint64_t OTF_RStream_readStatistics( OTF_RStream* rstream, OTF_HandlerArray* han if ( oldtime > currenttime ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "Time does decrease!!! %llu < %llu.\n", __FUNCTION__, __FILE__, __LINE__, (unsigned long long) currenttime, @@ -972,7 +972,7 @@ uint64_t OTF_RStream_readMarker( OTF_RStream* rstream, OTF_HandlerArray* handler if ( NULL == rstream->markerBuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "the stream has no def buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Reader.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Reader.c index 64c45cf2ca..29575fc64d 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Reader.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Reader.c @@ -190,7 +190,7 @@ int OTF_ProcessList_init( OTF_ProcessList* list, OTF_MasterControl* mc ) { n= OTF_MasterControl_getrCount( mc ); if ( 0 >= n ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no process has been defined in the master control.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -202,7 +202,7 @@ int OTF_ProcessList_init( OTF_ProcessList* list, OTF_MasterControl* mc ) { list->processes= (uint32_t*) malloc( n * sizeof(uint32_t) ); if( 
NULL == list->processes ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -212,7 +212,7 @@ int OTF_ProcessList_init( OTF_ProcessList* list, OTF_MasterControl* mc ) { list->status= (uint8_t*) malloc( n * sizeof(uint8_t) ); if( NULL == list->status ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -228,7 +228,7 @@ int OTF_ProcessList_init( OTF_ProcessList* list, OTF_MasterControl* mc ) { pair= OTF_MasterControl_getREntryByIndex( mc, i ); if( NULL == pair ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_getREntryByIndex() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -259,7 +259,7 @@ int OTF_ProcessList_init( OTF_ProcessList* list, OTF_MasterControl* mc ) { if ( 1 != test ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "process list from MasterControl is not sorted.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -425,7 +425,7 @@ int OTF_Heap_initEventHeap( OTF_Heap* heap, OTF_Reader* reader ) { heap->buffers= (OTF_RBuffer**) malloc( heap->s * sizeof(OTF_RBuffer*) ); if( NULL == heap->buffers ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -437,7 +437,7 @@ int OTF_Heap_initEventHeap( OTF_Heap* heap, OTF_Reader* reader ) { entry= OTF_MasterControl_getEntryByIndex( reader->mc, i ); if( NULL == entry ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " 
"OTF_MasterControl_getEntryByIndex() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -461,7 +461,7 @@ int OTF_Heap_initEventHeap( OTF_Heap* heap, OTF_Reader* reader ) { stream= OTF_Reader_getStream( reader, entry->argument ); if( NULL == stream ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot get stream '%llu'\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) entry->argument ); @@ -480,7 +480,7 @@ int OTF_Heap_initEventHeap( OTF_Heap* heap, OTF_Reader* reader ) { } else { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot open event buffer of stream %u.\n", __FUNCTION__, __FILE__, __LINE__, entry->argument ); @@ -520,7 +520,7 @@ int OTF_Heap_initDefHeap( OTF_Heap* heap, OTF_Reader* reader ) { heap->buffers= (OTF_RBuffer**) malloc( heap->s * sizeof(OTF_RBuffer*) ); if( NULL == heap->buffers ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -547,7 +547,7 @@ int OTF_Heap_initDefHeap( OTF_Heap* heap, OTF_Reader* reader ) { entry= OTF_MasterControl_getEntryByIndex( reader->mc, i ); if( NULL == entry ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_getEntryByIndex() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -570,7 +570,7 @@ int OTF_Heap_initDefHeap( OTF_Heap* heap, OTF_Reader* reader ) { stream= OTF_Reader_getStream( reader, entry->argument ); if( NULL == stream ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot get stream '%llu'\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) entry->argument ); @@ -613,7 +613,7 @@ int 
OTF_Heap_initStatisticsHeap( OTF_Heap* heap, OTF_Reader* reader ) { heap->buffers= (OTF_RBuffer**) malloc( heap->s * sizeof(OTF_RBuffer*) ); if( NULL == heap->buffers ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -625,7 +625,7 @@ int OTF_Heap_initStatisticsHeap( OTF_Heap* heap, OTF_Reader* reader ) { entry= OTF_MasterControl_getEntryByIndex( reader->mc, i ); if( NULL == entry ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_getEntryByIndex() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -649,7 +649,7 @@ int OTF_Heap_initStatisticsHeap( OTF_Heap* heap, OTF_Reader* reader ) { stream= OTF_Reader_getStream( reader, entry->argument ); if( NULL == stream ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot get stream '%llu'\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) entry->argument ); @@ -698,7 +698,7 @@ int OTF_Heap_initSnapshotsHeap( OTF_Heap* heap, OTF_Reader* reader ) { heap->buffers= (OTF_RBuffer**) malloc( heap->s * sizeof(OTF_RBuffer*) ); if( NULL == heap->buffers ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -710,7 +710,7 @@ int OTF_Heap_initSnapshotsHeap( OTF_Heap* heap, OTF_Reader* reader ) { entry= OTF_MasterControl_getEntryByIndex( reader->mc, i ); if( NULL == entry ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_getEntryByIndex() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -734,7 +734,7 @@ int OTF_Heap_initSnapshotsHeap( OTF_Heap* heap, OTF_Reader* 
reader ) { stream= OTF_Reader_getStream( reader, entry->argument ); if( NULL == stream ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot get stream '%llu'\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) entry->argument ); @@ -783,7 +783,7 @@ int OTF_Heap_initMarkerHeap( OTF_Heap* heap, OTF_Reader* reader ) { heap->buffers= (OTF_RBuffer**) malloc( heap->s * sizeof(OTF_RBuffer*) ); if( NULL == heap->buffers ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -810,7 +810,7 @@ int OTF_Heap_initMarkerHeap( OTF_Heap* heap, OTF_Reader* reader ) { entry= OTF_MasterControl_getEntryByIndex( reader->mc, i ); if( NULL == entry ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_getEntryByIndex() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -833,7 +833,7 @@ int OTF_Heap_initMarkerHeap( OTF_Heap* heap, OTF_Reader* reader ) { stream= OTF_Reader_getStream( reader, entry->argument ); if( NULL == stream ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot get stream '%llu'\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) entry->argument ); @@ -993,10 +993,10 @@ int OTF_Heap_checksorted( OTF_Heap* heap ) { /* - OTF_fprintf( stderr, "%u-heap with %u entries:\n", HEAP_CHILDRENCOUNT, heap->n ); + fprintf( stderr, "%u-heap with %u entries:\n", HEAP_CHILDRENCOUNT, heap->n ); for ( i= 0; i < heap->n; i++ ) { - OTF_fprintf( stderr, "%i: %llu\n", i, heap->buffers[i]->time ); + fprintf( stderr, "%i: %llu\n", i, heap->buffers[i]->time ); } */ @@ -1156,7 +1156,7 @@ OTF_Reader* OTF_Reader_open( const char* namestub, OTF_FileManager* manager ) { ret = 
(OTF_Reader*) malloc( sizeof( OTF_Reader ) ); if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1169,7 +1169,7 @@ OTF_Reader* OTF_Reader_open( const char* namestub, OTF_FileManager* manager ) { if( NULL == manager ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "manager has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1184,7 +1184,7 @@ OTF_Reader* OTF_Reader_open( const char* namestub, OTF_FileManager* manager ) { ret->mc= OTF_MasterControl_new( manager ); if( NULL == ret->mc ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_new() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1199,7 +1199,7 @@ OTF_Reader* OTF_Reader_open( const char* namestub, OTF_FileManager* manager ) { read= OTF_MasterControl_read( ret->mc, ret->namestub ); if( 0 == read ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_read() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1213,7 +1213,7 @@ OTF_Reader* OTF_Reader_open( const char* namestub, OTF_FileManager* manager ) { ret->processList= (OTF_ProcessList*) malloc( sizeof(OTF_ProcessList) ); if( NULL == ret->processList ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1228,7 +1228,7 @@ OTF_Reader* OTF_Reader_open( const char* namestub, OTF_FileManager* manager ) { if( 0 == OTF_ProcessList_init( ret->processList, ret->mc ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, 
file: %s, line: %i:\n " "OTF_ProcessList_init() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1249,7 +1249,7 @@ int OTF_Reader_close( OTF_Reader* reader ) { int ret=1; if( NULL == reader ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no reader has been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1269,7 +1269,7 @@ int OTF_Reader_setBufferSizes( OTF_Reader* reader, uint32_t size ) { if ( 50 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "intended buffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -1277,20 +1277,20 @@ int OTF_Reader_setBufferSizes( OTF_Reader* reader, uint32_t size ) { } else if ( 500 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } reader->buffersizes= size; - + return 1; } @@ -1309,7 +1309,7 @@ void OTF_Reader_setZBufferSizes( OTF_Reader* reader, uint32_t size ) { if ( 32 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "intended zbuffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -1317,13 +1317,13 @@ void OTF_Reader_setZBufferSizes( OTF_Reader* reader, uint32_t size ) { } else if ( 512 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size 
%u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } @@ -1403,7 +1403,7 @@ OTF_RStream* OTF_Reader_getStream( OTF_Reader* reader, uint32_t id ) { reader->n * sizeof( OTF_RStream* ) ); if( NULL == reader->stream ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1493,7 +1493,7 @@ uint64_t OTF_Reader_readDefinitions( OTF_Reader* reader, reader->definitionHeap= (OTF_Heap*) malloc( sizeof(OTF_Heap) ); if( NULL == reader->definitionHeap ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1502,7 +1502,7 @@ uint64_t OTF_Reader_readDefinitions( OTF_Reader* reader, if( 0 == OTF_Heap_initDefHeap( reader->definitionHeap, reader ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Heap_initDefHeap() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1564,7 +1564,7 @@ uint64_t OTF_Reader_readDefinitions( OTF_Reader* reader, handlers, streamId ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_parseDefRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1596,7 +1596,7 @@ uint64_t OTF_Reader_readDefinitions( OTF_Reader* reader, handlers, streamId ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " 
"OTF_Reader_readUnknownDefRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1652,7 +1652,7 @@ uint64_t OTF_Reader_readEvents( OTF_Reader* reader, OTF_HandlerArray* handlers ) reader->eventHeap= (OTF_Heap*) malloc( sizeof(OTF_Heap) ); if( NULL == reader->eventHeap ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1661,7 +1661,7 @@ uint64_t OTF_Reader_readEvents( OTF_Reader* reader, OTF_HandlerArray* handlers ) if( 0 == OTF_Heap_initEventHeap( reader->eventHeap, reader) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Heap_initEventHeap() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1717,7 +1717,7 @@ uint64_t OTF_Reader_readEvents( OTF_Reader* reader, OTF_HandlerArray* handlers ) if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_RBuffer_searchTime() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1754,7 +1754,7 @@ uint64_t OTF_Reader_readEvents( OTF_Reader* reader, OTF_HandlerArray* handlers ) /* inlined OTF_RBuffer_getCurrentTime() */ if( reader->eventHeap->buffers[i]->time < reader->minTime ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "current time %llu < mintime %llu.\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) reader->eventHeap->buffers[i]->time, @@ -1809,7 +1809,7 @@ uint64_t OTF_Reader_readEvents( OTF_Reader* reader, OTF_HandlerArray* handlers ) # ifdef OTF_DEBUG if ( oldtime > reader->eventHeap->progressCurrent ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "Time decreases. 
%llu < %llu.\n", __FUNCTION__, __FILE__, __LINE__, (unsigned long long) reader->eventHeap->progressCurrent, @@ -1862,7 +1862,7 @@ uint64_t OTF_Reader_readEvents( OTF_Reader* reader, OTF_HandlerArray* handlers ) ret= OTF_Reader_readUnknownRecord( reader->eventHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1897,7 +1897,7 @@ uint64_t OTF_Reader_readEvents( OTF_Reader* reader, OTF_HandlerArray* handlers ) ret= OTF_Reader_parseEventRecord( reader->eventHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_parseEventRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1929,7 +1929,7 @@ uint64_t OTF_Reader_readEvents( OTF_Reader* reader, OTF_HandlerArray* handlers ) ret= OTF_Reader_readUnknownRecord( reader->eventHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1981,7 +1981,7 @@ uint64_t OTF_Reader_readEventsUnsorted( OTF_Reader* reader, OTF_HandlerArray* ha reader->eventHeap= (OTF_Heap*) malloc( sizeof(OTF_Heap) ); if( NULL == reader->eventHeap ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1990,7 +1990,7 @@ uint64_t OTF_Reader_readEventsUnsorted( OTF_Reader* reader, OTF_HandlerArray* ha if( 0 == OTF_Heap_initEventHeap( reader->eventHeap, reader) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, 
line: %i:\n " "OTF_Heap_initEventHeap() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2046,7 +2046,7 @@ uint64_t OTF_Reader_readEventsUnsorted( OTF_Reader* reader, OTF_HandlerArray* ha if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_RBuffer_searchTime() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2083,7 +2083,7 @@ uint64_t OTF_Reader_readEventsUnsorted( OTF_Reader* reader, OTF_HandlerArray* ha /* inlined OTF_RBuffer_getCurrentTime() */ if( reader->eventHeap->buffers[i]->time < reader->minTime ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "current time %llu < mintime %llu.\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) reader->eventHeap->buffers[i]->time, @@ -2170,7 +2170,7 @@ uint64_t OTF_Reader_readEventsUnsorted( OTF_Reader* reader, OTF_HandlerArray* ha ret= OTF_Reader_readUnknownRecord( reader->eventHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2204,7 +2204,7 @@ uint64_t OTF_Reader_readEventsUnsorted( OTF_Reader* reader, OTF_HandlerArray* ha ret= OTF_Reader_parseEventRecord( reader->eventHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_parseEventRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2235,7 +2235,7 @@ uint64_t OTF_Reader_readEventsUnsorted( OTF_Reader* reader, OTF_HandlerArray* ha ret= OTF_Reader_readUnknownRecord( reader->eventHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function 
%s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2293,7 +2293,7 @@ uint64_t OTF_Reader_readSnapshots( OTF_Reader* reader, reader->snapshotsHeap= (OTF_Heap*) malloc( sizeof(OTF_Heap) ); if( NULL == reader->snapshotsHeap ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2302,7 +2302,7 @@ uint64_t OTF_Reader_readSnapshots( OTF_Reader* reader, if( 0 == OTF_Heap_initSnapshotsHeap( reader->snapshotsHeap, reader) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2361,7 +2361,7 @@ uint64_t OTF_Reader_readSnapshots( OTF_Reader* reader, if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_RBuffer_searchTime() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2399,7 +2399,7 @@ uint64_t OTF_Reader_readSnapshots( OTF_Reader* reader, /* inlined OTF_RBuffer_getCurrentTime() */ if( reader->snapshotsHeap->buffers[i]->time < reader->minTime ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "current time %llu < mintime %llu.\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) reader->snapshotsHeap->buffers[i]->time, @@ -2454,7 +2454,7 @@ uint64_t OTF_Reader_readSnapshots( OTF_Reader* reader, # ifdef OTF_DEBUG if ( oldtime > reader->snapshotsHeap->progressCurrent ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "Time does decrease. 
%llu < %llu.\n", __FUNCTION__, __FILE__, __LINE__, (unsigned long long) reader->snapshotsHeap->progressCurrent, @@ -2508,7 +2508,7 @@ uint64_t OTF_Reader_readSnapshots( OTF_Reader* reader, ret= OTF_Reader_readUnknownRecord( reader->snapshotsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2542,7 +2542,7 @@ uint64_t OTF_Reader_readSnapshots( OTF_Reader* reader, ret= OTF_Reader_parseSnapshotsRecord( reader->snapshotsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_parseSnapshotsRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2573,7 +2573,7 @@ uint64_t OTF_Reader_readSnapshots( OTF_Reader* reader, ret= OTF_Reader_readUnknownRecord( reader->snapshotsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2625,7 +2625,7 @@ uint64_t OTF_Reader_readSnapshotsUnsorted( OTF_Reader* reader, OTF_HandlerArray* reader->snapshotsHeap= (OTF_Heap*) malloc( sizeof(OTF_Heap) ); if( NULL == reader->snapshotsHeap ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2634,7 +2634,7 @@ uint64_t OTF_Reader_readSnapshotsUnsorted( OTF_Reader* reader, OTF_HandlerArray* if( 0 == OTF_Heap_initSnapshotsHeap( reader->snapshotsHeap, reader) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " 
"OTF_Heap_initSnapshotsHeap() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2690,7 +2690,7 @@ uint64_t OTF_Reader_readSnapshotsUnsorted( OTF_Reader* reader, OTF_HandlerArray* if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_RBuffer_searchTime() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2727,7 +2727,7 @@ uint64_t OTF_Reader_readSnapshotsUnsorted( OTF_Reader* reader, OTF_HandlerArray* /* inlined OTF_RBuffer_getCurrentTime() */ if( reader->snapshotsHeap->buffers[i]->time < reader->minTime ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "current time %llu < mintime %llu.\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) reader->snapshotsHeap->buffers[i]->time, @@ -2813,7 +2813,7 @@ uint64_t OTF_Reader_readSnapshotsUnsorted( OTF_Reader* reader, OTF_HandlerArray* ret= OTF_Reader_readUnknownRecord( reader->snapshotsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2847,7 +2847,7 @@ uint64_t OTF_Reader_readSnapshotsUnsorted( OTF_Reader* reader, OTF_HandlerArray* ret= OTF_Reader_parseSnapshotsRecord( reader->snapshotsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_parseSnapshotsRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2878,7 +2878,7 @@ uint64_t OTF_Reader_readSnapshotsUnsorted( OTF_Reader* reader, OTF_HandlerArray* ret= OTF_Reader_readUnknownRecord( reader->snapshotsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( 
"ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2934,7 +2934,7 @@ uint64_t OTF_Reader_readStatistics( OTF_Reader* reader, reader->statisticsHeap= (OTF_Heap*) malloc( sizeof(OTF_Heap) ); if( NULL == reader->statisticsHeap ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -2943,7 +2943,7 @@ uint64_t OTF_Reader_readStatistics( OTF_Reader* reader, if( 0 == OTF_Heap_initStatisticsHeap( reader->statisticsHeap, reader) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Heap_initStatisticsHeap() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3003,7 +3003,7 @@ uint64_t OTF_Reader_readStatistics( OTF_Reader* reader, reader->statisticsHeap->buffers[i] ); if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_RBuffer_searchTime() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3041,7 +3041,7 @@ uint64_t OTF_Reader_readStatistics( OTF_Reader* reader, /* inlined OTF_RBuffer_getCurrentTime() */ if( reader->statisticsHeap->buffers[i]->time < reader->minTime ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "current time %llu < mintime %llu.\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) reader->statisticsHeap->buffers[i]->time, @@ -3097,7 +3097,7 @@ uint64_t OTF_Reader_readStatistics( OTF_Reader* reader, # ifdef OTF_DEBUG if ( oldtime > reader->statisticsHeap->progressCurrent ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "Time does decrease. 
%llu < %llu.\n", __FUNCTION__, __FILE__, __LINE__, (unsigned long long) reader->statisticsHeap->progressCurrent, @@ -3151,7 +3151,7 @@ uint64_t OTF_Reader_readStatistics( OTF_Reader* reader, ret= OTF_Reader_readUnknownRecord( reader->statisticsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3185,7 +3185,7 @@ uint64_t OTF_Reader_readStatistics( OTF_Reader* reader, ret= OTF_Reader_parseStatisticsRecord( reader->statisticsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_parseStatisticsRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3216,7 +3216,7 @@ uint64_t OTF_Reader_readStatistics( OTF_Reader* reader, ret= OTF_Reader_readUnknownRecord( reader->statisticsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3271,7 +3271,7 @@ uint64_t OTF_Reader_readStatisticsUnsorted( OTF_Reader* reader, OTF_HandlerArray reader->statisticsHeap= (OTF_Heap*) malloc( sizeof(OTF_Heap) ); if( NULL == reader->statisticsHeap ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3280,7 +3280,7 @@ uint64_t OTF_Reader_readStatisticsUnsorted( OTF_Reader* reader, OTF_HandlerArray if( 0 == OTF_Heap_initStatisticsHeap( reader->statisticsHeap, reader) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " 
"OTF_Heap_initStatisticsHeap() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3336,7 +3336,7 @@ uint64_t OTF_Reader_readStatisticsUnsorted( OTF_Reader* reader, OTF_HandlerArray if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_RBuffer_searchTime() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3373,7 +3373,7 @@ uint64_t OTF_Reader_readStatisticsUnsorted( OTF_Reader* reader, OTF_HandlerArray /* inlined OTF_RBuffer_getCurrentTime() */ if( reader->statisticsHeap->buffers[i]->time < reader->minTime ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "current time %llu < mintime %llu.\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) reader->statisticsHeap->buffers[i]->time, @@ -3459,7 +3459,7 @@ uint64_t OTF_Reader_readStatisticsUnsorted( OTF_Reader* reader, OTF_HandlerArray ret= OTF_Reader_readUnknownRecord( reader->statisticsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3493,7 +3493,7 @@ uint64_t OTF_Reader_readStatisticsUnsorted( OTF_Reader* reader, OTF_HandlerArray ret= OTF_Reader_parseStatisticsRecord( reader->statisticsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_parseStatisticsRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3524,7 +3524,7 @@ uint64_t OTF_Reader_readStatisticsUnsorted( OTF_Reader* reader, OTF_HandlerArray ret= OTF_Reader_readUnknownRecord( reader->statisticsHeap->buffers[0], handlers ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + 
OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3577,7 +3577,7 @@ uint64_t OTF_Reader_readMarkers( OTF_Reader* reader, OTF_HandlerArray* handlers reader->markerHeap= (OTF_Heap*) malloc( sizeof(OTF_Heap) ); if( NULL == reader->markerHeap ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3586,7 +3586,7 @@ uint64_t OTF_Reader_readMarkers( OTF_Reader* reader, OTF_HandlerArray* handlers if( 0 == OTF_Heap_initMarkerHeap( reader->markerHeap, reader ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Heap_initDefHeap() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3647,7 +3647,7 @@ uint64_t OTF_Reader_readMarkers( OTF_Reader* reader, OTF_HandlerArray* handlers ret= OTF_Reader_parseMarkerRecord( reader->markerHeap->buffers[i], handlers, streamId ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_parseMarkerRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3678,7 +3678,7 @@ uint64_t OTF_Reader_readMarkers( OTF_Reader* reader, OTF_HandlerArray* handlers ret= OTF_Reader_readUnknownMarkerRecord( reader->markerHeap->buffers[i], handlers, streamId ); if ( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Reader_readUnknownMarkerRecord() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -3731,7 +3731,7 @@ void OTF_Reader_setRecordLimit( OTF_Reader* reader, uint64_t limit ) { if( limit == OTF_READ_ERROR ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " 
"limit cannot be set to %llu. Reset to OTF_READ_MAXRECORDS.\n", __FUNCTION__, __FILE__, __LINE__, (long long unsigned) limit ); diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h index d4439ef392..bc07ac71fb 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h @@ -17,8 +17,8 @@ #define OTF_VERSION_MAJOR 1 -#define OTF_VERSION_MINOR 9 -#define OTF_VERSION_SUB 1 +#define OTF_VERSION_MINOR 10 +#define OTF_VERSION_SUB 0 #define OTF_VERSION_STRING "openmpi" /** diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WBuffer.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WBuffer.c index 74943a6a2b..c9facbabca 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WBuffer.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WBuffer.c @@ -55,7 +55,7 @@ int OTF_WBuffer_finish( OTF_WBuffer* wbuffer ) { /* buffer shall be empty now */ if( 0 != wbuffer->pos ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "buffer is not empty (but is supposed to).\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -92,7 +92,7 @@ int OTF_WBuffer_close( OTF_WBuffer* wbuffer ) { int tmpret= OTF_WBuffer_flush( wbuffer ); if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_flush() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -101,7 +101,7 @@ int OTF_WBuffer_close( OTF_WBuffer* wbuffer ) { tmpret= OTF_File_close( wbuffer->file ); if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_File_close() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -110,7 +110,7 @@ int OTF_WBuffer_close( OTF_WBuffer* wbuffer ) { tmpret= OTF_WBuffer_finish( wbuffer ); if( 0 == tmpret ) { - OTF_fprintf( 
stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_finish() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -130,7 +130,7 @@ int OTF_WBuffer_setSize( OTF_WBuffer* wbuffer, size_t size ) { if ( size < wbuffer->size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "cannot shrink buffer from %u to %u.\n", __FUNCTION__, __FILE__, __LINE__, (uint32_t) wbuffer->size, (uint32_t) size ); @@ -142,7 +142,7 @@ int OTF_WBuffer_setSize( OTF_WBuffer* wbuffer, size_t size ) { size * sizeof(char) ); if( NULL == wbuffer->buffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -162,7 +162,7 @@ void OTF_WBuffer_setZBufferSize( OTF_WBuffer* wbuffer, uint32_t size ) { if ( 32 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "intended zbuffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -170,13 +170,13 @@ void OTF_WBuffer_setZBufferSize( OTF_WBuffer* wbuffer, uint32_t size ) { } else if ( 512 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "zbuffer size %u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "zbuffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } @@ -201,7 +201,7 @@ int OTF_WBuffer_flush( OTF_WBuffer* wbuffer ) { ret= OTF_File_write( wbuffer->file, wbuffer->buffer, wbuffer->pos ); if( ret != wbuffer->pos ) { 
- OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_File_write() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); retval= 0; @@ -225,7 +225,7 @@ int OTF_WBuffer_guarantee( OTF_WBuffer* wbuffer, size_t space ) { if ( space > wbuffer->size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "requested %u bytes > buffer size %u.\n", __FUNCTION__, __FILE__, __LINE__, (uint32_t) space, wbuffer->size ); @@ -235,7 +235,7 @@ int OTF_WBuffer_guarantee( OTF_WBuffer* wbuffer, size_t space ) { if( 0 == OTF_WBuffer_flush( wbuffer ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_flush() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -291,7 +291,7 @@ int OTF_WBuffer_setTimeAndProcess( OTF_WBuffer* wbuffer, there should be _no_ way to avoid this error message! */ if ( ( (uint64_t) -1 ) != wbuffer->time ) { - OTF_fprintf( stderr, "OTF ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "time not increasing. 
(t= %llu, p= %u)\n", __FUNCTION__, __FILE__, __LINE__, (unsigned long long int) t, (unsigned int) p ); @@ -320,7 +320,7 @@ uint32_t OTF_WBuffer_writeKeyword( OTF_WBuffer* wbuffer, int ret= OTF_WBuffer_guarantee( wbuffer, l ); if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_guarantee() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -348,7 +348,7 @@ uint32_t OTF_WBuffer_writeString( OTF_WBuffer* wbuffer, const char* string ) { if( 0 == OTF_WBuffer_guarantee( wbuffer, l+2 ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_guarantee() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -388,7 +388,7 @@ uint32_t OTF_WBuffer_writeChar( OTF_WBuffer* wbuffer, const char character ) { if( 0 == OTF_WBuffer_guarantee( wbuffer, 1 ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_guarantee() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -419,7 +419,7 @@ uint32_t OTF_WBuffer_writeUint8( OTF_WBuffer* wbuffer, uint8_t value ) { /* at max 2 digits will be written */ if( 0 == OTF_WBuffer_guarantee( wbuffer, 2 ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_guarantee() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -467,7 +467,7 @@ uint32_t OTF_WBuffer_writeUint16( OTF_WBuffer* wbuffer, uint16_t value ) { /* at max 4 digits will be written */ if( 0 == OTF_WBuffer_guarantee( wbuffer, 4 ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_guarantee() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -514,7 +514,7 @@ uint32_t OTF_WBuffer_writeUint32( OTF_WBuffer* wbuffer, 
uint32_t value ) { /* at max 8 digits will be written */ if( 0 == OTF_WBuffer_guarantee( wbuffer, 8 ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_guarantee() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -561,7 +561,7 @@ uint32_t OTF_WBuffer_writeUint64( OTF_WBuffer* wbuffer, uint64_t value ) { /* at max 16 digits will be written */ if( 0 == OTF_WBuffer_guarantee( wbuffer, 16 ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_guarantee() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -599,7 +599,7 @@ uint32_t OTF_WBuffer_writeNewline( OTF_WBuffer* wbuffer ) { if( 0 == OTF_WBuffer_guarantee( wbuffer, 1 ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_guarantee() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -626,7 +626,7 @@ uint32_t OTF_WBuffer_writeBytes( OTF_WBuffer* wbuffer, const uint8_t *value, uin /* at max 2 * len digits will be written */ if( 0 == OTF_WBuffer_guarantee( wbuffer, len*2 ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_guarantee() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -867,7 +867,7 @@ OTF_WBuffer* OTF_WBuffer_open_zlevel( const char* filename, OTF_WBuffer* ret= (OTF_WBuffer*) malloc( sizeof(OTF_WBuffer) ); if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -878,7 +878,7 @@ OTF_WBuffer* OTF_WBuffer_open_zlevel( const char* filename, if( NULL == manager ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, 
line: %i:\n " "manager has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -890,7 +890,7 @@ OTF_WBuffer* OTF_WBuffer_open_zlevel( const char* filename, ret->file= OTF_File_open_zlevel( filename, manager, OTF_FILEMODE_WRITE, compression ); if( NULL == ret->file ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_File_open() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WStream.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WStream.c index 1ed281fd76..03281282af 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WStream.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WStream.c @@ -84,7 +84,7 @@ int OTF_WStream_finish( OTF_WStream* wstream ) { # else tmpret= OTF_WBuffer_close( wstream->defBuffer ); if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_close() failed for the def buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -101,7 +101,7 @@ int OTF_WStream_finish( OTF_WStream* wstream ) { # else tmpret= OTF_WBuffer_close( wstream->eventBuffer ); if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_close() failed for the event buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -118,7 +118,7 @@ int OTF_WStream_finish( OTF_WStream* wstream ) { # else tmpret= OTF_WBuffer_close( wstream->snapsBuffer ); if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_close() failed for the snapshots buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -135,7 +135,7 @@ int OTF_WStream_finish( OTF_WStream* wstream ) { # else tmpret= OTF_WBuffer_close( wstream->statsBuffer ); if( 0 == tmpret ) { - 
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_close() failed for the statistics buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -152,7 +152,7 @@ int OTF_WStream_finish( OTF_WStream* wstream ) { # else tmpret= OTF_WBuffer_close( wstream->markerBuffer ); if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_close() failed for the statistics buffer.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -174,7 +174,7 @@ OTF_WStream* OTF_WStream_open( const char* namestub, uint32_t id, if( NULL == manager ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "manager has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -184,7 +184,7 @@ OTF_WStream* OTF_WStream_open( const char* namestub, uint32_t id, ret= (OTF_WStream*) malloc( sizeof(OTF_WStream) ); if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -213,7 +213,7 @@ int OTF_WStream_close( OTF_WStream* wstream ) { ret= OTF_WStream_finish( wstream ); if( 0 == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WStream_finish() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -277,7 +277,7 @@ OTF_WBuffer* OTF_WStream_getDefBuffer( OTF_WStream* wstream ) { wstream->id, OTF_FILETYPE_DEF, 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -293,7 +293,7 @@ OTF_WBuffer* OTF_WStream_getDefBuffer( OTF_WStream* 
wstream ) { if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -305,7 +305,7 @@ OTF_WBuffer* OTF_WStream_getDefBuffer( OTF_WStream* wstream ) { if( NULL == wstream->defBuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_open() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -325,7 +325,7 @@ OTF_WBuffer* OTF_WStream_getDefBuffer( OTF_WStream* wstream ) { if( 0 == OTF_WBuffer_setSize( wstream->defBuffer, wstream->buffersizes ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_setSize() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -353,7 +353,7 @@ OTF_WBuffer* OTF_WStream_getEventBuffer( OTF_WStream* wstream ) { wstream->id, OTF_FILETYPE_EVENT, 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -369,7 +369,7 @@ OTF_WBuffer* OTF_WStream_getEventBuffer( OTF_WStream* wstream ) { if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -381,7 +381,7 @@ OTF_WBuffer* OTF_WStream_getEventBuffer( OTF_WStream* wstream ) { if( NULL == wstream->eventBuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_open() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -401,7 +401,7 @@ OTF_WBuffer* OTF_WStream_getEventBuffer( OTF_WStream* wstream ) { if( 0 == OTF_WBuffer_setSize( 
wstream->eventBuffer, wstream->buffersizes ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_setSize() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -429,7 +429,7 @@ OTF_WBuffer* OTF_WStream_getSnapshotBuffer( OTF_WStream* wstream ) { wstream->id, OTF_FILETYPE_SNAPS, 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -445,7 +445,7 @@ OTF_WBuffer* OTF_WStream_getSnapshotBuffer( OTF_WStream* wstream ) { if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -456,7 +456,7 @@ OTF_WBuffer* OTF_WStream_getSnapshotBuffer( OTF_WStream* wstream ) { wstream->compression ); if( NULL == wstream->snapsBuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_open() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -476,7 +476,7 @@ OTF_WBuffer* OTF_WStream_getSnapshotBuffer( OTF_WStream* wstream ) { if( 0 == OTF_WBuffer_setSize( wstream->snapsBuffer, wstream->buffersizes ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_setSize() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -504,7 +504,7 @@ OTF_WBuffer* OTF_WStream_getStatsBuffer( OTF_WStream* wstream ) { wstream->id, OTF_FILETYPE_STATS, 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -520,7 +520,7 @@ 
OTF_WBuffer* OTF_WStream_getStatsBuffer( OTF_WStream* wstream ) { if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -531,7 +531,7 @@ OTF_WBuffer* OTF_WStream_getStatsBuffer( OTF_WStream* wstream ) { wstream->compression ); if( NULL == wstream->statsBuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_open() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -547,7 +547,7 @@ OTF_WBuffer* OTF_WStream_getStatsBuffer( OTF_WStream* wstream ) { if( 0 == OTF_WBuffer_setSize( wstream->statsBuffer, wstream->buffersizes ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_setSize() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -575,7 +575,7 @@ OTF_WBuffer* OTF_WStream_getMarkerBuffer( OTF_WStream* wstream ) { wstream->id, OTF_FILETYPE_MARKER, 0, NULL ); if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_getFilename() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -591,7 +591,7 @@ OTF_WBuffer* OTF_WStream_getMarkerBuffer( OTF_WStream* wstream ) { if( NULL == filename ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -602,7 +602,7 @@ OTF_WBuffer* OTF_WStream_getMarkerBuffer( OTF_WStream* wstream ) { wstream->compression ); if( NULL == wstream->markerBuffer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_open() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ 
-618,7 +618,7 @@ OTF_WBuffer* OTF_WStream_getMarkerBuffer( OTF_WStream* wstream ) { if( 0 == OTF_WBuffer_setSize( wstream->markerBuffer, wstream->buffersizes ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_WBuffer_setSize() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -642,7 +642,7 @@ int OTF_WStream_setCompression( OTF_WStream* wstream, OTF_FileCompression } else { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "compression is no expected value (%u). ignored.\n", __FUNCTION__, __FILE__, __LINE__, compression ); @@ -679,20 +679,20 @@ void OTF_WStream_setBufferSizes( OTF_WStream* wstream, uint32_t size ) { if ( 50 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "intended buffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); return; } else if ( 500 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -716,7 +716,7 @@ void OTF_WStream_setZBufferSizes( OTF_WStream* wstream, uint32_t size ) { if ( 32 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "intended zbuffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -724,13 +724,13 @@ void OTF_WStream_setZBufferSizes( OTF_WStream* wstream, uint32_t 
size ) { } else if ( 512 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "zbuffer size %u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "zbuffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -758,7 +758,7 @@ void OTF_WStream_setFormat( OTF_WStream* wstream, uint32_t format ) { if ( format > 1 ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "unknown ouput format chosen.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -809,7 +809,9 @@ int OTF_WStream_writeDefinitionCommentKV( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefinitionComment( OTF_WStream* wstream, @@ -852,7 +854,9 @@ int OTF_WStream_writeDefTimerResolutionKV( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeDefTimerResolution( OTF_WStream* wstream, @@ -874,7 +878,7 @@ int OTF_WStream_writeDefProcessKV( OTF_WStream* wstream, uint32_t deftoken, # ifdef OTF_DEBUG if( 0 == deftoken ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -930,7 +934,9 @@ int OTF_WStream_writeDefProcessKV( OTF_WStream* wstream, uint32_t deftoken, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefProcess( OTF_WStream* wstream, uint32_t deftoken, @@ -953,7 +959,7 @@ int OTF_WStream_writeDefProcessGroupKV( OTF_WStream* wstream, uint32_t deftoken, # ifdef OTF_DEBUG if( 0 == deftoken ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -1004,7 +1010,9 @@ int OTF_WStream_writeDefProcessGroupKV( OTF_WStream* wstream, uint32_t deftoken, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefProcessGroup( OTF_WStream* wstream, uint32_t deftoken, @@ -1067,7 +1075,9 @@ int OTF_WStream_writeDefAttributeListKV( OTF_WStream* wstream, uint32_t attr_tok } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeDefAttributeList( OTF_WStream* wstream, uint32_t attr_token, @@ -1118,7 +1128,9 @@ int OTF_WStream_writeDefProcessOrGroupAttributesKV( OTF_WStream* wstream, } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefProcessOrGroupAttributes( OTF_WStream* wstream, @@ -1140,7 +1152,7 @@ int OTF_WStream_writeDefFunctionKV( OTF_WStream* wstream, uint32_t deftoken, # ifdef OTF_DEBUG if( 0 == deftoken ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -1192,7 +1204,9 @@ int OTF_WStream_writeDefFunctionKV( OTF_WStream* wstream, uint32_t deftoken, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefFunction( OTF_WStream* wstream, uint32_t deftoken, @@ -1214,7 +1228,7 @@ int OTF_WStream_writeDefFunctionGroupKV( OTF_WStream* wstream, # ifdef OTF_DEBUG if( 0 == deftoken ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -1248,7 +1262,9 @@ int OTF_WStream_writeDefFunctionGroupKV( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeDefFunctionGroup( OTF_WStream* wstream, @@ -1270,7 +1286,7 @@ int OTF_WStream_writeDefCollectiveOperationKV( OTF_WStream* wstream, # ifdef OTF_DEBUG if( 0 == collOp ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -1308,7 +1324,9 @@ int OTF_WStream_writeDefCollectiveOperationKV( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefCollectiveOperation( OTF_WStream* wstream, @@ -1331,7 +1349,7 @@ int OTF_WStream_writeDefCounterKV( OTF_WStream* wstream, uint32_t deftoken, # ifdef OTF_DEBUG if( 0 == deftoken ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -1377,7 +1395,9 @@ int OTF_WStream_writeDefCounterKV( OTF_WStream* wstream, uint32_t deftoken, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeDefCounter( OTF_WStream* wstream, uint32_t deftoken, @@ -1401,7 +1421,7 @@ int OTF_WStream_writeDefCounterGroupKV( OTF_WStream* wstream, # ifdef OTF_DEBUG if( 0 == deftoken ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -1435,7 +1455,9 @@ int OTF_WStream_writeDefCounterGroupKV( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefCounterGroup( OTF_WStream* wstream, @@ -1457,7 +1479,7 @@ int OTF_WStream_writeDefSclKV( OTF_WStream* wstream, uint32_t deftoken, # ifdef OTF_DEBUG if( 0 == deftoken ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -1495,7 +1517,9 @@ int OTF_WStream_writeDefSclKV( OTF_WStream* wstream, uint32_t deftoken, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeDefScl( OTF_WStream* wstream, uint32_t deftoken, @@ -1517,7 +1541,7 @@ int OTF_WStream_writeDefSclFileKV( OTF_WStream* wstream, uint32_t deftoken, # ifdef OTF_DEBUG if( 0 == deftoken ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -1551,7 +1575,9 @@ int OTF_WStream_writeDefSclFileKV( OTF_WStream* wstream, uint32_t deftoken, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefSclFile( OTF_WStream* wstream, uint32_t deftoken, @@ -1603,7 +1629,9 @@ int OTF_WStream_writeDefVersionKV( OTF_WStream* wstream, uint8_t major, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefVersion( OTF_WStream* wstream, uint8_t major, @@ -1652,7 +1680,9 @@ int OTF_WStream_writeDefCreatorKV( OTF_WStream* wstream, const char* creator, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeDefCreator( OTF_WStream* wstream, const char* creator ) { @@ -1673,7 +1703,7 @@ int OTF_WStream_writeDefFileKV( OTF_WStream* wstream, uint32_t token, # ifdef OTF_DEBUG if( 0 == token ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -1714,7 +1744,9 @@ int OTF_WStream_writeDefFileKV( OTF_WStream* wstream, uint32_t token, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefFile( OTF_WStream* wstream, uint32_t token, @@ -1735,7 +1767,7 @@ int OTF_WStream_writeDefFileGroupKV( OTF_WStream* wstream, uint32_t token, # ifdef OTF_DEBUG if( 0 == token ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "'0' is an invalid token.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -1772,7 +1804,9 @@ int OTF_WStream_writeDefFileGroupKV( OTF_WStream* wstream, uint32_t token, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefFileGroup( OTF_WStream* wstream, uint32_t token, @@ -1829,7 +1863,9 @@ int OTF_WStream_writeDefKeyValueKV( OTF_WStream* wstream, uint32_t key, OTF_Type OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeDefKeyValue( OTF_WStream* wstream, uint32_t key, @@ -1876,7 +1912,9 @@ int OTF_WStream_writeDefTimeRange( OTF_WStream* wstream, uint64_t minTime, OTF_WBuffer_writeNewline( buffer ); - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeDefCounterAssignments( OTF_WStream* wstream, @@ -1922,9 +1960,75 @@ int OTF_WStream_writeDefCounterAssignments( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } +int OTF_WStream_writeDefProcessSubstitutes( OTF_WStream* wstream, + uint32_t representative, uint32_t numberOfProcs, + const uint32_t* procs, OTF_KeyValueList* list ) { + + + unsigned int i; + OTF_WBuffer* buffer= OTF_WStream_getDefBuffer( wstream ); + /* buffer can be NULL if file-open fails */ + if( NULL == buffer ) return 0; +# ifdef OTF_DEBUG + if( 0 == representative ) { + + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " + "'0' is an invalid token.\n", + __FUNCTION__, __FILE__, __LINE__ ); + } +# endif + + if ( OTF_WSTREAM_FORMAT_SHORT == wstream->format ) { + + OTF_WBuffer_writeKeyValueList_short(buffer, list); + + OTF_WBuffer_writeKeyword( buffer, + OTF_KEYWORD_S_DEF_PREFIX + OTF_KEYWORD_S_DEFPROCESSSUBSTITUTES ); + + OTF_WBuffer_writeUint32( buffer, representative ); + OTF_WBuffer_writeKeyword( buffer, OTF_KEYWORD_S_LOCAL_MEMBERS ); + + for ( i = 0; i < numberOfProcs; ++i ) { + + OTF_WBuffer_writeUint32( buffer, procs[i] ); + OTF_WBuffer_writeChar( buffer, ',' ); + } + + OTF_WBuffer_writeNewline( buffer ); + + } else if ( OTF_WSTREAM_FORMAT_LONG == wstream->format ) { + + OTF_WBuffer_writeKeyValueList_long(buffer, list); + + OTF_WBuffer_writeKeyword( buffer, + OTF_KEYWORD_L_DEF_PREFIX + OTF_KEYWORD_L_DEFPROCESSSUBSTITUTES " " ); + + OTF_WBuffer_writeUint32( buffer, representative ); + OTF_WBuffer_writeKeyword( 
buffer, " " OTF_KEYWORD_L_LOCAL_MEMBERS " " ); + + for ( i = 0; i < numberOfProcs; ++i ) { + + OTF_WBuffer_writeUint32( buffer, procs[i] ); + OTF_WBuffer_writeChar( buffer, ',' ); + } + + OTF_WBuffer_writeNewline( buffer ); + } + + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; +} + + + /* *** event record write handlers *** ************************************* */ int OTF_WStream_writeNoOpKV( OTF_WStream* wstream, uint64_t time, @@ -1957,7 +2061,9 @@ int OTF_WStream_writeNoOpKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeEnterKV( OTF_WStream* wstream, uint64_t time, @@ -2007,7 +2113,9 @@ int OTF_WStream_writeEnterKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeEnter( OTF_WStream* wstream, uint64_t time, @@ -2077,7 +2185,9 @@ int OTF_WStream_writeRecvMsgKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeRecvMsg( OTF_WStream* wstream, uint64_t time, @@ -2149,7 +2259,9 @@ int OTF_WStream_writeSendMsgKV( OTF_WStream* wstream, uint64_t time, uint32_t se OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeSendMsg( OTF_WStream* wstream, uint64_t time, uint32_t sender, @@ -2215,7 +2327,9 @@ int OTF_WStream_writeLeaveKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeLeave( OTF_WStream* wstream, uint64_t time, @@ -2262,7 +2376,9 @@ int OTF_WStream_writeCounterKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeCounter( OTF_WStream* wstream, uint64_t time, @@ -2338,7 +2454,9 @@ int OTF_WStream_writeCollectiveOperationKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeCollectiveOperation( OTF_WStream* wstream, uint64_t time, @@ -2363,8 +2481,7 @@ int OTF_WStream_writeBeginCollectiveOperationKV( OTF_WStream* wstream, /* buffer can be NULL if file-open fails */ if ( NULL == buffer ) return 0; - if( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) - return 0; + if( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) return 0; if( OTF_WSTREAM_FORMAT_SHORT == wstream->format ) { @@ -2429,7 +2546,9 @@ int OTF_WStream_writeBeginCollectiveOperationKV( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeBeginCollectiveOperation( OTF_WStream* wstream, @@ -2453,8 +2572,7 @@ int OTF_WStream_writeEndCollectiveOperationKV( OTF_WStream* wstream, /* buffer can be NULL if file-open fails */ if ( NULL == buffer ) return 0; - if( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) - return 0; + if( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) return 0; if( OTF_WSTREAM_FORMAT_SHORT == wstream->format ) { @@ -2479,7 +2597,9 @@ int OTF_WStream_writeEndCollectiveOperationKV( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeEndCollectiveOperation( OTF_WStream* wstream, @@ -2521,7 +2641,9 @@ int OTF_WStream_writeEventCommentKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeEventComment( OTF_WStream* wstream, uint64_t time, @@ -2562,7 +2684,9 @@ int OTF_WStream_writeBeginProcessKV( OTF_WStream* wstream, uint64_t time, } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeBeginProcess( OTF_WStream* wstream, uint64_t time, @@ -2603,7 +2727,9 @@ int OTF_WStream_writeEndProcessKV( OTF_WStream* wstream, uint64_t time, } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeEndProcess( OTF_WStream* wstream, uint64_t time, @@ -2679,7 +2805,9 @@ int OTF_WStream_writeFileOperationKV( OTF_WStream* wstream, uint64_t time, } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeFileOperation( OTF_WStream* wstream, uint64_t time, @@ -2700,8 +2828,7 @@ int OTF_WStream_writeBeginFileOperationKV( OTF_WStream* wstream, uint64_t time, /* buffer can be NULL if file-open fails */ if ( NULL == buffer ) return 0; - if( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) - return 0; + if( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) return 0; if( OTF_WSTREAM_FORMAT_SHORT == wstream->format ) { @@ -2739,7 +2866,9 @@ int OTF_WStream_writeBeginFileOperationKV( OTF_WStream* wstream, uint64_t time, } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeBeginFileOperation( OTF_WStream* wstream, uint64_t time, @@ -2761,8 +2890,7 @@ int OTF_WStream_writeEndFileOperationKV( OTF_WStream* wstream, uint64_t time, /* buffer can be NULL if file-open fails */ if ( NULL == buffer ) return 0; - if( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) - return 0; + if( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) return 0; if( OTF_WSTREAM_FORMAT_SHORT == wstream->format ) { @@ -2819,7 +2947,9 @@ int OTF_WStream_writeEndFileOperationKV( OTF_WStream* wstream, uint64_t time, } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeEndFileOperation( OTF_WStream* wstream, uint64_t time, @@ -2894,7 +3024,9 @@ int OTF_WStream_writeRMAPutKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeRMAPut( OTF_WStream* wstream, uint64_t time, @@ -2969,7 +3101,9 @@ int OTF_WStream_writeRMAPutRemoteEndKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeRMAPutRemoteEnd( OTF_WStream* wstream, uint64_t time, @@ -3044,7 +3178,9 @@ int OTF_WStream_writeRMAGetKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeRMAGet( OTF_WStream* wstream, uint64_t time, @@ -3110,7 +3246,9 @@ int OTF_WStream_writeRMAEndKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeRMAEnd( OTF_WStream* wstream, uint64_t time, @@ -3157,7 +3295,9 @@ int OTF_WStream_writeSnapshotCommentKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeSnapshotComment( OTF_WStream* wstream, uint64_t time, @@ -3220,7 +3360,9 @@ int OTF_WStream_writeEnterSnapshotKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeEnterSnapshot( OTF_WStream* wstream, uint64_t time, @@ -3302,8 +3444,10 @@ int OTF_WStream_writeSendSnapshotKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - - return 1; + + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeSendSnapshot( OTF_WStream* wstream, uint64_t time, @@ -3374,8 +3518,9 @@ int OTF_WStream_writeOpenFileSnapshotKV( OTF_WStream* wstream,uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; - + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeOpenFileSnapshot( OTF_WStream* wstream,uint64_t time, @@ -3397,8 +3542,7 @@ int OTF_WStream_writeBeginCollopSnapshotKV( OTF_WStream* wstream, uint64_t time, /* buffer can be NULL if file-open fails */ if ( NULL == buffer ) return 0; - if ( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) - return 0; + if ( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) return 0; if ( OTF_WSTREAM_FORMAT_SHORT == wstream->format ) { @@ -3471,7 +3615,9 @@ int OTF_WStream_writeBeginCollopSnapshotKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeBeginCollopSnapshot( OTF_WStream* wstream, uint64_t time, @@ -3494,8 +3640,7 @@ int OTF_WStream_writeBeginFileOpSnapshotKV( OTF_WStream* wstream, uint64_t time, /* buffer can be NULL if file-open fails */ if ( NULL == buffer ) return 0; - if ( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) - return 0; + if ( 0 == OTF_WBuffer_setTimeAndProcess( buffer, time, process ) ) return 0; if ( OTF_WSTREAM_FORMAT_SHORT == wstream->format ) { @@ -3538,8 +3683,9 @@ int OTF_WStream_writeBeginFileOpSnapshotKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeBeginFileOpSnapshot( OTF_WStream* wstream, uint64_t time, @@ -3587,7 +3733,9 @@ int OTF_WStream_writeSummaryCommentKV( OTF_WStream* wstream, uint64_t time, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeSummaryComment( OTF_WStream* wstream, uint64_t time, @@ -3642,8 +3790,10 @@ int OTF_WStream_writeFunctionSummaryKV( OTF_WStream* wstream, } OTF_WBuffer_writeNewline( buffer ); - - return 1; + + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeFunctionSummary( OTF_WStream* wstream, @@ -3700,8 +3850,10 @@ int OTF_WStream_writeFunctionGroupSummaryKV( OTF_WStream* wstream, } OTF_WBuffer_writeNewline( buffer ); - - return 1; + + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeFunctionGroupSummary( OTF_WStream* wstream, @@ -3771,8 +3923,10 @@ int OTF_WStream_writeMessageSummaryKV( OTF_WStream* wstream, } OTF_WBuffer_writeNewline( buffer ); - - return 1; + + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeMessageSummary( OTF_WStream* wstream, @@ -3840,7 +3994,9 @@ int OTF_WStream_writeCollopSummaryKV( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeCollopSummary( OTF_WStream* wstream, @@ -3914,9 +4070,10 @@ int OTF_WStream_writeFileOperationSummaryKV( OTF_WStream* wstream, uint64_t time } OTF_WBuffer_writeNewline( buffer ); - - return 1; - + + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeFileOperationSummary( OTF_WStream* wstream, uint64_t time, @@ -3990,9 +4147,10 @@ int OTF_WStream_writeFileGroupOperationSummaryKV( OTF_WStream* wstream, uint64_t } OTF_WBuffer_writeNewline( buffer ); - - return 1; - + + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeFileGroupOperationSummary( OTF_WStream* wstream, uint64_t time, @@ -4046,7 +4204,9 @@ int OTF_WStream_writeDefMarkerKV( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 
1 : 0; } int OTF_WStream_writeDefMarker( OTF_WStream* wstream, @@ -4103,7 +4263,9 @@ int OTF_WStream_writeMarkerKV( OTF_WStream* wstream, OTF_WBuffer_writeNewline( buffer ); } - return 1; + /* one or more of the last write operations could be failed; + check otf_errno for errors */ + return ( OTF_NO_ERROR == otf_errno ) ? 1 : 0; } int OTF_WStream_writeMarker( OTF_WStream* wstream, diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WStream.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WStream.h index 0f1b0abea6..c57d600ca6 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WStream.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_WStream.h @@ -617,8 +617,9 @@ int OTF_WStream_writeDefKeyValue( OTF_WStream* wstream, uint32_t key, * @see OTF_Writer_writeDefKeyValueKV() * \ingroup wstream */ -int OTF_WStream_writeDefKeyValueKV( OTF_WStream* wstream, uint32_t key, OTF_Type type, - const char* name, const char *description, OTF_KeyValueList* list ); +int OTF_WStream_writeDefKeyValueKV( OTF_WStream* wstream, uint32_t key, + OTF_Type type, const char* name, const char *description, + OTF_KeyValueList* list ); /** Write a DEFTIMERANGE record including an OTF_KeyValueList to @@ -626,21 +627,27 @@ int OTF_WStream_writeDefKeyValueKV( OTF_WStream* wstream, uint32_t key, OTF_Type * @see OTF_Writer_writeDefTimeRange() * \ingroup wstream */ -int OTF_WStream_writeDefTimeRange( OTF_WStream* wstream, - uint64_t minTime, - uint64_t maxTime, - OTF_KeyValueList* list ); +int OTF_WStream_writeDefTimeRange( OTF_WStream* wstream, uint64_t minTime, + uint64_t maxTime, OTF_KeyValueList* list ); /** Write a DEFCOUNTERASSIGNMENTS record including an OTF_KeyValueList to * stream 'wstream'. 
* @see OTF_Writer_writeDefCounterAssignments() * \ingroup wstream */ -int OTF_WStream_writeDefCounterAssignments( OTF_WStream* wstream, - uint32_t counter_token, - uint32_t number_of_members, - const uint32_t* procs_or_groups, - OTF_KeyValueList* list ); +int OTF_WStream_writeDefCounterAssignments( OTF_WStream* wstream, + uint32_t counter_token, uint32_t number_of_members, + const uint32_t* procs_or_groups, OTF_KeyValueList* list ); + +/** Write a DEFPROCESSSUBSTITUTES record including an OTF_KeyValueList to + * stream 'wstream'. + * @see OTF_Writer_writeDefProcessSubstitutes() + * \ingroup wstream + */ +int OTF_WStream_writeDefProcessSubstitutes( OTF_WStream* wstream, + uint32_t representative, uint32_t numberOfProcs, const uint32_t* procs, + OTF_KeyValueList* list ); + + /* *** event record write handlers *** ************************************* */ diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Writer.c b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Writer.c index 1cb218128c..dde144dddb 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Writer.c +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Writer.c @@ -112,7 +112,7 @@ int OTF_Writer_finish( OTF_Writer* writer ) { tmpret= OTF_MasterControl_write( writer->mc, writer->namestub ); if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_write() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -121,7 +121,7 @@ int OTF_Writer_finish( OTF_Writer* writer ) { tmpret= OTF_Writer_closeAllStreams( writer ); if( 0 == tmpret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Writer_closeAllStreams() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); } @@ -152,7 +152,7 @@ OTF_Writer* OTF_Writer_open( const char* namestub, uint32_t m, OTF_FileManager* if( NULL == manager ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: 
%s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "manager has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -162,7 +162,7 @@ OTF_Writer* OTF_Writer_open( const char* namestub, uint32_t m, OTF_FileManager* ret= (OTF_Writer*) malloc( sizeof(OTF_Writer) ); if( NULL == ret ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -180,7 +180,7 @@ OTF_Writer* OTF_Writer_open( const char* namestub, uint32_t m, OTF_FileManager* ret->mc= OTF_MasterControl_new( ret->manager ); if( NULL == ret->mc ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_new() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -201,7 +201,7 @@ int OTF_Writer_close( OTF_Writer* writer ) { if( NULL == writer ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "writer has not been specified.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -210,7 +210,7 @@ int OTF_Writer_close( OTF_Writer* writer ) { if( 0 == OTF_Writer_finish( writer ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_Writer_finish() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -239,7 +239,7 @@ int OTF_Writer_setCompression( OTF_Writer* writer, OTF_FileCompression } else { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "compression is no expected value (%u). 
ignored.\n", __FUNCTION__, __FILE__, __LINE__, compression ); @@ -276,7 +276,7 @@ void OTF_Writer_setBufferSizes( OTF_Writer* writer, uint32_t size ) { if ( 50 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "intended buffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -284,13 +284,13 @@ void OTF_Writer_setBufferSizes( OTF_Writer* writer, uint32_t size ) { } else if ( 500 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } @@ -313,7 +313,7 @@ void OTF_Writer_setZBufferSizes( OTF_Writer* writer, uint32_t size ) { if ( 32 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "intended zbuffer size %u is too small, rejected.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -321,13 +321,13 @@ void OTF_Writer_setZBufferSizes( OTF_Writer* writer, uint32_t size ) { } else if ( 512 > size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is very small, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); } else if ( 10 * 1024 *1024 < size ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "buffer size %u is rather big, accepted though.\n", __FUNCTION__, __FILE__, __LINE__, size ); @@ -355,7 +355,7 @@ void 
OTF_Writer_setFormat( OTF_Writer* writer, uint32_t format ) { if ( format > 1 ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "unknown ouput format chosen.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -428,7 +428,7 @@ OTF_WStream* OTF_Writer_getStream( OTF_Writer* writer, uint32_t streamId ) { /* not found, create & append at position 'a' */ /* - OTF_fprintf(stderr," at %u\n", a ); + fprintf(stderr," at %u\n", a ); */ if ( writer->s <= writer->n ) { @@ -438,7 +438,7 @@ OTF_WStream* OTF_Writer_getStream( OTF_Writer* writer, uint32_t streamId ) { writer->s * sizeof(OTF_WStream*) ); if( NULL == writer->streams ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no memory left.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -474,7 +474,7 @@ uint32_t OTF_Writer_assignProcess( OTF_Writer* writer, if ( 0 == stream ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "stream id must not be '0'.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -483,7 +483,7 @@ uint32_t OTF_Writer_assignProcess( OTF_Writer* writer, if( 0 == OTF_MasterControl_append( writer->mc, stream, process ) ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "OTF_MasterControl_append() failed.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -513,7 +513,7 @@ uint32_t OTF_Writer_mapProcess( OTF_Writer* writer, uint32_t process ) { if ( 0 == ret ) { /* - OTF_fprintf( stderr, "OTF_Writer_mapProcess() %u unknown\n", process ); + fprintf( stderr, "OTF_Writer_mapProcess() %u unknown\n", process ); */ ret= (uint32_t) -1; @@ -532,7 +532,7 @@ uint32_t OTF_Writer_mapProcess( OTF_Writer* writer, uint32_t process ) { must take care!*/ if( 0 >= n ) { - OTF_fprintf( stderr, "ERROR in function %s, file: %s, 
line: %i:\n " + OTF_Error( "ERROR in function %s, file: %s, line: %i:\n " "no processes/stream have been defined.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1001,7 +1001,7 @@ int OTF_Writer_writeDefSclFileKV( OTF_Writer* writer, uint32_t streamid, int OTF_Writer_writeOtfVersion( OTF_Writer* writer, uint32_t streamid ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "calling depricated function. ignored.\n", __FUNCTION__, __FILE__, __LINE__ ); @@ -1155,6 +1155,25 @@ int OTF_Writer_writeDefCounterAssignments( OTF_Writer* writer, } +int OTF_Writer_writeDefProcessSubstitutes( OTF_Writer* writer, uint32_t streamid, + uint32_t representative, uint32_t numberOfProcs, const uint32_t* procs, + OTF_KeyValueList* list ) { + + + OTF_WStream* stream= OTF_Writer_getStream( writer, (uint32_t) streamid ); + + if ( 0 == writer->versionWritten ) { + + OTF_WStream_writeOtfVersion( OTF_Writer_getStream( writer, 0 ) ); + writer->versionWritten= 1; + } + + return OTF_WStream_writeDefProcessSubstitutes( stream, representative, + numberOfProcs, procs, list ); +} + + + /* *** Event Records *** ****************************************** */ int OTF_Writer_writeNoOpKV( OTF_Writer* writer, uint64_t time, @@ -1977,7 +1996,7 @@ int OTF_Writer_writeDefMarker( OTF_Writer* writer, uint32_t streamID, #ifdef OTF_DEBUG if ( 0 != streamID ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "streamID value of '%u' is ignored, '0' is used instead\n", __FUNCTION__, __FILE__, __LINE__, streamID ); } @@ -1994,7 +2013,7 @@ int OTF_Writer_writeDefMarkerKV( OTF_Writer* writer, uint32_t streamID, #ifdef OTF_DEBUG if ( 0 != streamID ) { - OTF_fprintf( stderr, "WARNING in function %s, file: %s, line: %i:\n " + OTF_Warning( "WARNING in function %s, file: %s, line: %i:\n " "streamID value of '%u' is ignored, '0' is used 
instead\n", __FUNCTION__, __FILE__, __LINE__, streamID ); } diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Writer.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Writer.h index 1a58b5d6ed..b858eafb8e 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Writer.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Writer.h @@ -1081,6 +1081,38 @@ int OTF_Writer_writeDefCounterAssignments( OTF_Writer* writer, const uint32_t* procs_or_groups, OTF_KeyValueList* list ); + +/** + * Writes a ProcessSubstitutes definition record + * + * @param userData Pointer to user data which can be set with + * OTF_HandlerArray_setFirstHandlerArg(). + * @param streamid Identifies the stream to which this definition + * belongs to. stream = 0 represents a global + * definition. + * + * @param representative Process ID of the process that represents several others. + * + * @param numberOfProcs Number of entries in @procs array. + * + * @param procs The processes which are represented by + * @representative. It may or may not include + * @representative itself. + * + * @param list Pointer to an OTF_KeyValueList() that contains individual data. + * + * @return 1 on success, 0 if an error occurs. + * + * \ingroup writer + */ +int OTF_Writer_writeDefProcessSubstitutes( OTF_Writer* writer, + uint32_t streamid, + uint32_t representative, + uint32_t numberOfProcs, + const uint32_t* procs, + OTF_KeyValueList* list ); + + /** * Write a no-operation record. This can be used to write an OTF_KeyValueList * that is not attached to a special event record. 
diff --git a/ompi/contrib/vt/vt/extlib/otf/tests/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tests/Makefile.am index 27353e9444..8e3c058681 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tests/Makefile.am +++ b/ompi/contrib/vt/vt/extlib/otf/tests/Makefile.am @@ -3,7 +3,8 @@ SUBDIRS = \ generic_streams \ generic_streams-mpi \ progress \ - read_from_buffer + read_from_buffer \ + thumbnail EXTRA_DIST= \ python/groups.py \ diff --git a/ompi/contrib/vt/vt/extlib/otf/tests/thumbnail/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tests/thumbnail/Makefile.am new file mode 100644 index 0000000000..b70416f628 --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tests/thumbnail/Makefile.am @@ -0,0 +1,11 @@ +check_PROGRAMS = otf_thumbnail + +otf_thumbnail_SOURCES = thumbnail.c +otf_thumbnail_DEPENDENCIES = $(top_builddir)/otfauxlib/libotfaux.la \ + $(top_builddir)/otflib/libotf.la +otf_thumbnail_LDADD = $(top_builddir)/otfauxlib/libotfaux.la \ + $(top_builddir)/otflib/libotf.la +otf_thumbnail_CPPFLAGS = $(AM_CPPFLAGS) \ + -I$(top_builddir)/otflib \ + -I$(top_srcdir)/otflib \ + -I$(top_srcdir)/otfauxlib diff --git a/ompi/contrib/vt/vt/extlib/otf/tests/thumbnail/thumbnail.c b/ompi/contrib/vt/vt/extlib/otf/tests/thumbnail/thumbnail.c new file mode 100644 index 0000000000..85eb5f1b42 --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tests/thumbnail/thumbnail.c @@ -0,0 +1,63 @@ +/* -*- c -*- */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +static void +process_handler( void* data, + uint64_t process, + uint32_t start_pixel, + uint32_t function ) +{ + printf( "%lx: %x/%x\n", process, start_pixel, function ); +} + +int +main(int ac, char *av[]) +{ + OTF_FileManager* manager; + OTFAUX_ThumbnailWriter* writer; + OTFAUX_ThumbnailReader* reader; + OTFAUX_Thumbnail_Data data; + uint32_t start_pixels[7] = { 2, 5, 23, 49, 63, 76, 80 }; + uint32_t functions[7] = { 1, 2, 3, 2, 3, 2, 1 }; + uint32_t height, width; + int i; + + manager = OTF_FileManager_open( 16 
); + + writer = OTFAUX_ThumbnailWriter_create( "test_thumbnail.otf", + 4, + 16, + manager ); + + for ( i = 1; i <= 4; i++ ) + { + data.start_pixel = start_pixels; + data.function = functions; + data.size = 7; + + OTFAUX_ThumbnailWriter_writeProcess( writer, i, &data ); + } + OTFAUX_ThumbnailWriter_destroy( writer ); + + reader = OTFAUX_ThumbnailReader_create( "test_thumbnail.otf", + manager ); + OTFAUX_ThumbnailReader_getDimension( reader, &height, &width ); + printf( "%xx%x\n", height, width ); + + OTFAUX_ThumbnailReader_read( reader, process_handler, NULL ); + + OTFAUX_ThumbnailReader_destroy( reader ); + + OTF_FileManager_close( manager ); + + return 0; +} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tools/Makefile.am index 9e87b952c9..c6df4255c4 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/Makefile.am +++ b/ompi/contrib/vt/vt/extlib/otf/tools/Makefile.am @@ -12,10 +12,4 @@ else TMP3 = endif -if AMHAVEMPI -TMP4 = otfmerge-mpi otfprofile-mpi -else -TMP4 = -endif - -SUBDIRS = $(TMP1) $(TMP2) $(TMP3) $(TMP4) +SUBDIRS = $(TMP1) $(TMP2) $(TMP3) diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfcompress/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tools/otfcompress/Makefile.am index d48402472a..9502ccc4cc 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfcompress/Makefile.am +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfcompress/Makefile.am @@ -6,7 +6,7 @@ if AMBUILDBINARIES bin_PROGRAMS = otfcompress install-exec-hook: - (cd $(DESTDIR)$(bindir); rm -f otfdecompress; \ + (cd $(DESTDIR)$(bindir); rm -f otfdecompress$(EXEEXT); \ $(LN_S) otfcompress$(EXEEXT) otfdecompress$(EXEEXT)) uninstall-local: rm -f $(DESTDIR)$(bindir)/otfdecompress$(EXEEXT) diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfconfig/otfconfig.c b/ompi/contrib/vt/vt/extlib/otf/tools/otfconfig/otfconfig.c index 9a0c2b5d97..5c2854075d 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfconfig/otfconfig.c +++ 
b/ompi/contrib/vt/vt/extlib/otf/tools/otfconfig/otfconfig.c @@ -8,6 +8,9 @@ #endif +#include +#include + #include "OTF_inttypes.h" #include "OTF_Platform.h" #include "otf.h" @@ -35,64 +38,93 @@ int main( int argc, char** argv ) { int i; - char tmp[1024]; + char includes[1024]= ""; + char libs[1024]= ""; if( argc == 1 ) { SHOW_HELPTEXT; + return 0; } - for( i= 1; i < argc; ++i ) { - if( 0 == strcmp( argv[i], "-h" ) || 0 == strcmp( argv[i], "--help" )) { - + if( 0 == strcmp( argv[i], "-h" ) || + 0 == strcmp( argv[i], "--help" ) ) { + SHOW_HELPTEXT; - + return 0; + } else if ( 0 == strcmp( argv[i], "--version" ) ) { - printf( "%u.%u.%u %s\n", OTF_VERSION_MAJOR, OTF_VERSION_MINOR, OTF_VERSION_SUB, OTF_VERSION_STRING ); - + printf( "%u.%u.%u %s\n", + OTF_VERSION_MAJOR, OTF_VERSION_MINOR, + OTF_VERSION_SUB, OTF_VERSION_STRING ); + return 0; + } else if ( 0 == strcmp( argv[i], "--have-zlib" ) ) { #ifdef HAVE_ZLIB - printf( "yes\n" ); -#else - printf( "no\n" ); -#endif + printf( "yes\n" ); +#else /* HAVE_ZLIB */ + printf( "no\n" ); +#endif /* HAVE_ZLIB */ + return 0; } else if ( 0 == strcmp( argv[i], "--includes" ) ) { - printf( "-I%s\n", OTFCONFIG_INCLUDEDIR ); - + if ( !(*includes) ) { + + strncpy( includes, "-I"OTFCONFIG_INCLUDEDIR, + sizeof( includes) - 1 ); + } + } else if ( 0 == strcmp( argv[i], "--libs" ) ) { -#ifdef HAVE_ZLIB - snprintf( tmp, sizeof(tmp) -1, "-L%s -lotfaux -lotf -lz\n", - OTFCONFIG_LIBDIR ); -#else /* HAVE_ZLIB */ - snprintf( tmp, sizeof(tmp) -1, "-L%s -lotfaux -lotf\n", - OTFCONFIG_LIBDIR ); -#endif /* HAVE_ZLIB */ + if ( !(*libs) ) { - printf( "%s", tmp ); + strncpy( libs, + "-L"OTFCONFIG_LIBDIR" -lotfaux -lotf", + sizeof( libs ) - 1 ); +#ifdef HAVE_ZLIB + strncat( libs, " -lz", + sizeof( libs ) - strlen( libs ) - 1 ); +#endif /* HAVE_ZLIB */ + } } else if ( 0 == strcmp( argv[i], "--sizes" ) ) { /* print size of integer types */ - printf( " sizeof(%s)= %llu\n", " int8_t ", (long long unsigned) sizeof(int8_t) ); - printf( " sizeof(%s)= %llu\n", " 
int16_t ", (long long unsigned) sizeof(int16_t) ); - printf( " sizeof(%s)= %llu\n", " int32_t ", (long long unsigned) sizeof(int32_t) ); - printf( " sizeof(%s)= %llu\n", " int64_t ", (long long unsigned) sizeof(int64_t) ); - printf( " sizeof(%s)= %llu\n", " uint8_t ", (long long unsigned) sizeof(uint8_t) ); - printf( " sizeof(%s)= %llu\n", " uint16_t ", (long long unsigned) sizeof(uint16_t) ); - printf( " sizeof(%s)= %llu\n", " uint32_t ", (long long unsigned) sizeof(uint32_t) ); - printf( " sizeof(%s)= %llu\n", " uint64_t ", (long long unsigned) sizeof(uint64_t) ); + printf( " sizeof(%s)= %llu\n", " int8_t ", + (long long unsigned) sizeof(int8_t) ); + printf( " sizeof(%s)= %llu\n", " int16_t ", + (long long unsigned) sizeof(int16_t) ); + printf( " sizeof(%s)= %llu\n", " int32_t ", + (long long unsigned) sizeof(int32_t) ); + printf( " sizeof(%s)= %llu\n", " int64_t ", + (long long unsigned) sizeof(int64_t) ); + printf( " sizeof(%s)= %llu\n", " uint8_t ", + (long long unsigned) sizeof(uint8_t) ); + printf( " sizeof(%s)= %llu\n", " uint16_t ", + (long long unsigned) sizeof(uint16_t) ); + printf( " sizeof(%s)= %llu\n", " uint32_t ", + (long long unsigned) sizeof(uint32_t) ); + printf( " sizeof(%s)= %llu\n", " uint64_t ", + (long long unsigned) sizeof(uint64_t) ); + return 0; } } + if ( *includes ) { + + printf( "%s%c", includes, *libs ? 
' ' : '\n' ); + } + if ( *libs ) { + + printf( "%s\n", libs ); + } return 0; } diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.cpp index 8a7b999ffb..f564f342f0 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.cpp +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.cpp @@ -241,12 +241,38 @@ int handleDefProcessGroup( void* userData, uint32_t stream, return OTF_RETURN_OK; } +#define CASE_RETURN(type) \ + case OTF_ATTR_##type: return #type + +static const char* attrToString( OTF_ATTR_TYPE type ) { + + switch( type ) { + CASE_RETURN( IsMPIRank ); + CASE_RETURN( IsPThread ); + CASE_RETURN( IsOMPThread ); + CASE_RETURN( IsCellSPUThread ); + CASE_RETURN( hasGroupCounters ); + CASE_RETURN( hasEnterLeaveRecords ); + CASE_RETURN( IsCommunicator ); + + case OTF_ATTR_UNKNOWN: + default: { + static char unknown_buffer[ 64 ]; + sprintf( unknown_buffer, "UNKNOWN <%u>", type ); + return unknown_buffer; + } + + } +} + +#undef CASE_RETURN + int handleDefAttributeList( void* userData, uint32_t stream, uint32_t attr_token, uint32_t num, OTF_ATTR_TYPE* array, OTF_KeyValueList* kvlist ) { - uint32_t i; + uint32_t i; Control* c= (Control*) userData; @@ -258,12 +284,12 @@ int handleDefAttributeList( void* userData, uint32_t stream, fprintf( c->outfile, "(#%llu) \tDefAttributeList: stream %u, attr_token %u, attributes ", (long long unsigned) c->num, stream, attr_token); - for( i= 0; i < (num - 1); ++i ) { - fprintf( c->outfile, "%u, ", array[i] ); + const char* sep = ""; + for( i= 0; i < num; ++i ) { + fprintf( c->outfile, "%s%s", sep, attrToString( array[i] ) ); + sep = ", "; } - fprintf( c->outfile, "%u", array[i] ); - printKeyValueList(c, kvlist); } } @@ -345,6 +371,29 @@ int handleDefFunctionGroup( void* userData, uint32_t stream, } +#define CASE_RETURN(name) \ + case OTF_COLLECTIVE_TYPE_##name: return #name + +static const char* collOpTypeToString( uint32_t type ) { + + switch( type ) { + 
CASE_RETURN( BARRIER ); + CASE_RETURN( ONE2ALL ); + CASE_RETURN( ALL2ONE ); + CASE_RETURN( ALL2ALL ); + + case OTF_COLLECTIVE_TYPE_UNKNOWN: + default: { + static char unknown_buffer[ 64 ]; + sprintf( unknown_buffer, "UNKNOWN <%u>", type ); + return unknown_buffer; + } + + } +} + +#undef CASE_RETURN + int handleDefCollectiveOperation( void* userData, uint32_t stream, uint32_t collOp, const char* name, uint32_t type, OTF_KeyValueList* kvlist ) { @@ -357,8 +406,8 @@ int handleDefCollectiveOperation( void* userData, uint32_t stream, ++c->num; if( c->num >= c->minNum && c->num <= c->maxNum && ! c->silent_mode ) { - fprintf( c->outfile, "(#%llu) \tDefCollective: stream %u, collective %u, name \"%s\", type %u", - (long long unsigned) c->num, stream, collOp, name, type ); + fprintf( c->outfile, "(#%llu) \tDefCollective: stream %u, collective %u, name \"%s\", type %s", + (long long unsigned) c->num, stream, collOp, name, collOpTypeToString( type ) ); printKeyValueList(c, kvlist); } @@ -369,6 +418,67 @@ int handleDefCollectiveOperation( void* userData, uint32_t stream, } +#define CASE_RETURN(prefix, name) \ + case OTF_COUNTER_##prefix##_##name: return #name + +static const char* counterTypeToString( uint32_t properties ) { + + switch( properties & OTF_COUNTER_TYPE_BITS ) { + CASE_RETURN( TYPE, ACC ); + CASE_RETURN( TYPE, ABS ); + + default: { + static char unknown_buffer[ 64 ]; + sprintf( unknown_buffer, "UNKNOWN <%u>", + properties & OTF_COUNTER_TYPE_BITS ); + return unknown_buffer; + } + + } +} + +static const char* counterScopeToString( uint32_t properties ) { + + switch( properties & OTF_COUNTER_SCOPE_BITS ) { + CASE_RETURN( SCOPE, START ); + CASE_RETURN( SCOPE, POINT ); + CASE_RETURN( SCOPE, LAST ); + CASE_RETURN( SCOPE, NEXT ); + + default: { + static char unknown_buffer[ 64 ]; + sprintf( unknown_buffer, "UNKNOWN <%u>", + properties & OTF_COUNTER_SCOPE_BITS ); + return unknown_buffer; + } + + } +} + +static const char* counterVarTypeToString( uint32_t properties ) { + 
+ switch( properties & OTF_COUNTER_VARTYPE_BITS ) { + CASE_RETURN( VARTYPE, UNSIGNED8 ); + CASE_RETURN( VARTYPE, SIGNED8 ); + CASE_RETURN( VARTYPE, UNSIGNED4 ); + CASE_RETURN( VARTYPE, SIGNED4 ); + CASE_RETURN( VARTYPE, UNSIGNED2 ); + CASE_RETURN( VARTYPE, SIGNED2 ); + CASE_RETURN( VARTYPE, FLOAT ); + CASE_RETURN( VARTYPE, DOUBLE ); + + default: { + static char unknown_buffer[ 64 ]; + sprintf( unknown_buffer, "UNKNOWN <%u>", + properties & OTF_COUNTER_VARTYPE_BITS ); + return unknown_buffer; + } + + } +} + +#undef CASE_RETURN + int handleDefCounter( void* userData, uint32_t stream, uint32_t counter, const char* name, uint32_t properties, uint32_t counterGroup, const char* unit, OTF_KeyValueList* kvlist ) { @@ -376,14 +486,20 @@ int handleDefCounter( void* userData, uint32_t stream, uint32_t counter, Control* c= (Control*) userData; + c->counter_props[counter] = properties; if( c->records[OTF_DEFCOUNTER_RECORD] ) { ++c->num; if( c->num >= c->minNum && c->num <= c->maxNum && ! c->silent_mode ) { - fprintf( c->outfile, "(#%llu) \tDefCounter: stream %u, counter %u, name \"%s\", properties %u, group %u, unit \"%s\"", - (long long unsigned) c->num, stream, counter, name, properties, counterGroup, unit ); + fprintf( c->outfile, "(#%llu) \tDefCounter: stream %u, " + "counter %u, name \"%s\", properties %s/%s/%s, group %u, unit \"%s\"", + (long long unsigned) c->num, stream, counter, name, + counterTypeToString( properties ), + counterScopeToString( properties ), + counterVarTypeToString( properties ), + counterGroup, unit ); printKeyValueList(c, kvlist); } @@ -555,10 +671,41 @@ int handleDefFileGroup( void* userData, uint32_t stream, } -int handleDefKeyValue( void *userData, uint32_t streamid, uint32_t token, OTF_Type type, - const char *name, const char* desc, OTF_KeyValueList* kvlist ) { +#define CASE_RETURN(type) \ + case OTF_##type: return #type +static const char* keyValueTypeToString( OTF_Type type ) { + switch( type ) { + CASE_RETURN( CHAR ); + CASE_RETURN( INT8 ); 
+ CASE_RETURN( UINT8 ); + CASE_RETURN( INT16 ); + CASE_RETURN( UINT16 ); + CASE_RETURN( INT32 ); + CASE_RETURN( UINT32 ); + CASE_RETURN( INT64 ); + CASE_RETURN( UINT64 ); + CASE_RETURN( FLOAT ); + CASE_RETURN( DOUBLE ); + CASE_RETURN( BYTE_ARRAY ); + + case OTF_UNKNOWN: + default: { + static char unknown_buffer[ 64 ]; + sprintf( unknown_buffer, "UNKNOWN <%u>", type ); + return unknown_buffer; + } + + } +} + +#undef CASE_RETURN + +int handleDefKeyValue( void *userData, uint32_t streamid, uint32_t token, + OTF_Type type, const char *name, const char* desc, + OTF_KeyValueList* kvlist ) +{ Control* c= (Control*) userData; @@ -567,8 +714,9 @@ int handleDefKeyValue( void *userData, uint32_t streamid, uint32_t token, OTF_Ty ++c->num; if( c->num >= c->minNum && c->num <= c->maxNum && ! c->silent_mode ) { - fprintf( c->outfile, "(#%llu) \tDefKeyValue: stream %u, token %u, type %u, name \"%s\", desc \"%s\"", - (long long unsigned) c->num, streamid, token, type, name, desc ); + fprintf( c->outfile, "(#%llu) \tDefKeyValue: stream %u, token %u, type %s, name \"%s\", desc \"%s\"", + (long long unsigned) c->num, streamid, token, + keyValueTypeToString( type ), name, desc ); printKeyValueList(c, kvlist); } @@ -578,69 +726,102 @@ int handleDefKeyValue( void *userData, uint32_t streamid, uint32_t token, OTF_Ty } -int handleDefTimeRange( void* userData, - uint32_t streamid, - uint64_t minTime, - uint64_t maxTime, - OTF_KeyValueList* kvlist ) { +int handleDefTimeRange( void* userData, uint32_t streamid, uint64_t minTime, + uint64_t maxTime, OTF_KeyValueList* kvlist ) +{ + Control* c= (Control*) userData; - Control* c= (Control*) userData; + if ( c->records[OTF_DEFTIMERANGE_RECORD] ) { - if ( c->records[OTF_DEFTIMERANGE_RECORD] ) { + ++c->num; + if ( c->num >= c->minNum && c->num <= c->maxNum && ! c->silent_mode ) { - ++c->num; - if ( c->num >= c->minNum && c->num <= c->maxNum && ! 
c->silent_mode ) { + fprintf( c->outfile, + "(#%llu) \tDefTimeRange: stream %u, " + "[%llu, %llu]", + (long long unsigned) c->num, + streamid, + (long long unsigned) minTime, + (long long unsigned) maxTime ); - fprintf( c->outfile, - "(#%llu) \tDefTimeRange: stream %u, " - "[%llu, %llu]", - (long long unsigned) c->num, - streamid, - (long long unsigned) minTime, - (long long unsigned) maxTime ); + printKeyValueList( c, kvlist ); + } + } - printKeyValueList( c, kvlist ); - } - } - - return OTF_RETURN_OK; + return OTF_RETURN_OK; } -int handleDefCounterAssignments( void* userData, - uint32_t streamid, - uint32_t counter_token, - uint32_t number_of_members, - const uint32_t* procs_or_groups, - OTF_KeyValueList* kvlist ) { - uint32_t i; - Control* c= (Control*) userData; +int handleDefCounterAssignments( void* userData, uint32_t streamid, + uint32_t counter_token, uint32_t number_of_members, + const uint32_t* procs_or_groups, OTF_KeyValueList* kvlist ) +{ + Control* c= (Control*) userData; + uint32_t i; - if ( c->records[OTF_DEFCOUNTERASSIGNMENTS_RECORD] ) { + if ( c->records[OTF_DEFCOUNTERASSIGNMENTS_RECORD] ) { - ++c->num; - if ( c->num >= c->minNum && c->num <= c->maxNum && ! c->silent_mode ) { - const char* sep= ""; + ++c->num; + if ( c->num >= c->minNum && c->num <= c->maxNum && ! 
c->silent_mode ) { - fprintf( c->outfile, - "(#%llu) \tDefCounterAssignments: stream %u, " - "counter_token %u, assignees ", - (long long unsigned) c->num, - streamid, - counter_token ); + const char* sep= ""; - for ( i= 0; i < number_of_members; ++i ) { - fprintf( c->outfile, "%s%u", sep, procs_or_groups[i] ); - sep= ", "; - } + fprintf( c->outfile, + "(#%llu) \tDefCounterAssignments: stream %u, " + "counter_token %u, assignees ", + (long long unsigned) c->num, + streamid, + counter_token ); - printKeyValueList( c, kvlist ); - } - } + for ( i= 0; i < number_of_members; ++i ) { - return OTF_RETURN_OK; + fprintf( c->outfile, "%s%u", sep, procs_or_groups[i] ); + sep= ", "; + } + + printKeyValueList( c, kvlist ); + } + } + + return OTF_RETURN_OK; } + +int handleDefProcessSubstitutes( void* userData, uint32_t streamid, + uint32_t representative, uint32_t numberOfProcs, const uint32_t* procs, + OTF_KeyValueList* kvlist ) { + + Control* c= (Control*) userData; + uint32_t i; + + if ( c->records[OTF_DEFPROCESSSUBSTITUTES_RECORD] ) { + + ++c->num; + if ( c->num >= c->minNum && c->num <= c->maxNum && ! 
c->silent_mode ) { + + const char* sep= ""; + + fprintf( c->outfile, + "(#%llu) \tDefProcessSubstitutes: stream %u, " + "representative process %u, procs ", + (long long unsigned) c->num, + streamid, + representative ); + + for ( i= 0; i < numberOfProcs; ++i ) { + + fprintf( c->outfile, "%s%u", sep, procs[i] ); + sep= ", "; + } + + printKeyValueList( c, kvlist ); + } + } + + return OTF_RETURN_OK; +} + + int handleNoOp( void* userData, uint64_t time, uint32_t process, OTF_KeyValueList* kvlist ) { @@ -665,6 +846,7 @@ int handleNoOp( void* userData, uint64_t time, uint32_t process, return OTF_RETURN_OK; } + int handleEnter( void* userData, uint64_t time, uint32_t function, uint32_t process, uint32_t source, OTF_KeyValueList* kvlist ) { @@ -767,6 +949,55 @@ int handleRecvMsg( void* userData, uint64_t time, uint32_t recvProc, } +static void printCounterValue( Control* c, uint32_t counter, uint64_t value ) { + + uint32_t properties = OTF_COUNTER_VARTYPE_UNSIGNED8; + std::map::const_iterator it + = c->counter_props.find( counter ); + if( it != c->counter_props.end() ) { + + properties= it->second & OTF_COUNTER_VARTYPE_BITS; + } + + switch( properties ) { + + case OTF_COUNTER_VARTYPE_UNSIGNED8: + case OTF_COUNTER_VARTYPE_UNSIGNED4: + case OTF_COUNTER_VARTYPE_UNSIGNED2: { + + uint64_t conv_value = OTF_Counter2Unsigned( value ); + fprintf( c->outfile, "%llu", (unsigned long long) conv_value ); + break; + } + + case OTF_COUNTER_VARTYPE_SIGNED8: + case OTF_COUNTER_VARTYPE_SIGNED4: + case OTF_COUNTER_VARTYPE_SIGNED2: { + + int64_t conv_value = OTF_Counter2Signed( value ); + fprintf( c->outfile, "%lld", (long long) conv_value ); + break; + } + + case OTF_COUNTER_VARTYPE_FLOAT: { + + float conv_value = OTF_Counter2Float( value ); + fprintf( c->outfile, "%f", conv_value ); + break; + } + + case OTF_COUNTER_VARTYPE_DOUBLE: { + + double conv_value = OTF_Counter2Double( value ); + fprintf( c->outfile, "%f", conv_value ); + break; + } + + default: + fprintf( c->outfile, "" ); + } +} + 
int handleCounter( void* userData, uint64_t time, uint32_t process, uint32_t counter, uint64_t value, OTF_KeyValueList* kvlist ) { @@ -779,9 +1010,10 @@ int handleCounter( void* userData, uint64_t time, uint32_t process, ++c->num; if( c->num >= c->minNum && c->num <= c->maxNum && ! c->silent_mode ) { - fprintf( c->outfile, "(#%llu) \t%llu Counter: process %u, counter %u, value %llu", + fprintf( c->outfile, "(#%llu) \t%llu Counter: process %u, counter %u, value ", (long long unsigned) c->num, (long long unsigned) time, - process, counter, (long long unsigned) value ); + process, counter ); + printCounterValue( c, counter, value ); printKeyValueList(c, kvlist); } @@ -1256,6 +1488,68 @@ int handleCollopSummary( void* userData, uint64_t time, uint32_t process, } +#define CASE_RETURN(op) \ + case OTF_FILEOP_##op: return #op + +static const char* fileOpToString( uint32_t operation ) { + + switch( operation & OTF_FILEOP_BITS ) { + CASE_RETURN( OPEN ); + CASE_RETURN( CLOSE ); + CASE_RETURN( READ ); + CASE_RETURN( WRITE ); + CASE_RETURN( SEEK ); + CASE_RETURN( UNLINK ); + CASE_RETURN( RENAME ); + CASE_RETURN( DUP ); + CASE_RETURN( SYNC ); + CASE_RETURN( LOCK ); + CASE_RETURN( UNLOCK ); + + case OTF_FILEOP_OTHER: + default: { + static char unknown_buffer[ 64 ]; + sprintf( unknown_buffer, "UNKNOWN <%u>", + operation & OTF_FILEOP_BITS ); + return unknown_buffer; + } + + } +} + +#undef CASE_RETURN + +#define PRINT_FLAG(flag) \ + if( io_flags & OTF_IOFLAG_##flag ) { \ + fprintf( c->outfile, "%s%s", sep, #flag ); \ + io_flags &= ~OTF_IOFLAG_##flag; \ + sep = ", "; \ + } + +static void printIoFlags( Control* c, uint32_t operation ) { + + + uint32_t io_flags = operation & OTF_IOFLAGS_BITS; + + if( !io_flags ) { + fprintf( c->outfile, "%s", "EMPTY" ); + } + + const char* sep = ""; + PRINT_FLAG( IOFAILED ) + PRINT_FLAG( ASYNC ) + PRINT_FLAG( COLL ) + PRINT_FLAG( DIRECT ) + PRINT_FLAG( SYNC ) + PRINT_FLAG( ISREADLOCK ) + + if( io_flags ) { + fprintf( c->outfile, "%sUNKNOWN <%u>", sep, 
io_flags ); + } +} + +#undef PRINT_FLAG + int handleFileOperation( void* userData, uint64_t time, uint32_t fileid, uint32_t process, uint64_t handleid, uint32_t operation, @@ -1270,11 +1564,12 @@ int handleFileOperation( void* userData, uint64_t time, if( c->num >= c->minNum && c->num <= c->maxNum && ! c->silent_mode ) { fprintf( c->outfile, "(#%llu) \t%llu FileOperation: file ID %llu, " - "process %llu, handle ID %llu, operation %llu, " - "bytes %llu, duration %llu, source %llu", + "process %llu, handle ID %llu, operation %s, flags ", (long long unsigned) c->num, (long long unsigned) time, (long long unsigned) fileid, (long long unsigned) process, - (long long unsigned) handleid, (long long unsigned) operation, + (long long unsigned) handleid, fileOpToString( operation ) ); + printIoFlags( c, operation ); + fprintf( c->outfile, ", bytes %llu, duration %llu, source %llu", (long long unsigned) bytes, (long long unsigned) duration, (long long unsigned) source); @@ -1326,15 +1621,17 @@ int handleEndFileOperation( void* userData, uint64_t time, fprintf( c->outfile, "(#%llu) \t%llu EndFileOperation: " "process %llu, file ID %llu, " - "matching ID %llu, handle ID %llu, operation %llu, " - "bytes %llu, source %llu", + "matching ID %llu, handle ID %llu, " + "operation %s, flags ", (long long unsigned) c->num, (long long unsigned) time, (long long unsigned) process, (long long unsigned) fileid, (long long unsigned) matchingId, - (long long unsigned) handleId, - (long long unsigned) operation, + (long long unsigned) handleId, + fileOpToString( operation ) ); + printIoFlags( c, operation ); + fprintf( c->outfile, ", bytes %llu, source %llu", (long long unsigned) bytes, (long long unsigned) scltoken ); @@ -1520,6 +1817,27 @@ int handleUnknown( void* userData, uint64_t time, uint32_t process, } +#define CASE_RETURN(name) \ + case OTF_MARKER_TYPE_##name: return #name + +static const char* markerTypeToString( uint32_t type ) { + + switch( type ) { + CASE_RETURN( ERROR ); + 
CASE_RETURN( WARNING ); + CASE_RETURN( HINT ); + + default: { + static char unknown_buffer[ 64 ]; + sprintf( unknown_buffer, "UNKNOWN <%u>", type ); + return unknown_buffer; + } + + } +} + +#undef CASE_RETURN + int handleDefMarker( void *userData, uint32_t stream, uint32_t token, const char* name, uint32_t type, OTF_KeyValueList* kvlist ) { @@ -1530,8 +1848,9 @@ int handleDefMarker( void *userData, uint32_t stream, uint32_t token, const char ++c->num; if( c->num >= c->minNum && c->num <= c->maxNum && ! c->silent_mode ) { - fprintf( c->outfile, "(#%llu) DefMarker: ID %u, name \"%s\", type %u", - (long long unsigned) c->num, token, name, type ); + fprintf( c->outfile, "(#%llu) DefMarker: ID %u, name \"%s\", type %s", + (long long unsigned) c->num, token, name, + markerTypeToString( type ) ); printKeyValueList(c, kvlist); } diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.h index 6e3fd4a634..217c88713f 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.h +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/Handler.h @@ -10,6 +10,7 @@ #include "config.h" #endif +#include #include "OTF_inttypes.h" @@ -32,6 +33,8 @@ typedef struct { bool records[OTF_NRECORDS]; /* enabled record types */ + std::map counter_props; + FILE* outfile; } Control; @@ -93,19 +96,16 @@ int handleDefVersion( void* userData, uint32_t stream, uint8_t major, int handleDefKeyValue( void *userData, uint32_t streamid, uint32_t token, OTF_Type type, const char *name, const char *desc, OTF_KeyValueList* kvlist ); -int handleDefTimeRange( void* userData, - uint32_t streamid, - uint64_t minTime, - uint64_t maxTime, - OTF_KeyValueList* kvlist ); +int handleDefTimeRange( void* userData, uint32_t streamid, uint64_t minTime, + uint64_t maxTime, OTF_KeyValueList* kvlist ); -int handleDefCounterAssignments( void* userData, - uint32_t streamid, - uint32_t counter_token, - uint32_t number_of_members, - const uint32_t* procs_or_groups, 
- OTF_KeyValueList* kvlist ); +int handleDefCounterAssignments( void* userData, uint32_t streamid, + uint32_t counter_token, uint32_t number_of_members, + const uint32_t* procs_or_groups, OTF_KeyValueList* kvlist ); +int handleDefProcessSubstitutes( void* userData, uint32_t streamid, + uint32_t representative, uint32_t numberOfProcs, const uint32_t* procs, + OTF_KeyValueList* kvlist ); int handleNoOp( void* userData, uint64_t time, uint32_t process, OTF_KeyValueList* kvlist ); diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/otfdump.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/otfdump.cpp index 72804e0b23..5bd170f85b 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/otfdump.cpp +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/otfdump.cpp @@ -379,6 +379,12 @@ int main ( int argc, const char** argv ) { &fha, OTF_DEFCOUNTERASSIGNMENTS_RECORD ); + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefProcessSubstitutes, + OTF_DEFPROCESSSUBSTITUTES_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + &fha, + OTF_DEFPROCESSSUBSTITUTES_RECORD ); OTF_HandlerArray_setHandler( handlers, (OTF_FunctionPointer*) handleNoOp, diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/Makefile.am deleted file mode 100644 index 2edbca4279..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/Makefile.am +++ /dev/null @@ -1,17 +0,0 @@ -INCLUDES = -I$(top_builddir)/otflib -I$(top_srcdir)/otflib $(MPI_INCLUDE_LINE) - -if AMBUILDBINARIES -bin_PROGRAMS = \ - otfmerge-mpi -endif - -CC = $(MPICC) - -otfmerge_mpi_CFLAGS = $(MPICFLAGS) -otfmerge_mpi_LDADD = $(top_builddir)/otflib/libotf.la $(MPI_LIB_LINE) -otfmerge_mpi_DEPENDENCIES = $(top_builddir)/otflib/libotf.la -otfmerge_mpi_SOURCES = \ - handler.h \ - otfmerge-mpi.c \ - handler.c - diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/handler.c 
b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/handler.c deleted file mode 100644 index 2cd87b85f1..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/handler.c +++ /dev/null @@ -1,1134 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Johannes Spazier -*/ - -#include "handler.h" - -/* definitions */ -int handleDefinitionComment (void *userData, uint32_t stream, const char *comment, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefinitionCommentKV( wstream, comment, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefTimerResolution (void *userData, uint32_t stream, uint64_t ticksPerSecond, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefTimerResolutionKV( wstream, ticksPerSecond, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefProcess (void *userData, uint32_t stream, uint32_t process, const char *name, - uint32_t parent, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefProcessKV( wstream, process, name, - parent, list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefProcessGroup (void *userData, uint32_t stream, uint32_t procGroup, const char *name, - uint32_t numberOfProcs, const uint32_t *procs, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefProcessGroupKV( wstream, procGroup, name, - numberOfProcs, procs, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefAttributeList (void *userData, uint32_t stream, uint32_t attr_token, uint32_t num, - OTF_ATTR_TYPE *array, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefAttributeListKV( wstream, attr_token, - num, array, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefProcessOrGroupAttributes(void *userData, uint32_t stream, uint32_t proc_token, - uint32_t attr_token, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefProcessOrGroupAttributesKV( wstream, proc_token, - attr_token, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefFunction (void *userData, uint32_t stream, uint32_t func, const char *name, - uint32_t funcGroup, uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. 
Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefFunctionKV( wstream, func, name, funcGroup, - source, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefFunctionGroup (void *userData, uint32_t stream, uint32_t funcGroup, - const char *name, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefFunctionGroupKV( wstream, funcGroup, - name, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefCollectiveOperation (void *userData, uint32_t stream, uint32_t collOp, - const char *name, uint32_t type, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefCollectiveOperationKV( wstream, collOp, - name, type, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefCounter (void *userData, uint32_t stream, uint32_t counter, const char *name, - uint32_t properties, uint32_t counterGroup, const char *unit, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefCounterKV( wstream, counter, name, - properties, counterGroup, unit, list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefCounterGroup (void *userData, uint32_t stream, uint32_t counterGroup, const char *name, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefCounterGroupKV( wstream, counterGroup, - name, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefScl (void *userData, uint32_t stream, uint32_t source, uint32_t sourceFile, - uint32_t line, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefSclKV( wstream, source, sourceFile, - line, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefSclFile (void *userData, uint32_t stream, uint32_t sourceFile, const char *name, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefSclFileKV( wstream, sourceFile, - name, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefCreator (void *userData, uint32_t stream, const char *creator, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefCreatorKV( wstream, creator, - list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefVersion (void *userData, uint32_t stream, uint8_t major, uint8_t minor, - uint8_t sub, const char *string) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeOtfVersion( wstream ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefFile (void *userData, uint32_t stream, uint32_t token, const char *name, - uint32_t group, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefFileKV( wstream, token, name, - group, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefFileGroup (void *userData, uint32_t stream, uint32_t token, const char *name, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefFileGroupKV( wstream, token, name, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefKeyValue (void *userData, uint32_t stream, uint32_t token, OTF_Type type, - const char *name, const char *desc, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefKeyValueKV( wstream, token, type, - name, desc, list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefTimeRange( void* userData, - uint32_t stream, - uint64_t minTime, - uint64_t maxTime, - OTF_KeyValueList* kvlist ) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if ( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefTimeRange( - wstream, - minTime, - maxTime, - kvlist ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -int handleDefCounterAssignments( void* userData, - uint32_t stream, - uint32_t counter_token, - uint32_t number_of_members, - const uint32_t* procs_or_groups, - OTF_KeyValueList* kvlist ) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - if ( 0 != stream ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_WStream_writeDefCounterAssignments( - wstream, - counter_token, - number_of_members, - procs_or_groups, - kvlist ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - -/* events */ -int handleNoOp (void *userData, uint64_t time, uint32_t process, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeNoOpKV( wstream, time, process, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - -} - -int handleEnter( void *userData, uint64_t time, uint32_t function, uint32_t process, - uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeEnterKV( wstream, time, function, - process, source, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - -} - -int handleLeave( void *userData, uint64_t time, uint32_t function, uint32_t process, - uint32_t source, OTF_KeyValueList *list ) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeLeaveKV( wstream, time, function, - process, source, list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleSendMsg( void *userData, uint64_t time, uint32_t sender, uint32_t receiver, - uint32_t group, uint32_t type, uint32_t length, uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeSendMsgKV( wstream, time, sender, receiver, group, - type, length, source, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleRecvMsg( void *userData, uint64_t time, uint32_t recvProc, uint32_t sendProc, - uint32_t group, uint32_t type, uint32_t length, uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeRecvMsgKV( wstream, time, recvProc, sendProc, - group, type, length, source, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleCounter( void *userData, uint64_t time, uint32_t process, uint32_t counter, - uint64_t value, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeCounterKV( wstream, time, process, counter, - value, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleCollectiveOperation( void *userData, uint64_t time, uint32_t process, uint32_t collective, - uint32_t procGroup, uint32_t rootProc, uint32_t sent, uint32_t received, uint64_t duration, - uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeCollectiveOperationKV( wstream, time, process, - collective, procGroup, rootProc, sent, received, duration, source, - list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleBeginCollectiveOperation( void *userData, uint64_t time, uint32_t process, uint32_t collOp, - uint64_t matchingId, uint32_t procGroup, uint32_t rootProc, uint64_t sent, uint64_t received, - uint32_t scltoken, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeBeginCollectiveOperationKV( wstream, time, process, - collOp, matchingId, procGroup, rootProc, sent, received, scltoken, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleEndCollectiveOperation( void *userData, uint64_t time, uint32_t process, - uint64_t matchingId, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeEndCollectiveOperationKV( wstream, time, process, - matchingId, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleEventComment( void *userData, uint64_t time, uint32_t process, const char *comment, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeEventCommentKV( wstream, time, process, comment, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleBeginProcess( void *userData, uint64_t time, uint32_t process, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeBeginProcessKV( wstream, time, process, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleEndProcess( void *userData, uint64_t time, uint32_t process, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeEndProcessKV( wstream, time, process, - list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleFileOperation( void *userData, uint64_t time, uint32_t fileid, uint32_t process, - uint64_t handleid, uint32_t operation, uint64_t bytes, uint64_t duration, uint32_t source, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeFileOperationKV( wstream, time, fileid, - process, handleid, operation, bytes, duration, source, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleBeginFileOperation( void *userData, uint64_t time, uint32_t process, - uint64_t matchingId, uint32_t scltoken, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeBeginFileOperationKV( wstream, time, process, - matchingId, scltoken, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleEndFileOperation( void *userData, uint64_t time, uint32_t process, - uint32_t fileid, uint64_t matchingId, uint64_t handleId, uint32_t operation, - uint64_t bytes, uint32_t scltoken, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeEndFileOperationKV( wstream, time, process, - fileid, matchingId, handleId, operation, bytes, scltoken, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleRMAPut( void *userData, uint64_t time, uint32_t process, uint32_t origin, - uint32_t target, uint32_t communicator, uint32_t tag, uint64_t bytes, - uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeRMAPutKV( wstream, time, process, origin, - target, communicator, tag, bytes, source, - list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleRMAPutRemoteEnd( void *userData, uint64_t time, uint32_t process, uint32_t origin, - uint32_t target, uint32_t communicator, uint32_t tag, uint64_t bytes, uint32_t source, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeRMAPutRemoteEndKV( wstream, time, process, origin, - target, communicator, tag, bytes, source, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleRMAGet( void *userData, uint64_t time, uint32_t process, uint32_t origin, - uint32_t target, uint32_t communicator, uint32_t tag, uint64_t bytes, - uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeRMAGetKV( wstream, time, process, origin, - target, communicator, tag, bytes, source, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleRMAEnd( void *userData, uint64_t time, uint32_t process, uint32_t remote, - uint32_t communicator, uint32_t tag, uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeRMAEndKV( wstream, time, process, remote, - communicator, tag, source, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -/* snapshots */ -int handleSnapshotComment(void *userData, uint64_t time, uint32_t process, const char *comment, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeSnapshotCommentKV( wstream, time, process, - comment, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleEnterSnapshot(void *userData, uint64_t time, uint64_t originaltime, uint32_t function, - uint32_t process, uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeEnterSnapshotKV( wstream, time, originaltime, function, - process, source, list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; - -} - -int handleSendSnapshot(void *userData, uint64_t time, uint64_t originaltime, - uint32_t sender, uint32_t receiver, uint32_t procGroup, uint32_t tag, - uint32_t length, uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeSendSnapshotKV( wstream, time, originaltime, sender, - receiver, procGroup, tag, length, source, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleOpenFileSnapshot(void *userData, uint64_t time, uint64_t originaltime, uint32_t fileid, - uint32_t process, uint64_t handleid, uint32_t source, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeOpenFileSnapshotKV( wstream, time, originaltime, fileid, - process, handleid, source, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - -} - -int handleBeginCollopSnapshot(void *userData, uint64_t time, uint64_t originaltime, uint32_t process, - uint32_t collOp, uint64_t matchingId, uint32_t procGroup, uint32_t rootProc, uint64_t sent, - uint64_t received, uint32_t scltoken, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeBeginCollopSnapshotKV( wstream, time, originaltime, process, - collOp, matchingId, procGroup, rootProc, sent, received, scltoken, list) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleBeginFileOpSnapshot(void *userData, uint64_t time, uint64_t originaltime, uint32_t process, - uint64_t matchingId, uint32_t scltoken, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeBeginFileOpSnapshotKV( wstream, time, originaltime, process, - matchingId, scltoken, list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -/* statistics */ -int handleSummaryComment(void *userData, uint64_t time, uint32_t process, - const char *comment, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeSummaryCommentKV( wstream, time, process, comment, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleFunctionSummary(void *userData, uint64_t time, uint32_t function, - uint32_t process, uint64_t invocations, uint64_t exclTime, uint64_t inclTime, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeFunctionSummaryKV( wstream, time, function, process, - invocations, exclTime, inclTime, - list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleFunctionGroupSummary(void *userData, uint64_t time, uint32_t funcGroup, - uint32_t process, uint64_t invocations, uint64_t exclTime, uint64_t inclTime, - OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeFunctionGroupSummaryKV( wstream, time, funcGroup, - process, invocations, exclTime, inclTime, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleMessageSummary(void *userData, uint64_t time, uint32_t process, uint32_t peer, - uint32_t comm, uint32_t type, uint64_t sentNumber, uint64_t receivedNumber, - uint64_t sentBytes, uint64_t receivedBytes, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeMessageSummaryKV( wstream, time, process, - peer, comm, type, sentNumber, receivedNumber, sentBytes, receivedBytes, - list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleCollopSummary(void *userData, uint64_t time, uint32_t process, uint32_t comm, - uint32_t collective, uint64_t sentNumber, uint64_t receivedNumber, uint64_t sentBytes, - uint64_t receivedBytes, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeCollopSummaryKV( wstream, time, process, - comm, collective, sentNumber, receivedNumber, sentBytes, - receivedBytes, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleFileOperationSummary(void *userData, uint64_t time, uint32_t fileid, - uint32_t process, uint64_t nopen, uint64_t nclose, uint64_t nread, uint64_t nwrite, - uint64_t nseek, uint64_t bytesread, uint64_t byteswrite, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeFileOperationSummaryKV( wstream, time, - fileid, process, nopen, nclose, nread, nwrite, nseek, bytesread, - byteswrite, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleFileGroupOperationSummary(void *userData, uint64_t time, uint32_t groupid, - uint32_t process, uint64_t nopen, uint64_t nclose, uint64_t nread, uint64_t nwrite, - uint64_t nseek, uint64_t bytesread, uint64_t byteswrite, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeFileGroupOperationSummaryKV( wstream, time, - groupid, process, nopen, nclose, nread, nwrite, nseek, bytesread, - byteswrite, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - - -/* marker */ -int handleDefMarker( void *userData, uint32_t stream, uint32_t token, const char *name, - uint32_t type, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeDefMarkerKV( wstream, token, name, type, - list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleMarker( void *userData, uint64_t time, uint32_t process, uint32_t token, - const char *text, OTF_KeyValueList *list) { - - - OTF_WStream* wstream = (OTF_WStream*) userData; - - return ( 0 == OTF_WStream_writeMarkerKV( wstream, time, process, token, - text, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - - -/* unknown */ -int handleUnknownRecord (void *userData, uint64_t time, uint32_t process, const char *record) { - - - fprintf( stderr, "Error: unknown record >%s< at process 0x%x\n", record, process ); - - return OTF_RETURN_ABORT; -} - - -void setDefinitionHandlerArray( OTF_HandlerArray* handlers, OTF_WStream* wstream) { - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefinitionComment, - OTF_DEFINITIONCOMMENT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFINITIONCOMMENT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefTimerResolution, - OTF_DEFTIMERRESOLUTION_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFTIMERRESOLUTION_RECORD); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefProcess, - OTF_DEFPROCESS_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFPROCESS_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefProcessGroup, - OTF_DEFPROCESSGROUP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFPROCESSGROUP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefAttributeList, - OTF_DEFATTRLIST_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFATTRLIST_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefProcessOrGroupAttributes, - OTF_DEFPROCESSORGROUPATTR_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, 
OTF_DEFPROCESSORGROUPATTR_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefFunction, - OTF_DEFFUNCTION_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFFUNCTION_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefFunctionGroup, - OTF_DEFFUNCTIONGROUP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFFUNCTIONGROUP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefCollectiveOperation, - OTF_DEFCOLLOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFCOLLOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefCounter, - OTF_DEFCOUNTER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFCOUNTER_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefCounterGroup, - OTF_DEFCOUNTERGROUP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFCOUNTERGROUP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefScl, - OTF_DEFSCL_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFSCL_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefSclFile, - OTF_DEFSCLFILE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFSCLFILE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefVersion, - OTF_DEFVERSION_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFVERSION_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefCreator, - OTF_DEFCREATOR_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFCREATOR_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) 
handleDefFile, - OTF_DEFFILE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFFILE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefFileGroup, - OTF_DEFFILEGROUP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFFILEGROUP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefKeyValue, - OTF_DEFKEYVALUE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFKEYVALUE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefMarker, - OTF_DEFMARKER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_DEFMARKER_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefTimeRange, - OTF_DEFTIMERANGE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, - OTF_DEFTIMERANGE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefCounterAssignments, - OTF_DEFCOUNTERASSIGNMENTS_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, - OTF_DEFCOUNTERASSIGNMENTS_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleUnknownRecord, - OTF_UNKNOWN_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_UNKNOWN_RECORD ); -} - -void setEventHandlerArray( OTF_HandlerArray* handlers, OTF_WStream* wstream) { - - /* events */ - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEventComment, - OTF_EVENTCOMMENT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_EVENTCOMMENT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleCounter, - OTF_COUNTER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_COUNTER_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleNoOp, 
- OTF_NOOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_NOOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEnter, - OTF_ENTER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_ENTER_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleCollectiveOperation, - OTF_COLLOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_COLLOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleBeginCollectiveOperation, - OTF_BEGINCOLLOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_BEGINCOLLOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEndCollectiveOperation, - OTF_ENDCOLLOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_ENDCOLLOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleRecvMsg, - OTF_RECEIVE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_RECEIVE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleSendMsg, - OTF_SEND_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_SEND_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleLeave, - OTF_LEAVE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_LEAVE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleBeginProcess, - OTF_BEGINPROCESS_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_BEGINPROCESS_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEndProcess, - OTF_ENDPROCESS_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_ENDPROCESS_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - 
(OTF_FunctionPointer*) handleFileOperation, - OTF_FILEOPERATION_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_FILEOPERATION_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleBeginFileOperation, - OTF_BEGINFILEOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_BEGINFILEOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEndFileOperation, - OTF_ENDFILEOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_ENDFILEOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleRMAPut, - OTF_RMAPUT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_RMAPUT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleRMAPutRemoteEnd, - OTF_RMAPUTRE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_RMAPUTRE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleRMAGet, - OTF_RMAGET_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_RMAGET_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleRMAEnd, - OTF_RMAEND_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_RMAEND_RECORD ); - - /* snapshots */ - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleSnapshotComment, - OTF_SNAPSHOTCOMMENT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_SNAPSHOTCOMMENT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEnterSnapshot, - OTF_ENTERSNAPSHOT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_ENTERSNAPSHOT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleSendSnapshot, - OTF_SENDSNAPSHOT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( 
handlers, - (void*) wstream, OTF_SENDSNAPSHOT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleOpenFileSnapshot, - OTF_OPENFILESNAPSHOT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_OPENFILESNAPSHOT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleBeginCollopSnapshot, - OTF_BEGINCOLLOPSNAPSHOT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_BEGINCOLLOPSNAPSHOT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleBeginFileOpSnapshot, - OTF_BEGINFILEOPSNAPSHOT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_BEGINFILEOPSNAPSHOT_RECORD ); - - /* statistics */ - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleSummaryComment, - OTF_SUMMARYCOMMENT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_SUMMARYCOMMENT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleFunctionSummary, - OTF_FUNCTIONSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_FUNCTIONSUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleFunctionGroupSummary, - OTF_FUNCTIONGROUPSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_FUNCTIONGROUPSUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleMessageSummary, - OTF_MESSAGESUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_MESSAGESUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleCollopSummary, - OTF_COLLOPSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_COLLOPSUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleFileOperationSummary, - 
OTF_FILEOPERATIONSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_FILEOPERATIONSUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleFileGroupOperationSummary, - OTF_FILEGROUPOPERATIONSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_FILEGROUPOPERATIONSUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleMarker, - OTF_MARKER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_MARKER_RECORD ); - - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleUnknownRecord, - OTF_UNKNOWN_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - (void*) wstream, OTF_UNKNOWN_RECORD ); - -} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/handler.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/handler.h deleted file mode 100644 index dc78977c22..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/handler.h +++ /dev/null @@ -1,270 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
- Authors: Johannes Spazier -*/ - -#ifndef HANDLER_H -#define HANDLER_H - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include -#include - -#if defined(HAVE_SYS_SELECT_H) && HAVE_SYS_SELECT_H -# include -#else /* HAVE_SYS_SELECT_H */ -# include -# include -# include -#endif /* HAVE_SYS_SELECT_H */ - -#include - -#include "otf.h" - -/* data structures */ - -typedef struct { - int id; - int num_cpus; - int *cpus; -} OutStream; - -typedef struct { - int num_ostreams; - OutStream *ostreams; -} RankData; - -typedef struct { - double progress; - uint8_t is_alive; -} ProgressBuf; - -typedef struct { - MPI_Request request; - ProgressBuf value; - ProgressBuf buf; - int num_cpus; - double percent; -} ProgressInfo; - -typedef struct { - int my_rank; - int num_ranks; - int ranks_alive; - double tmp_progress; - MPI_Datatype buftype; -} GlobalData; - -/* function declarations */ - -double update_progress( ProgressInfo* info, GlobalData* data, int cur_ostream, int num_ostreams); - -int finish_everything( char *infile, char* outfile, ProgressInfo* info, - RankData* rank_data, int ret ); - -void setDefinitionHandlerArray( OTF_HandlerArray* handlers, OTF_WStream* wstream); - -void setEventHandlerArray( OTF_HandlerArray* handlers, OTF_WStream* wstream); - - -/* handlers */ - -/* definitions */ - -int handleDefinitionComment (void *userData, uint32_t stream, const char *comment, - OTF_KeyValueList *list); - -int handleDefTimerResolution (void *userData, uint32_t stream, uint64_t ticksPerSecond, - OTF_KeyValueList *list); - -int handleDefProcess (void *userData, uint32_t stream, uint32_t process, const char *name, - uint32_t parent, OTF_KeyValueList *list); - -int handleDefProcessGroup (void *userData, uint32_t stream, uint32_t procGroup, const char *name, - uint32_t numberOfProcs, const uint32_t *procs, OTF_KeyValueList *list); - -int handleDefAttributeList (void *userData, uint32_t stream, uint32_t attr_token, uint32_t num, - OTF_ATTR_TYPE *array, OTF_KeyValueList *list); 
- -int handleDefProcessOrGroupAttributes(void *userData, uint32_t stream, uint32_t proc_token, - uint32_t attr_token, OTF_KeyValueList *list); - -int handleDefFunction (void *userData, uint32_t stream, uint32_t func, const char *name, - uint32_t funcGroup, uint32_t source, OTF_KeyValueList *list); - -int handleDefFunctionGroup (void *userData, uint32_t stream, uint32_t funcGroup, - const char *name, OTF_KeyValueList *list); - -int handleDefCollectiveOperation (void *userData, uint32_t stream, uint32_t collOp, - const char *name, uint32_t type, OTF_KeyValueList *list); - -int handleDefCounter (void *userData, uint32_t stream, uint32_t counter, const char *name, - uint32_t properties, uint32_t counterGroup, const char *unit, OTF_KeyValueList *list); - -int handleDefCounterGroup (void *userData, uint32_t stream, uint32_t counterGroup, const char *name, - OTF_KeyValueList *list); - -int handleDefScl (void *userData, uint32_t stream, uint32_t source, uint32_t sourceFile, - uint32_t line, OTF_KeyValueList *list); - -int handleDefSclFile (void *userData, uint32_t stream, uint32_t sourceFile, const char *name, - OTF_KeyValueList *list); - -int handleDefCreator (void *userData, uint32_t stream, const char *creator, OTF_KeyValueList *list); - -int handleDefVersion (void *userData, uint32_t stream, uint8_t major, uint8_t minor, - uint8_t sub, const char *string); - -int handleDefFile (void *userData, uint32_t stream, uint32_t token, const char *name, - uint32_t group, OTF_KeyValueList *list); - -int handleDefFileGroup (void *userData, uint32_t stream, uint32_t token, const char *name, - OTF_KeyValueList *list); - -int handleDefKeyValue (void *userData, uint32_t stream, uint32_t token, OTF_Type type, - const char *name, const char *desc, OTF_KeyValueList *list); - -int handleDefTimeRange( void* userData, - uint32_t stream, - uint64_t counter_token, - uint64_t number_of_members, - OTF_KeyValueList* kvlist ); - -int handleDefCounterAssignments( void* userData, - uint32_t stream, 
- uint32_t counter_token, - uint32_t number_of_members, - const uint32_t* procs_or_groups, - OTF_KeyValueList* kvlist ); - -/* events */ - -int handleNoOp (void *userData, uint64_t time, uint32_t process, OTF_KeyValueList *list); - -int handleEnter (void *userData, uint64_t time, uint32_t function, uint32_t process, - uint32_t source, OTF_KeyValueList *list); - -int handleLeave( void *userData, uint64_t time, uint32_t function, uint32_t process, - uint32_t source, OTF_KeyValueList *list ); - -int handleSendMsg( void *userData, uint64_t time, uint32_t sender, uint32_t receiver, - uint32_t group, uint32_t type, uint32_t length, uint32_t source, OTF_KeyValueList *list); - -int handleRecvMsg( void *userData, uint64_t time, uint32_t recvProc, uint32_t sendProc, - uint32_t group, uint32_t type, uint32_t length, uint32_t source, OTF_KeyValueList *list); - -int handleCounter( void *userData, uint64_t time, uint32_t process, uint32_t counter, - uint64_t value, OTF_KeyValueList *list); - -int handleCollectiveOperation( void *userData, uint64_t time, uint32_t process, uint32_t collective, - uint32_t procGroup, uint32_t rootProc, uint32_t sent, uint32_t received, uint64_t duration, - uint32_t source, OTF_KeyValueList *list); - -int handleBeginCollectiveOperation( void *userData, uint64_t time, uint32_t process, uint32_t collOp, - uint64_t matchingId, uint32_t procGroup, uint32_t rootProc, uint64_t sent, uint64_t received, - uint32_t scltoken, OTF_KeyValueList *list); - -int handleEndCollectiveOperation( void *userData, uint64_t time, uint32_t process, - uint64_t matchingId, OTF_KeyValueList *list); - -int handleEventComment( void *userData, uint64_t time, uint32_t process, const char *comment, - OTF_KeyValueList *list); - -int handleBeginProcess( void *userData, uint64_t time, uint32_t process, OTF_KeyValueList *list); - -int handleEndProcess( void *userData, uint64_t time, uint32_t process, OTF_KeyValueList *list); - -int handleFileOperation( void *userData, uint64_t time, 
uint32_t fileid, uint32_t process, - uint64_t handleid, uint32_t operation, uint64_t bytes, uint64_t duration, uint32_t source, - OTF_KeyValueList *list); - -int handleBeginFileOperation( void *userData, uint64_t time, uint32_t process, - uint64_t matchingId, uint32_t scltoken, OTF_KeyValueList *list); - -int handleEndFileOperation( void *userData, uint64_t time, uint32_t process, - uint32_t fileid, uint64_t matchingId, uint64_t handleId, uint32_t operation, uint64_t bytes, - uint32_t scltoken, OTF_KeyValueList *list); - -int handleRMAPut( void *userData, uint64_t time, uint32_t process, uint32_t origin, - uint32_t target, uint32_t communicator, uint32_t tag, uint64_t bytes, - uint32_t source, OTF_KeyValueList *list); - -int handleRMAPutRemoteEnd( void *userData, uint64_t time, uint32_t process, uint32_t origin, - uint32_t target, uint32_t communicator, uint32_t tag, uint64_t bytes, uint32_t source, - OTF_KeyValueList *list); - -int handleRMAGet( void *userData, uint64_t time, uint32_t process, uint32_t origin, - uint32_t target, uint32_t communicator, uint32_t tag, uint64_t bytes, - uint32_t source, OTF_KeyValueList *list); - -int handleRMAEnd( void *userData, uint64_t time, uint32_t process, uint32_t remote, - uint32_t communicator, uint32_t tag, uint32_t source, OTF_KeyValueList *list); - -/* snapshots */ - -int handleSnapshotComment (void *userData, uint64_t time, uint32_t process, const char *comment, - OTF_KeyValueList *list); - -int handleEnterSnapshot (void *userData, uint64_t time, uint64_t originaltime, uint32_t function, - uint32_t process, uint32_t source, OTF_KeyValueList *list); - -int handleSendSnapshot (void *userData, uint64_t time, uint64_t originaltime, - uint32_t sender, uint32_t receiver, uint32_t procGroup, uint32_t tag, - uint32_t length, uint32_t source, OTF_KeyValueList *list); - -int handleOpenFileSnapshot (void *userData, uint64_t time, uint64_t originaltime, uint32_t fileid, - uint32_t process, uint64_t handleid, uint32_t source, 
OTF_KeyValueList *list); - -int handleBeginCollopSnapshot(void *userData, uint64_t time, uint64_t originaltime, uint32_t process, - uint32_t collOp, uint64_t matchingId, uint32_t procGroup, uint32_t rootProc, uint64_t sent, - uint64_t received, uint32_t scltoken, OTF_KeyValueList *list); - -int handleBeginFileOpSnapshot(void *userData, uint64_t time, uint64_t originaltime, uint32_t process, - uint64_t matchingId, uint32_t scltoken, OTF_KeyValueList *list); - -/* statistics */ - -int handleSummaryComment (void *userData, uint64_t time, uint32_t process, - const char *comment, OTF_KeyValueList *list); - -int handleFunctionSummary (void *userData, uint64_t time, uint32_t function, - uint32_t process, uint64_t invocations, uint64_t exclTime, uint64_t inclTime, - OTF_KeyValueList *list); - -int handleFunctionGroupSummary (void *userData, uint64_t time, uint32_t funcGroup, - uint32_t process, uint64_t invocations, uint64_t exclTime, uint64_t inclTime, - OTF_KeyValueList *list); - -int handleMessageSummary (void *userData, uint64_t time, uint32_t process, uint32_t peer, - uint32_t comm, uint32_t type, uint64_t sentNumber, uint64_t receivedNumber, - uint64_t sentBytes, uint64_t receivedBytes, OTF_KeyValueList *list); - -int handleCollopSummary (void *userData, uint64_t time, uint32_t process, uint32_t comm, - uint32_t collective, uint64_t sentNumber, uint64_t receivedNumber, uint64_t sentBytes, - uint64_t receivedBytes, OTF_KeyValueList *list); - -int handleFileOperationSummary (void *userData, uint64_t time, uint32_t fileid, - uint32_t process, uint64_t nopen, uint64_t nclose, uint64_t nread, uint64_t nwrite, - uint64_t nseek, uint64_t bytesread, uint64_t byteswrite, OTF_KeyValueList *list); - -int handleFileGroupOperationSummary (void *userData, uint64_t time, uint32_t groupid, - uint32_t process, uint64_t nopen, uint64_t nclose, uint64_t nread, uint64_t nwrite, - uint64_t nseek, uint64_t bytesread, uint64_t byteswrite, OTF_KeyValueList *list); - -/* marker */ - -int 
handleDefMarker( void *userData, uint32_t stream, uint32_t token, const char *name, - uint32_t type, OTF_KeyValueList *list); - -int handleMarker( void *userData, uint64_t time, uint32_t process, uint32_t token, - const char *text, OTF_KeyValueList *list); - -/* unknown */ - -int handleUnknownRecord (void *userData, uint64_t time, uint32_t process, const char *record); - -#endif /* HANDLER_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/otfmerge-mpi.c b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/otfmerge-mpi.c deleted file mode 100644 index 3e5535558b..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge-mpi/otfmerge-mpi.c +++ /dev/null @@ -1,988 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Johannes Spazier -*/ - -#include "handler.h" - -#define fprintf_root if(my_rank == 0) fprintf - -#define FINISH_EVERYTHING(ret) finish_everything( infile, outfile, info, &rank_data, ret ) - -#define SHOW_HELPTEXT if(my_rank == 0) { \ - int l = 0; while( helptext[l] ) { fprintf( stdout, "%s", helptext[l++] ); } } - -static const char* helptext[] = { -" otfmerge-mpi - an MPI implementation of otfmerge \n", -" \n", -" otfmerge-mpi [options] \n", -" \n", -" options: \n", -" -h, --help show this help message \n", -" -V show OTF version \n", -" -n set number of streams for output \n", -" set this to 0 for using one stream per process \n", -" default is 1 \n", -" -f set max number of filehandles available per rank \n", -" -o namestub of the output file (default 'out') \n", -" -rb set buffersize of the reader (for each rank) \n", -" -wb set buffersize of the writer (for each rank) \n", -" -stats cover statistics too \n", -" -snaps cover snapshots too \n", -" -z write compressed output \n", -" zlevel reaches from 0 to 9 where 0 is no \n", -" compression and 9 is the highest level \n", -" -l write long OTF format \n", -" -p show progress \n", -" \n", NULL }; - -int main(int argc, char **argv) { - - /* 
for all processes */ - int i, j; - int my_rank; - int num_ranks; - uint64_t ret_read; - int show_progress = 0; - int max_fhandles = 100; - char *outfile = NULL; - char *infile = NULL; - int rbufsize = 1024 * 1024; - int wbufsize = 1024 * 1024; - int format = OTF_WSTREAM_FORMAT_SHORT; - int read_stats = 0; - int read_snaps = 0; - OTF_FileCompression compression= 0; - RankData rank_data = { 0 ,NULL }; - ProgressInfo *info = NULL; - GlobalData global_data; - - /* only for root process (0) */ - int num_cpus; /* number of cpus in input otf-file */ - int *cpus; /* global array that contains all cpu-ids */ - int offset; - int *p; - int num_ostreams = 1; - char *outfile_otf = NULL; - FILE *master_file = NULL; - OutStream *ostreams = NULL; - - - /* progress related */ - uint64_t total_bytes = 0; - uint64_t cur_bytes = 0; - uint64_t cur_bytes_ges = 0; - uint64_t min, max, cur; - struct timeval tv; - - /* OTF related */ - OTF_Reader* reader = NULL; - OTF_WStream* wstream = NULL; - OTF_HandlerArray* handlers = NULL; - OTF_MasterControl* master = NULL; - OTF_FileManager* manager = NULL; - OTF_MapEntry* entry = NULL; - - MPI_Status status; - - int array_of_blocklengths[2]; - MPI_Aint array_of_displacements[2]; - MPI_Datatype array_of_types[2]; - - MPI_Aint first_var_address; - MPI_Aint second_var_address; - - - MPI_Init(&argc, &argv); - - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); - MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); - - /* store some important things in a global structure */ - global_data.my_rank = my_rank; - global_data.num_ranks = num_ranks; - global_data.ranks_alive = num_ranks - 1; - - /* argument handling */ - if ( 1 >= argc ) { - - SHOW_HELPTEXT - - MPI_Finalize(); - - return 0; - } - - for ( i = 1; i < argc; i++ ) { - - if ( ( 0 == strcmp( "-o", argv[i] ) ) && ( i+1 < argc ) ) { - - /* must be free'd at the end */ - outfile = OTF_stripFilename( argv[i+1] ); - ++i; - - } else if ( ( 0 == strcmp( "-n", argv[i] ) ) && ( i+1 < argc ) ) { - - num_ostreams = atoi( 
argv[i+1] ); - ++i; - - } else if ( 0 == strcmp( "-h", argv[i] ) || 0 == strcmp( "--help", argv[i] ) ) { - - SHOW_HELPTEXT - - return FINISH_EVERYTHING(0); - - } else if ( 0 == strcmp( "-V", argv[i] ) ) { - - fprintf_root( stdout, "%u.%u.%u \"%s\"\n", OTF_VERSION_MAJOR, OTF_VERSION_MINOR, - OTF_VERSION_SUB, OTF_VERSION_STRING ); - - return FINISH_EVERYTHING(0); - - } else if ( 0 == strcmp( "-p", argv[i] ) ) { - - show_progress = 1; - - } else if ( ( 0 == strcmp( "-f", argv[i] ) ) && ( i+1 < argc ) ) { - - max_fhandles = atoi( argv[i+1] ); - ++i; - - } else if ( ( 0 == strcmp( "-rb", argv[i] ) ) && ( i+1 < argc ) ) { - - rbufsize = atoi( argv[i+1] ); - ++i; - - } else if ( ( 0 == strcmp( "-wb", argv[i] ) ) && ( i+1 < argc ) ) { - - wbufsize = atoi( argv[i+1] ); - ++i; - - } else if ( ( 0 == strcmp( "-z", argv[i] ) ) && ( i+1 < argc ) ) { - - compression = atoi( argv[i+1] ); - ++i; - - } else if ( 0 == strcmp( "-l", argv[i] ) ) { - - format = OTF_WSTREAM_FORMAT_LONG; - - } else if ( 0 == strcmp( "-snaps", argv[i] ) ) { - - read_snaps = 1; - - } else if ( 0 == strcmp( "-stats", argv[i] ) ) { - - read_stats = 1; - - } else { - - if ( '-' != argv[i][0] ) { - - /* must be free'd at the end */ - infile = OTF_stripFilename( argv[i] ); - - } else { - - fprintf_root( stderr, "Error: unknown option: '%s'\n", argv[i] ); - - return FINISH_EVERYTHING(1); - - } - - } - - } - - if( ! 
outfile ) { - - outfile = strdup("out"); - - } else if( outfile[ strlen(outfile) -1 ] == '/' ) { - - strncat( outfile, "out", 3 ); - - } - - /* must be free'd at the end */ - outfile_otf = OTF_getFilename( outfile, 0, OTF_FILETYPE_MASTER, 0, NULL); - - /* check for neccessary options */ - if( infile == NULL ) { - - fprintf_root( stderr, "Error: no input file given.\n"); - - return FINISH_EVERYTHING(1); - - } - - if ( max_fhandles < 1 ) { - - fprintf_root( stderr, "Error: less than 1 filehandle is not permitted.\n" ); - - return FINISH_EVERYTHING(1); - - } - - if ( num_ostreams < 0 ) { - - fprintf_root( stderr, "Error: the number of streams must not be negative.\n" ); - - return FINISH_EVERYTHING(1); - - } - - if ( wbufsize < 0 || rbufsize < 0 ) { - - fprintf_root( stderr, "Error: buffersize must be greater or equal 0.\n" ); - - return FINISH_EVERYTHING(1); - - } - - if ( my_rank == 0 ) { - - /* read master of input file */ - manager = OTF_FileManager_open( max_fhandles ); - - if( NULL == manager ) { - fprintf( stderr, "Error: unable to initialize file manager.\n" ); - - return FINISH_EVERYTHING(1); - } - - master = OTF_MasterControl_new( manager ); - OTF_MasterControl_read( master, infile ); - - /* get the total number of processes in the otf master file */ - num_cpus = OTF_MasterControl_getrCount( master ); - - /* set one stream per process */ - if ( num_ostreams == 0 || num_ostreams > num_cpus ) { - num_ostreams = num_cpus; - } - - /* allocate memory */ - ostreams = (OutStream*) malloc( num_ostreams * sizeof(OutStream) ); - cpus = (int*) malloc( num_cpus * sizeof(int) ); - p = cpus; - - /* allocate memory for the info array */ - info = (ProgressInfo*) malloc( num_ranks * sizeof(ProgressInfo) ); - - /* fill the global cpus-array */ - i = 0; - while( 1 ) { - - entry = OTF_MasterControl_getEntryByIndex( master, i ); - - if( entry == NULL) { - break; - } - - for( j=0; (uint32_t)j < entry->n; j++) { - *p++ = entry->values[j]; - } - - i++; - - } - - /* open new master 
file for output */ - master_file = fopen( outfile_otf, "w"); - - if( NULL == master_file ) { - - fprintf( stderr, "Error: unable to open file \"%s\".\n", outfile_otf); - - free(cpus); - free( ostreams ); - - OTF_MasterControl_close( master ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - - } - - /* fill all ostreams with data and write the new master file */ - offset = 0; - for( i = 0; i < num_ostreams; i++ ) { - - ostreams[i].id = i + 1; - ostreams[i].num_cpus = ( num_cpus / num_ostreams) + - ( i < ( num_cpus % num_ostreams) ? 1 : 0 ); - - ostreams[i].cpus = (int*) malloc( ostreams[i].num_cpus * sizeof(int) ); - - /* append stream-id to new otf master file */ - fprintf( master_file, "%x:", ostreams[i].id); - - for(j = 0; j < ostreams[i].num_cpus; j++) { - - ostreams[i].cpus[j] = cpus[offset + j]; - - /* append cpu-id to master file */ - fprintf( master_file, "%x", ostreams[i].cpus[j]); - - if ( (j + 1) < ostreams[i].num_cpus) { - fprintf( master_file, ","); - } - - } - - fprintf( master_file, "\n"); - offset += ostreams[i].num_cpus; - - } - - /* close new master file */ - fclose(master_file); - - /* free global cpus-array, because it is not needed anymore */ - if( cpus ) { - free(cpus); - cpus = NULL; - } - - offset = 0; - /* send needed data to all ranks */ - for( i = (num_ranks - 1); i >= 0; i-- ) { - - /* get number of output-streams, rank i has to handle */ - rank_data.num_ostreams = ( num_ostreams / num_ranks) + - ( i < ( num_ostreams % num_ranks) ? 
1 : 0 ); - - if( i > 0) { - - /* send number of output-streams to rank i */ - MPI_Ssend( &(rank_data.num_ostreams), 1, MPI_INT, i, 0, MPI_COMM_WORLD); - - } else { - - /* save number of output-streams for rank 0 in rank_data */ - rank_data.ostreams = (OutStream*) malloc( rank_data.num_ostreams * sizeof(OutStream) ); - - } - - info[i].num_cpus = 0; - /* go through all output-streams of rank i */ - for(j = 0; j < rank_data.num_ostreams; j++) { - - if( i > 0 ) { - - /* send data to rank */ - MPI_Ssend( &(ostreams[offset + j].id), 1, MPI_INT, i, 0, MPI_COMM_WORLD); - MPI_Ssend( &(ostreams[offset + j].num_cpus), 1, MPI_INT, i, 0, MPI_COMM_WORLD); - MPI_Ssend( ostreams[offset + j].cpus, ostreams[offset + j].num_cpus, MPI_INT, i, 0, MPI_COMM_WORLD); - - } else { - - /* save data for rank 0 */ - rank_data.ostreams[j].id = ostreams[offset + j].id; - rank_data.ostreams[j].num_cpus = ostreams[offset + j].num_cpus; - rank_data.ostreams[j].cpus = (int*) malloc(rank_data.ostreams[j].num_cpus * sizeof(int)); - memcpy(rank_data.ostreams[j].cpus, ostreams[offset + j].cpus, - rank_data.ostreams[j].num_cpus * (sizeof(int))); - - } - - info[i].num_cpus += ostreams[offset + j].num_cpus; - - } - - offset += rank_data.num_ostreams; - - } - - /* can be free'd here because all MPI_Ssends are finished --> they are synchron */ - for( i = 0; i < num_ostreams; i++ ) { - - if( ostreams[i].cpus ) { - free( ostreams[i].cpus ); - ostreams[i].cpus = NULL; - } - - } - - if( ostreams ) { - free( ostreams ); - ostreams = NULL; - } - - /* initialize the info array */ - for( i = 0; i < num_ranks; i++ ) { - - info[i].percent = (double) info[i].num_cpus / (double) num_cpus; - info[i].request = MPI_REQUEST_NULL; - info[i].value.progress = 0.0; - info[i].value.is_alive = 1; - - } - - /* close master */ - OTF_MasterControl_close( master ); - OTF_FileManager_close( manager ); - - } else { - - info = (ProgressInfo*) malloc( 1 * sizeof(ProgressInfo) ); - - info[0].request = MPI_REQUEST_NULL; - 
info[0].value.progress = 0.0; - info[0].value.is_alive = 1; - - /* receive number of output-streams for this rank */ - MPI_Recv( &(rank_data.num_ostreams), 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); - - /* allocate memory for output-streams */ - rank_data.ostreams = (OutStream*) malloc( rank_data.num_ostreams * sizeof(OutStream) ); - - /* go through all output streams */ - for( i = 0; i < rank_data.num_ostreams; i++ ) { - - /* receive id of output-stream and number of cpus in this stream */ - MPI_Recv( &(rank_data.ostreams[i].id), 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); - MPI_Recv( &(rank_data.ostreams[i].num_cpus), 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); - - /* allocate memory for cpus in stream */ - rank_data.ostreams[i].cpus = (int*) malloc( rank_data.ostreams[i].num_cpus * sizeof(int) ); - - /* receive all cpu-ids */ - MPI_Recv( rank_data.ostreams[i].cpus, rank_data.ostreams[i].num_cpus, MPI_INT, 0, 0, MPI_COMM_WORLD, &status ); - - } - - } - - if( show_progress ) { - - /* create new mpi datatype to transfer the progress */ - /* struct { - double progress; - uint8_t is_alive; - }; - */ - - array_of_blocklengths[0] = 1; - array_of_blocklengths[1] = 1; - - MPI_Address( &(info[0].value.progress), &first_var_address ); - MPI_Address( &(info[0].value.is_alive), &second_var_address); - - array_of_displacements[0] = (MPI_Aint) 0; - array_of_displacements[1] = second_var_address - first_var_address; - - array_of_types[0] = MPI_DOUBLE; - array_of_types[1] = MPI_BYTE; - - MPI_Type_struct( 2, array_of_blocklengths, array_of_displacements, - array_of_types, &(global_data.buftype) ); - - MPI_Type_commit( &(global_data.buftype) ); - - } - - - manager = OTF_FileManager_open( max_fhandles ); - if( NULL == manager ) { - - fprintf( stderr, "Error: unable to initialize file manager.\n" ); - - return FINISH_EVERYTHING(1); - - } - - /* the root process should read the definitions now */ - if( my_rank == 0 ) { - - wstream = OTF_WStream_open( outfile, 0, manager ); - - 
OTF_WStream_setBufferSizes( wstream, wbufsize ); - OTF_WStream_setCompression( wstream, compression ); - OTF_WStream_setFormat( wstream, format ); - - handlers = OTF_HandlerArray_open(); - - setDefinitionHandlerArray( handlers, wstream ); - - reader = OTF_Reader_open( infile, manager); - - if( reader == NULL) { - - fprintf( stderr, "Error: unable to open file %s.\n", infile ); - - OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - } - - OTF_Reader_setBufferSizes( reader, rbufsize ); - - if( OTF_READ_ERROR == OTF_Reader_readDefinitions( reader, handlers ) ) { - - fprintf( stderr, "Error: while reading definitions from file %s\n", infile ); - - OTF_Reader_close( reader ); - OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - } - - if( OTF_READ_ERROR == OTF_Reader_readMarkers( reader, handlers ) ) { - - fprintf( stderr, "Error: while reading markers from file %s\n", infile ); - - OTF_Reader_close( reader ); - OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - } - - /* close everything */ - OTF_HandlerArray_close( handlers ); - OTF_Reader_close( reader ); - OTF_WStream_close( wstream ); - - } - - /*MPI_Barrier(MPI_COMM_WORLD);*/ - - for( i = 0; i < rank_data.num_ostreams; i++ ) { - - total_bytes = 0; - cur_bytes = 0; - cur_bytes_ges = 0; - - wstream = OTF_WStream_open( outfile, rank_data.ostreams[i].id, manager ); - - OTF_WStream_setBufferSizes( wstream, wbufsize ); - OTF_WStream_setCompression( wstream, compression ); - OTF_WStream_setFormat( wstream, format ); - - handlers = OTF_HandlerArray_open(); - - setEventHandlerArray( handlers, wstream ); - - reader = OTF_Reader_open( infile, manager); - if( reader == NULL) { - - fprintf_root( stderr, "Error: unable to open file %s.\n", infile ); - - 
OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - } - - OTF_Reader_setBufferSizes( reader, rbufsize ); - - OTF_Reader_setProcessStatusAll ( reader, 0 ); - - for( j = 0; j < rank_data.ostreams[i].num_cpus; j++ ) { - - OTF_Reader_setProcessStatus( reader, rank_data.ostreams[i].cpus[j], 1 ); - - } - - if( show_progress ) { - - OTF_Reader_setRecordLimit( reader, 0 ); - - if( OTF_READ_ERROR == OTF_Reader_readEvents( reader, handlers ) ) { - - fprintf( stderr, "Error: while reading events from file %s\n", infile ); - - OTF_Reader_close( reader ); - OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - - } - - if( read_snaps ) { - - if( OTF_READ_ERROR == OTF_Reader_readSnapshots( reader, handlers ) ) { - - fprintf( stderr, "Error: while reading snaphots from file %s\n", infile ); - - OTF_Reader_close( reader ); - OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - - } - - } - - if( read_stats ) { - - if( OTF_READ_ERROR == OTF_Reader_readStatistics( reader, handlers ) ) { - - fprintf( stderr, "Error: while reading statistics from file %s\n", infile ); - - OTF_Reader_close( reader ); - OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - - } - - } - - OTF_Reader_eventBytesProgress( reader, &min, &cur, &max ); - /* (min - max) is erroneous because with small traces min == max --> division by zero */ - total_bytes += max; /* max - min */ - - if( read_snaps ) { - - OTF_Reader_snapshotBytesProgress( reader, &min, &cur, &max ); - total_bytes += max; /* max - min */ - - } - - if( read_stats ) { - - OTF_Reader_statisticBytesProgress( reader, &min, &cur, &max ); - total_bytes += max; /* max - min */ - - } - - OTF_Reader_setRecordLimit( 
reader, 100000 ); - - } - - while( 0 != ( ret_read = OTF_Reader_readEvents( reader, handlers ) ) ) { - - if( ret_read == OTF_READ_ERROR) { - - fprintf( stderr, "Error: while reading events from file %s\n", infile ); - - OTF_Reader_close( reader ); - OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - - } - - if( show_progress ) { - - OTF_Reader_eventBytesProgress( reader, &min, &cur, &max ); - - cur_bytes = cur; /* cur - min */ - - /* calculate rank specific progress for the current stream */ - global_data.tmp_progress = (double) ( (double) cur_bytes / (double) total_bytes ); - - update_progress( info, &global_data, i, rank_data.num_ostreams ); - - } - - } - - cur_bytes_ges = cur_bytes; - - /* read snapshots */ - if( read_snaps ) { - - while( 0 != ( ret_read = OTF_Reader_readSnapshots( reader, handlers ) ) ) { - - if( ret_read == OTF_READ_ERROR) { - - fprintf( stderr, "Error: while reading snapshots from file %s\n", infile ); - - OTF_Reader_close( reader ); - OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - - } - - if( show_progress ) { - - OTF_Reader_snapshotBytesProgress( reader, &min, &cur, &max ); - - cur_bytes = cur; /* cur - min */ - - /* calculate rank specific progress for the current stream */ - global_data.tmp_progress = (double) ( (double) (cur_bytes + cur_bytes_ges) / (double) total_bytes ); - - update_progress( info, &global_data, i, rank_data.num_ostreams ); - } - - } - - } - - cur_bytes_ges += cur_bytes; - - /* read statistics */ - if( read_stats ) { - - while( 0 != ( ret_read = OTF_Reader_readStatistics( reader, handlers ) ) ) { - - if( ret_read == OTF_READ_ERROR) { - - fprintf( stderr, "Error: while reading statistics from file %s\n", infile ); - - OTF_Reader_close( reader ); - OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( 
manager ); - - return FINISH_EVERYTHING(1); - - } - - if( show_progress ) { - - OTF_Reader_statisticBytesProgress( reader, &min, &cur, &max ); - - cur_bytes = cur; /* cur - min */ - - /* calculate rank specific progress for the current stream */ - global_data.tmp_progress = (double) ( (double) (cur_bytes + cur_bytes_ges) / (double) total_bytes ); - - update_progress( info, &global_data, i, rank_data.num_ostreams ); - } - - } - - } - - /* read markers */ - while( 0 != ( ret_read = OTF_Reader_readMarkers( reader, handlers ) ) ) { - - if( ret_read == OTF_READ_ERROR) { - - fprintf( stderr, "Error: while reading markers from file %s\n", infile ); - - OTF_Reader_close( reader ); - OTF_HandlerArray_close( handlers ); - OTF_WStream_close( wstream ); - OTF_FileManager_close( manager ); - - return FINISH_EVERYTHING(1); - - } - - } - - /* close everything */ - OTF_HandlerArray_close( handlers ); - OTF_Reader_close( reader ); - OTF_WStream_close( wstream ); - - } - - /* wait for other processes to finish */ - if( show_progress ) { - - if( my_rank == 0 ) { - - /* set own progress to 100 % */ - global_data.tmp_progress = 1.0; - - /* check every 0.2 sec for new progress until all ranks have finished */ - while( 1 ) { - - /* update_progress() returns 0 if all ranks finished */ - if( ! 
update_progress( info, &global_data, 0, 1) ) { - break; - } - - /* sleep 0.2 s --> select is used because of portability */ - tv.tv_sec = 0; - tv.tv_usec = 200000; - select(0, NULL, NULL, NULL, &tv); - - } - - printf("%7.2f %% done\n", 100.0); - fflush( stdout ); - - /* clear all open requests in info array */ - for( i = 1; i < num_ranks; i++ ) { - - if( info[i].request != MPI_REQUEST_NULL ) { - - MPI_Cancel( &(info[i].request) ); - - } - - } - - } else { - - /* rank != 0 has finished and sends a last message to ranks 0 */ - - /* first wait until the previous msg was received by rank 0 */ - MPI_Wait( &(info[0].request), &status ); - /* fill buffer with valid values */ - info[0].value.progress = 100.0; - info[0].value.is_alive = 0; - /* send message and wait until the buffer is free for reuse */ - MPI_Isend( &(info[0].value.progress), 1, global_data.buftype, 0, 0, MPI_COMM_WORLD, &(info[0].request)); - MPI_Wait( &(info[0].request), &status ); - } - - } - - OTF_FileManager_close( manager ); - - /* clear everything and exit */ - return FINISH_EVERYTHING(0); -} - -double update_progress( ProgressInfo* info, GlobalData *data, int cur_ostream, int num_ostreams) { - - /* progress is a sum of all progresses sent by other ranks (proportionable) */ - static double progress = 0.0; - static int tmp = 0; - MPI_Status status; - int flag = 0; - int j; - char signs[2] = {' ','.'}; - - if( data->my_rank != 0 ) { - - /* check if previous msg was received by rank 0 already - --> if not, do nothing in this function; - else calculate new progress and send the result to root later on */ - MPI_Test( &(info[0].request), &flag, &status ); - - } - - /* calculate progress if necessary */ - if( data->my_rank == 0 || flag ) { - - info[0].value.progress = data->tmp_progress / (double)num_ostreams - + (double)cur_ostream * ( 1.0 / (double)num_ostreams ); - info[0].value.progress *= 100.0; - - } - - /* show progress */ - if( data->my_rank == 0) { - - /* set the roots progress as the global 
progress first (in the rigth proportion) */ - progress = info[0].value.progress * info[0].percent; - - /* listen to all ranks for new messages */ - for( j = 1; j < data->num_ranks; j++ ) { - - /* check if a new MPI_Irecv is necessary/if the previous msg was received */ - if ( MPI_REQUEST_NULL == info[j].request ) { - - /* irecv with derived datatype --> double progress, uint8_t is_alive */ - MPI_Irecv( &(info[j].buf.progress), 1, data->buftype, j, 0, MPI_COMM_WORLD, &(info[j].request) ); - - } - - /* test if current msg was received */ - MPI_Test( &(info[j].request), &flag, &status ); - - if( flag ) { - - /* got new values */ - - /* MPI_REQUEST_NULL indicates that a new MPI_Irecv is necessary */ - info[j].request = MPI_REQUEST_NULL; - /* the receive-buffer must be copied because its value is needed later on - and the buffer itself is locked by MPI_Irecv */ - info[j].value.progress = info[j].buf.progress; - - /* check if it was the last msg from rank j - --> the second field of the buffer (is_alive) would be 0 */ - if( ! 
info[j].buf.is_alive ) { - - /* decrease the number of still living ranks */ - data->ranks_alive--; - - } - - } - - /* add the progress of rank j proportionally to the global progress */ - progress += info[j].value.progress * info[j].percent; - - } - - /* print progress */ - printf("%7.2f %% %c\r", progress, signs[tmp]); - fflush(stdout); - - tmp ^= 1; - - } else { - - /* flag is only set if the send-buffer can be used again and a msg is necessary therefore */ - if( flag ) { - - /* send in synchronous mode --> this is because with MPI_Test we want to know if the root has - started a matching receive operation already and not only if we can reuse the send-buffer */ - MPI_Issend( &(info[0].value.progress), 1, data->buftype, 0, 0, MPI_COMM_WORLD, &(info[0].request)); - - } - - } - - /* returns 0 if all ranks have finished */ - return data->ranks_alive; -} - -int finish_everything( char *infile, char* outfile, ProgressInfo* info, RankData* data, int ret ) { - - int i; - - if( infile ) - free( infile ); - - if( outfile ) - free( outfile ); - - if( info ) - free( info ); - - if ( data->ostreams ) { - - for( i = 0; i < data->num_ostreams; i++ ) { - - if( data->ostreams[i].cpus ) - free( data->ostreams[i].cpus ); - - } - - free( data->ostreams ); - - } - - if( ret == 0 ) { - - MPI_Finalize(); - - } else { - - MPI_Abort( MPI_COMM_WORLD, ret ); - - } - - return ret; -} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/Makefile.am index 7414e79adf..c0d08d9ff0 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/Makefile.am +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/Makefile.am @@ -1,19 +1,18 @@ -INCLUDES = -I$(top_builddir)/otflib -I$(top_srcdir)/otflib - -if AMBUILDBINARIES -bin_PROGRAMS = \ - otfmerge +if AMHAVEMPI +SUBDIRS = . 
mpi endif -otfmerge_LDADD = $(top_builddir)/otflib/libotf.la -otfmerge_DEPENDENCIES = $(otfmerge_LDADD) -otfmerge_SOURCES = \ - handler.h \ - hash.h \ - otfmerge.c \ - handler.c \ - hash.c +if AMBUILDBINARIES +bin_PROGRAMS = otfmerge +endif + +OTFMERGESRCDIR = $(srcdir) +include $(srcdir)/Makefile.common + +INCLUDES = $(COMMONINCLUDES) + +otfmerge_CFLAGS = $(COMMONCFLAGS) +otfmerge_LDADD = $(COMMONLDADD) +otfmerge_DEPENDENCIES = $(COMMONDEPENDENCIES) +otfmerge_SOURCES = $(COMMONSOURCES) -EXTRA_DIST = \ - otfmerge_vs08.vcproj - diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/Makefile.common b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/Makefile.common new file mode 100644 index 0000000000..e6cc29cc9a --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/Makefile.common @@ -0,0 +1,13 @@ +COMMONINCLUDES = \ + -I$(OTFMERGESRCDIR) \ + -I$(top_builddir)/otflib -I$(top_srcdir)/otflib \ + -I$(top_builddir)/otfauxlib -I$(top_srcdir)/otfauxlib + +COMMONCFLAGS = +COMMONLDADD = $(top_builddir)/otflib/libotf.la +COMMONDEPENDENCIES = $(top_builddir)/otflib/libotf.la +COMMONSOURCES = \ + $(OTFMERGESRCDIR)/handler.h \ + $(OTFMERGESRCDIR)/handler.c \ + $(OTFMERGESRCDIR)/otfmerge.c + diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/handler.c b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/handler.c index f22a5217eb..a73b837f3e 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/handler.c +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/handler.c @@ -1,858 +1,655 @@ /* This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
- Authors: Andreas Knuepfer, Holger Brunst, Ronny Brendel, Thomas Kriebitzsch + Authors: Johannes Spazier */ -#include - -#include "OTF_Platform.h" -#include "OTF_Writer.h" - #include "handler.h" -#include "hash.h" -/* *** Definition handler *** ************************************* */ -int handleDefinitionComment( void* fcbx, uint32_t streamid, - const char* comment, OTF_KeyValueList *list ) { - +void setDefinitionHandlerArray( OTF_HandlerArray* handlers, + OTF_WStream* wstream) { - if( 0 != streamid ) { + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefinitionComment, + OTF_DEFINITIONCOMMENT_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFINITIONCOMMENT_RECORD ); - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefTimerResolution, + OTF_DEFTIMERRESOLUTION_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFTIMERRESOLUTION_RECORD); - assert( 0 ); + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefProcess, + OTF_DEFPROCESS_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFPROCESS_RECORD ); - return OTF_RETURN_ABORT; + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefProcessGroup, + OTF_DEFPROCESSGROUP_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFPROCESSGROUP_RECORD ); - } else { + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefAttributeList, + OTF_DEFATTRLIST_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFATTRLIST_RECORD ); - return ( 0 == OTF_Writer_writeDefinitionCommentKV( ((fcbT*) fcbx)->writer, streamid, - comment, list ) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; - - } + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefProcessOrGroupAttributes, + OTF_DEFPROCESSORGROUPATTR_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFPROCESSORGROUPATTR_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefFunction, + OTF_DEFFUNCTION_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFFUNCTION_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefFunctionGroup, + OTF_DEFFUNCTIONGROUP_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFFUNCTIONGROUP_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefCollectiveOperation, + OTF_DEFCOLLOP_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFCOLLOP_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefCounter, + OTF_DEFCOUNTER_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFCOUNTER_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefCounterGroup, + OTF_DEFCOUNTERGROUP_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFCOUNTERGROUP_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefScl, + OTF_DEFSCL_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFSCL_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefSclFile, + OTF_DEFSCLFILE_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFSCLFILE_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefVersion, + OTF_DEFVERSION_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFVERSION_RECORD ); + + OTF_HandlerArray_setHandler( 
handlers, + (OTF_FunctionPointer*) handleDefCreator, + OTF_DEFCREATOR_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFCREATOR_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefFile, + OTF_DEFFILE_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFFILE_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefFileGroup, + OTF_DEFFILEGROUP_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFFILEGROUP_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefKeyValue, + OTF_DEFKEYVALUE_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFKEYVALUE_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefMarker, + OTF_DEFMARKER_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFMARKER_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefTimeRange, + OTF_DEFTIMERANGE_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFTIMERANGE_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefCounterAssignments, + OTF_DEFCOUNTERASSIGNMENTS_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFCOUNTERASSIGNMENTS_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleDefProcessSubstitutes, + OTF_DEFPROCESSSUBSTITUTES_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_DEFPROCESSSUBSTITUTES_RECORD ); + + OTF_HandlerArray_setHandler( handlers, + (OTF_FunctionPointer*) handleUnknownRecord, + OTF_UNKNOWN_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handlers, + (void*) wstream, OTF_UNKNOWN_RECORD ); } - -int handleDefTimerResolution( void* fcbx, - uint32_t streamid, uint64_t ticksPerSecond, OTF_KeyValueList 
*list ) { - - - fcbT *fcb; - streaminfoT* si; - - if( 0 != streamid ) { +void setEventHandlerArray( OTF_HandlerArray* handlers, OTF_WStream* wstream) { - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - - fcb = ((fcbT*) fcbx); - si = hash_search( fcb->hash, streamid ); - - si->ticksPerSecond = ticksPerSecond; - - return ( 0 == OTF_Writer_writeDefTimerResolutionKV( fcb->writer, streamid, - ticksPerSecond, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } + /* no special handlers needed for processing events; use copy handlers */ + OTF_HandlerArray_getCopyHandler_stream( handlers, wstream ); } -int handleDefProcess( void* fcbx, uint32_t streamid, uint32_t deftoken, - const char* name, uint32_t parent, OTF_KeyValueList *list ) { +/* handlers */ +int handleDefinitionComment( void *userData, uint32_t stream, + const char *comment, OTF_KeyValueList *list ) { - if( 0 != streamid ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); + if( 0 != stream ) { - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefProcessKV( ((fcbT*) fcbx)->writer, streamid, - deftoken, name, parent, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - - -int handleDefProcessGroup( void* fcbx, uint32_t streamid, uint32_t deftoken, - const char* name, uint32_t n, uint32_t* array, OTF_KeyValueList *list ) { - - - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefProcessGroupKV( ((fcbT*) fcbx)->writer, streamid, deftoken, - name, n, array, list ) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - -int handleDefAttributeList( void* fcbx, uint32_t stream, - uint32_t attr_token, uint32_t num, OTF_ATTR_TYPE* array, OTF_KeyValueList* list ) { - - - if( 0 != stream ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefAttributeListKV( ((fcbT*) fcbx)->writer, stream, attr_token, - num, array, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - -int handleDefProcessOrGroupAttributes( void* fcbx, uint32_t stream, - uint32_t proc_token, uint32_t attr_token, OTF_KeyValueList* list ) { - - - if( 0 != stream ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefProcessOrGroupAttributesKV( ((fcbT*) fcbx)->writer, stream, proc_token, - attr_token, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - - -int handleDefFunction( void* fcbx, uint32_t streamid, - uint32_t deftoken, const char* name, - uint32_t group, uint32_t scltoken, OTF_KeyValueList *list ) { - - - fcbT *fcb; - streaminfoT* si; - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - fcb = ((fcbT*) fcbx); - si = hash_search( fcb->hash, streamid ); - - - /* allocate new memory if necessary */ - if ( si->nfunctions >= si->sfunctions ) - { - si->sfunctions = ( si->sfunctions > 0 ) ? 
( 2* si->sfunctions ) : 20; - si->functions = (functionT*) realloc( si->functions, - si->sfunctions * sizeof(functionT) ); - - assert( NULL != si->functions ); - } - - /* insert all data about the function */ - si->functions[si->nfunctions].deftoken = deftoken; - si->functions[si->nfunctions].name = strdup( name ); - si->functions[si->nfunctions].group = group; - si->functions[si->nfunctions].scltoken = scltoken; - - ++(si->nfunctions); - - return ( 0 == OTF_Writer_writeDefFunctionKV( fcb->writer, streamid, deftoken, - name, group, scltoken, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - - -int handleDefFunctionGroup( void* fcbx, uint32_t streamid, - uint32_t deftoken, const char* name, OTF_KeyValueList *list ) { - - - fcbT *fcb; - streaminfoT* si; - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - fcb = ((fcbT*) fcbx); - si = hash_search( fcb->hash, streamid ); - - /* allocate new memory if necessary */ - if ( si->nfunctiongroups >= si->sfunctiongroups ) - { - si->sfunctiongroups = ( si->sfunctiongroups > 0 ) - ? ( 2* si->sfunctiongroups ) : 10; - si->functiongroups = (functiongroupT*) realloc( si->functiongroups, - si->sfunctiongroups * sizeof(functiongroupT) ); - - assert( NULL != si->functiongroups ); - } - - /* insert all data about the function */ - si->functiongroups[si->nfunctiongroups].deftoken = deftoken; - si->functiongroups[si->nfunctiongroups].name = strdup( name ); - - ++(si->nfunctiongroups); - - return ( 0 == OTF_Writer_writeDefFunctionGroupKV( fcb->writer, streamid, - deftoken, name, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - - -int handleDefCollectiveOperation( void* fcbx, uint32_t streamid, - uint32_t collOp, const char* name, uint32_t type, OTF_KeyValueList *list ) { - - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. 
aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefCollectiveOperationKV( ((fcbT*) fcbx)->writer, streamid, - collOp, name, type, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - } -} - - -int handleDefCounter( void* fcbx, uint32_t streamid, - uint32_t deftoken, const char* name, uint32_t properties, - uint32_t countergroup, const char* unit, OTF_KeyValueList *list ) { - - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefCounterKV( ((fcbT*) fcbx)->writer, streamid, - deftoken, name, properties, countergroup, unit, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - - -int handleDefCounterGroup( void* fcbx, uint32_t streamid, - uint32_t deftoken, const char* name, OTF_KeyValueList *list ) { - - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefCounterGroupKV( ((fcbT*) fcbx)->writer, streamid, - deftoken, name, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - - -int handleDefScl( void* fcbx, uint32_t streamid, - uint32_t deftoken, uint32_t sclfile, uint32_t sclline, OTF_KeyValueList *list ) { - - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefSclKV( ((fcbT*) fcbx)->writer, streamid, deftoken, - sclfile, sclline, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - - -int handleDefSclFile( void* fcbx, uint32_t streamid, - uint32_t deftoken, const char* filename, OTF_KeyValueList *list ) { - - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. 
aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefSclFileKV( ((fcbT*) fcbx)->writer, streamid, - deftoken, filename, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - - -int handleDefCreator( void* fcbx, uint32_t streamid, - const char* creator, OTF_KeyValueList *list ) { - - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefCreatorKV( ((fcbT*) fcbx)->writer, streamid, - creator, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - - -int handleDefFile( void* fcbx, uint32_t streamid, uint32_t token, - const char* name, uint32_t group, OTF_KeyValueList *list ) { - - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefFileKV( ((fcbT*) fcbx)->writer, - streamid, token, name, group, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - - -int handleDefFileGroup( void* fcbx, uint32_t streamid, - uint32_t token, const char* name, OTF_KeyValueList *list ) { - - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefFileGroupKV( ((fcbT*) fcbx)->writer, - streamid, token, name, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - -int handleDefKeyValue( void *fcbx, uint32_t streamid, uint32_t token, - OTF_Type type, const char *name, const char *desc, OTF_KeyValueList *list) { - - if( 0 != streamid ) { - - fprintf( stderr, "ERROR: cannot merge traces with local definitions. 
aborting\n" ); - - assert( 0 ); - - return OTF_RETURN_ABORT; - - } else { - - return ( 0 == OTF_Writer_writeDefKeyValueKV( ((fcbT*) fcbx)->writer, - streamid, token, type, name, desc, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - - } -} - -int handleDefTimeRange( void* fcbx, - uint32_t streamid, - uint64_t minTime, - uint64_t maxTime, - OTF_KeyValueList* list ) { - - - if ( 0 != streamid ) { - - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); return OTF_RETURN_ABORT; } else { - return ( 0 == OTF_Writer_writeDefTimeRange( - ((fcbT*) fcbx)->writer, - streamid, - minTime, - maxTime, - list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + return ( 0 == OTF_WStream_writeDefinitionCommentKV( wstream, comment, + list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + } } -int handleDefCounterAssignments( void* fcbx, - uint32_t streamid, - uint32_t counter_token, - uint32_t number_of_members, - const uint32_t* procs_or_groups, - OTF_KeyValueList* list ) { +int handleDefTimerResolution( void *userData, uint32_t stream, + uint64_t ticksPerSecond, OTF_KeyValueList *list ) { - if ( 0 != streamid ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - fprintf( stderr, "Error: cannot merge traces with local definitions. Aborting.\n" ); + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); return OTF_RETURN_ABORT; } else { - return ( 0 == OTF_Writer_writeDefCounterAssignments( - ((fcbT*) fcbx)->writer, - streamid, - counter_token, - number_of_members, - procs_or_groups, - list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + return ( 0 == OTF_WStream_writeDefTimerResolutionKV( wstream, + ticksPerSecond, + list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + } } -/* *** Event handler *** ****************************************** */ +int handleDefProcess( void *userData, uint32_t stream, uint32_t process, + const char *name, uint32_t parent, OTF_KeyValueList *list ) { -int handleNoOp( void* fcb, uint64_t time, - uint32_t process, OTF_KeyValueList *list ) { - - return ( 0 == OTF_Writer_writeNoOpKV( ((fcbT*) fcb)->writer, time, process, - list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + OTF_WStream* wstream = (OTF_WStream*) userData; + + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefProcessKV( wstream, process, name, + parent, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleEventComment( void* fcb, uint64_t time, uint32_t process, - const char* comment, OTF_KeyValueList *list ) { +int handleDefProcessGroup( void *userData, uint32_t stream, uint32_t procGroup, + const char *name, uint32_t numberOfProcs, const uint32_t *procs, + OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeEventCommentKV( ((fcbT*) fcb)->writer, time, process, - comment, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefProcessGroupKV( wstream, procGroup, + name, numberOfProcs, procs, + list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleCounter( void* fcb, uint64_t time, uint32_t process, - uint32_t counter_token, uint64_t value, OTF_KeyValueList *list ) { +int handleDefAttributeList( void *userData, uint32_t stream, + uint32_t attr_token, uint32_t num, OTF_ATTR_TYPE *array, + OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeCounterKV( ((fcbT*) fcb)->writer, time, - process, counter_token, value, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefAttributeListKV( wstream, attr_token, + num, array, + list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleEnter( void* fcb, uint64_t time, uint32_t statetoken, - uint32_t cpuid, uint32_t scltoken, OTF_KeyValueList *list ) { +int handleDefProcessOrGroupAttributes( void *userData, uint32_t stream, + uint32_t proc_token, uint32_t attr_token, OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeEnterKV( ((fcbT*) fcb)->writer, time, - statetoken, cpuid, scltoken, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefProcessOrGroupAttributesKV( wstream, + proc_token, attr_token, + list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleCollectiveOperation( void* fcb, uint64_t time, - uint32_t process, uint32_t functionToken, uint32_t communicator, - uint32_t rootprocess, uint32_t sent, uint32_t received, - uint64_t duration, uint32_t scltoken, OTF_KeyValueList *list ) { +int handleDefFunction( void *userData, uint32_t stream, uint32_t func, + const char *name, uint32_t funcGroup, uint32_t source, + OTF_KeyValueList *list) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeCollectiveOperationKV( ((fcbT*) fcb)->writer, time, - process, functionToken, communicator, rootprocess, - sent, received, duration, scltoken, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefFunctionKV( wstream, func, name, + funcGroup, source, + list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleBeginCollectiveOperation( void* fcb, uint64_t time, uint32_t process, - uint32_t collOp, uint64_t matchingId, uint32_t procGroup, - uint32_t rootprocess, uint64_t sent, uint64_t received, - uint32_t scltoken, OTF_KeyValueList *list ) -{ - - return (0 == OTF_Writer_writeBeginCollectiveOperationKV( - ((fcbT*) fcb)->writer, time, process, collOp, - matchingId, procGroup, rootprocess, sent, received, - scltoken, list )) ? OTF_RETURN_ABORT : OTF_RETURN_OK; +int handleDefFunctionGroup( void *userData, uint32_t stream, uint32_t funcGroup, + const char *name, OTF_KeyValueList *list ) { + + OTF_WStream* wstream = (OTF_WStream*) userData; + + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefFunctionGroupKV( wstream, funcGroup, + name, list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleEndCollectiveOperation( void* fcb, uint64_t time, uint32_t process, - uint64_t matchingId, OTF_KeyValueList *list ) -{ - return (0 == OTF_Writer_writeEndCollectiveOperationKV( - ((fcbT*) fcb)->writer, time, process, matchingId, list)) ? - OTF_RETURN_ABORT : OTF_RETURN_OK; +int handleDefCollectiveOperation( void *userData, uint32_t stream, + uint32_t collOp, const char *name, uint32_t type, + OTF_KeyValueList *list ) { + + OTF_WStream* wstream = (OTF_WStream*) userData; + + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefCollectiveOperationKV( wstream, + collOp, name, type, + list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleRecvMsg( void* fcb, uint64_t time, - uint32_t receiver, uint32_t sender, uint32_t communicator, - uint32_t msgtype, uint32_t msglength, uint32_t scltoken, OTF_KeyValueList *list ) { +int handleDefCounter( void *userData, uint32_t stream, uint32_t counter, + const char *name, uint32_t properties, uint32_t counterGroup, + const char *unit, OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeRecvMsgKV( ((fcbT*) fcb)->writer, time, receiver, - sender, communicator, msgtype, msglength, scltoken, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefCounterKV( wstream, counter, name, + properties, counterGroup, unit, + list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleSendMsg( void* fcb, uint64_t time, - uint32_t sender, uint32_t receiver, uint32_t communicator, - uint32_t msgtype, uint32_t msglength, uint32_t scltoken, OTF_KeyValueList *list ) { +int handleDefCounterGroup( void *userData, uint32_t stream, + uint32_t counterGroup, const char *name, OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeSendMsgKV( ((fcbT*) fcb)->writer, time, sender, - receiver, communicator, msgtype, msglength, scltoken, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefCounterGroupKV( wstream, counterGroup, + name, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleLeave( void* fcb, uint64_t time, uint32_t statetoken, - uint32_t cpuid, uint32_t scltoken, OTF_KeyValueList *list ) { +int handleDefScl( void *userData, uint32_t stream, uint32_t source, + uint32_t sourceFile, uint32_t line, OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeLeaveKV( ((fcbT*) fcb)->writer, time, statetoken, - cpuid, scltoken, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefSclKV( wstream, source, sourceFile, + line, list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleBeginProcess( void* fcb, uint64_t time, - uint32_t cpuid, OTF_KeyValueList *list ) { +int handleDefSclFile( void *userData, uint32_t stream, uint32_t sourceFile, + const char *name, OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeBeginProcessKV( ((fcbT*) fcb)->writer, time, cpuid, list ) ); -} -int handleEndProcess( void* fcb, uint64_t time, - uint32_t cpuid, OTF_KeyValueList *list ) { + if( 0 != stream ) { + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); - return ( 0 == OTF_Writer_writeEndProcessKV( ((fcbT*) fcb)->writer, time, cpuid, list ) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefSclFileKV( wstream, sourceFile, + name, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleFileOperation( void* fcb, uint64_t time, uint32_t fileid, - uint32_t process, uint64_t handleid, uint32_t operation, uint64_t bytes, - uint64_t duration, uint32_t source, OTF_KeyValueList *list ) { +int handleDefCreator( void *userData, uint32_t stream, const char *creator, + OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeFileOperationKV( ((fcbT*) fcb)->writer, time, fileid, - process, handleid, operation, bytes, duration, source, list ) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefCreatorKV( wstream, creator, + list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleBeginFileOperation( void* fcb, uint64_t time, uint32_t process, - uint64_t matchingId, uint32_t scltoken, OTF_KeyValueList *list ) -{ - return (0 == OTF_Writer_writeBeginFileOperationKV( ((fcbT*) fcb)->writer, - time, process, matchingId, scltoken, list )) ? OTF_RETURN_ABORT - : OTF_RETURN_OK; +int handleDefVersion( void *userData, uint32_t stream, uint8_t major, + uint8_t minor, uint8_t sub, const char *string ) { + + OTF_WStream* wstream = (OTF_WStream*) userData; + + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeOtfVersion( + wstream ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleEndFileOperation( void* fcb, uint64_t time, - uint32_t process, uint32_t fileid, uint64_t matchingId, uint64_t handleId, - uint32_t operation, uint64_t bytes, uint32_t scltoken, OTF_KeyValueList *list ) -{ - return (0 == OTF_Writer_writeEndFileOperationKV( ((fcbT*) fcb)->writer, - time, process, fileid, matchingId, handleId, operation, bytes, - scltoken, list )) ? OTF_RETURN_ABORT : OTF_RETURN_OK; +int handleDefFile( void *userData, uint32_t stream, uint32_t token, + const char *name, uint32_t group, OTF_KeyValueList *list ) { + + OTF_WStream* wstream = (OTF_WStream*) userData; + + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefFileKV( wstream, token, name, + group, list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleRMAPut( void* fcb, uint64_t time, uint32_t process, - uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag, - uint64_t bytes, uint32_t scltoken, OTF_KeyValueList *list ) { +int handleDefFileGroup( void *userData, uint32_t stream, uint32_t token, + const char *name, OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeRMAPutKV( ((fcbT*) fcb)->writer, time, - process, origin, target, communicator, tag, bytes, scltoken, list ) - ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefFileGroupKV( wstream, token, name, + list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleRMAPutRemoteEnd( void* fcb, uint64_t time, - uint32_t process, uint32_t origin, uint32_t target, - uint32_t communicator, uint32_t tag, uint64_t bytes, - uint32_t scltoken, OTF_KeyValueList *list ) { +int handleDefKeyValue( void *userData, uint32_t stream, uint32_t token, + OTF_Type type, const char *name, const char *desc, + OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeRMAPutRemoteEndKV( ((fcbT*) fcb)->writer, - time, process, origin, target, communicator, tag, bytes, scltoken, list ) - ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefKeyValueKV( wstream, token, type, + name, desc, + list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleRMAGet( void* fcb, uint64_t time, uint32_t process, - uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag, - uint64_t bytes, uint32_t scltoken, OTF_KeyValueList *list) { +int handleDefTimeRange( void* userData, uint32_t stream, uint64_t minTime, + uint64_t maxTime, OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeRMAGetKV( ((fcbT*) fcb)->writer, time, - process, origin, target, communicator, tag, bytes, scltoken, list ) - ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefTimeRange( wstream, minTime, + maxTime, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + + } } -int handleRMAEnd( void* fcb, uint64_t time, uint32_t process, uint32_t remote, - uint32_t communicator, uint32_t tag, uint32_t scltoken, OTF_KeyValueList *list ) { +int handleDefCounterAssignments( void* userData, uint32_t stream, + uint32_t counter_token, uint32_t number_of_members, + const uint32_t* procs_or_groups, OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeRMAEndKV( ((fcbT*) fcb)->writer, time, - process, remote, communicator, tag, scltoken, list ) - ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + if( 0 != stream ) { + + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); + + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefCounterAssignments( wstream, + counter_token, number_of_members, procs_or_groups, + list ) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + } } -/* *** snapshot handlers ********************************************** */ +int handleDefProcessSubstitutes( void* userData, uint32_t stream, + uint32_t representative, uint32_t numberOfProcs, const uint32_t* procs, + OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; -int handleSnapshotComment( void *fcb, uint64_t time, - uint32_t process, const char* comment, OTF_KeyValueList *list ) { + if( 0 != stream ) { + fprintf( stderr, + "Error: cannot merge traces with local definitions. " + "Aborting.\n" ); - return ( 0 == OTF_Writer_writeSnapshotCommentKV( ((fcbT*) fcb)->writer, time, - process, comment, list ) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; + return OTF_RETURN_ABORT; + + } else { + + return ( 0 == OTF_WStream_writeDefProcessSubstitutes( wstream, + representative, numberOfProcs, procs, + list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + } } -int handleEnterSnapshot( void *fcb, uint64_t time, - uint64_t originaltime, uint32_t function, uint32_t process, - uint32_t source, OTF_KeyValueList *list ) { +int handleDefMarker( void *userData, uint32_t stream, uint32_t token, + const char *name, uint32_t type, OTF_KeyValueList *list ) { + OTF_WStream* wstream = (OTF_WStream*) userData; - return ( 0 == OTF_Writer_writeEnterSnapshotKV( ((fcbT*) fcb)->writer, - time, originaltime, function, process, source, list ) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; + return ( 0 == OTF_WStream_writeDefMarkerKV( wstream, token, name, type, + list) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; } -int handleSendSnapshot( void *fcb, uint64_t time, uint64_t originaltime, - uint32_t sender, uint32_t receiver, uint32_t procGroup, uint32_t tag, - uint32_t length, uint32_t source, OTF_KeyValueList *list ) { - +int handleUnknownRecord( void *userData, uint64_t time, uint32_t process, + const char *record) { - return ( 0 == OTF_Writer_writeSendSnapshotKV( ((fcbT*) fcb)->writer, - time, originaltime, sender, receiver, procGroup, tag, length, source, list ) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; + fprintf( stderr, "Error: unknown record >%s< at process 0x%x. Aborting.\n", + record, process ); + + return OTF_RETURN_ABORT; } - - -int handleOpenFileSnapshot( void* fcb, uint64_t time, - uint64_t originaltime, uint32_t fileid, uint32_t process, - uint64_t handleid, uint32_t source, OTF_KeyValueList *list ) { - - - return ( 0 == OTF_Writer_writeOpenFileSnapshotKV( ((fcbT*) fcb)->writer, time, - originaltime, fileid, process, handleid, source, list ) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - - -int handleBeginCollopSnapshot( void *fcb, uint64_t time, - uint64_t originaltime, uint32_t process, uint32_t collOp, - uint64_t matchingId, uint32_t procGroup, uint32_t rootProc, - uint64_t sent, uint64_t received, uint32_t scltoken, OTF_KeyValueList *list) { - - - return ( 0 == OTF_Writer_writeBeginCollopSnapshotKV( ((fcbT*) fcb)->writer, time, - originaltime, process, collOp, matchingId, procGroup, rootProc, sent, received, - scltoken, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; - -} - -int handleBeginFileOpSnapshot( void *fcb, uint64_t time, - uint64_t originaltime, uint32_t process, uint64_t matchingId, - uint32_t scltoken, OTF_KeyValueList *list) { - - - return ( 0 == OTF_Writer_writeBeginFileOpSnapshotKV( ((fcbT*) fcb)->writer, time, - originaltime, process, matchingId, scltoken, list ) ) - ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; - -} - - -/* *** Summary handlers *** ****************************************** */ - - -int handleSummaryComment( void* fcb, uint64_t time, - uint32_t process, const char* comment, OTF_KeyValueList *list ) { - - - return ( 0 == OTF_Writer_writeSummaryCommentKV( ((fcbT*) fcb)->writer, time, - process, comment, list ) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK;; -} - - -int handleFunctionSummary( void* fcb, - uint64_t time, uint32_t function, uint32_t process, - uint64_t count, uint64_t excltime, uint64_t incltime, OTF_KeyValueList *list ) { - - - return ( 0 == OTF_Writer_writeFunctionSummaryKV( ((fcbT*) fcb)->writer, - time, function, process, count, excltime, incltime, list ) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - - -int handleFunctionGroupSummary( void* fcb, - uint64_t time, uint32_t functiongroup, uint32_t process, - uint64_t count, uint64_t excltime, uint64_t incltime, OTF_KeyValueList *list ) { - - - return ( 0 == OTF_Writer_writeFunctionGroupSummaryKV( ((fcbT*) fcb)->writer, - time, functiongroup, process, count, excltime, incltime, list ) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - - -int handleMessageSummary( void* fcb, - uint64_t time, uint32_t process, uint32_t peer, - uint32_t comm, uint32_t tag, uint64_t number_sent, - uint64_t number_recvd, uint64_t bytes_sent, uint64_t bytes_recved, - OTF_KeyValueList *list ) { - - - return ( 0 == OTF_Writer_writeMessageSummaryKV( ((fcbT*) fcb)->writer, - time, process, peer, comm, tag, number_sent, number_recvd, bytes_sent, - bytes_recved, list ) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleCollopSummary( void* fcb, - uint64_t time, uint32_t process, uint32_t comm, - uint32_t collective, uint64_t number_sent, uint64_t number_recvd, - uint64_t bytes_sent, uint64_t bytes_recved, OTF_KeyValueList *list ) { - - return ( 0 == OTF_Writer_writeCollopSummaryKV( ((fcbT*) fcb)->writer, - time, process, comm, collective, number_sent, number_recvd, bytes_sent, - bytes_recved, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - - -int handleFileOperationSummary( void* fcb, uint64_t time, uint32_t fileid, - uint32_t process, uint64_t nopen, uint64_t nclose, uint64_t nread, - uint64_t nwrite, uint64_t nseek, uint64_t bytesread, uint64_t byteswrite, - OTF_KeyValueList *list ) { - - - return ( 0 == OTF_Writer_writeFileOperationSummaryKV( ((fcbT*) fcb)->writer, - time, fileid, process, nopen, nclose, nread, nwrite, nseek, - bytesread, byteswrite, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleFileGroupOperationSummary( void* fcb, uint64_t time, - uint32_t groupid, uint32_t process, uint64_t nopen, uint64_t nclose, - uint64_t nread, uint64_t nwrite, uint64_t nseek, uint64_t bytesread, - uint64_t byteswrite, OTF_KeyValueList *list ) { - - - return ( 0 == OTF_Writer_writeFileGroupOperationSummaryKV( ((fcbT*) fcb)->writer, - time, groupid, process, nopen, nclose, nread, nwrite, nseek, - bytesread, byteswrite, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - - -int handleDefMarker( void *fcb, uint32_t stream, uint32_t token, - const char* name, uint32_t type, OTF_KeyValueList *list ) { - - /* even if marker definitions could be read from many streams, they are - written to stream 0 forcedly, because this is where all markers belong. */ - stream= 0; - - return ( 0 == OTF_Writer_writeDefMarkerKV( ((fcbT*) fcb)->writer, - stream, token, name, type, list ) ) ? 
OTF_RETURN_ABORT : OTF_RETURN_OK;; -} - - -int handleMarker( void *fcb, uint64_t time, uint32_t process, - uint32_t token, const char* text, OTF_KeyValueList *list ) { - - - return ( 0 == OTF_Writer_writeMarkerKV( ((fcbT*) fcb)->writer, - time, process, token, text, list ) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - - -int handleUnknown( void* fcb, uint64_t time, uint32_t process, const char* record ) { - -#ifdef OTF_VERBOSE - - if ( (uint64_t) -1 != time ) { - - printf( " unknown record >%s< at process 0x%x time 0x%llu\n", - record, process, (unsigned long long) time ); - - } else { - - printf( " unknown record >%s< at stream 0x%x\n", - record, process ); - } - -#endif /* OTF_VERBOSE */ - - return OTF_RETURN_ABORT; -} - diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/handler.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/handler.h index b07c5650c3..0c426112e2 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/handler.h +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/handler.h @@ -1,281 +1,163 @@ /* This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
- Authors: Andreas Knuepfer, Holger Brunst, Ronny Brendel, Thomas Kriebitzsch + Authors: Johannes Spazier */ #ifndef HANDLER_H #define HANDLER_H #ifdef HAVE_CONFIG_H -#include "config.h" +# include "config.h" #endif +#include +#include -#include "OTF_inttypes.h" -#include "OTF_Writer.h" +#if defined(HAVE_SYS_SELECT_H) && HAVE_SYS_SELECT_H +# include +#else /* HAVE_SYS_SELECT_H */ +# include +# include +# include +#endif /* HAVE_SYS_SELECT_H */ +#ifdef OTFMERGE_MPI +# include "mpi.h" +#endif /* OTFMERGE_MPI */ + +#include "otf.h" + + +/* data structures */ typedef struct { - - uint32_t deftoken; - char *name; - uint32_t group; - uint32_t scltoken; -}functionT; + int id; + int num_cpus; + int *cpus; +} OutStream; typedef struct { - - uint32_t deftoken; - char* name; -}functiongroupT; + int num_ostreams; + OutStream *ostreams; +} RankData; typedef struct { - - uint32_t streamid; - - uint64_t ticksPerSecond; - - functionT *functions; - int nfunctions; /* number of functions in the functioninfo-array */ - int sfunctions; /* size of the functioninfo-array */ - functiongroupT *functiongroups; - int nfunctiongroups; /* number of groups in the functiongroup-array */ - int sfunctiongroups; /* size of the functiongroups-array */ - -}streaminfoT; - -typedef struct hashtabS -{ - streaminfoT entry; - - int entryvecsize; - streaminfoT *p_entryvec; -}hashtabT; + double progress; + uint8_t is_alive; +} ProgressBuf; typedef struct { +#ifdef OTFMERGE_MPI + MPI_Request request; +#endif /* OTFMERGE_MPI */ + ProgressBuf value; + ProgressBuf buf; + int num_cpus; + double percent; +} ProgressInfo; - OTF_Writer *writer; - hashtabT *hash; - int nstreaminfos; /* number of streams in the streaminfos-array */ - - int error; - -}fcbT; - -/* *** Definition handler *** ************************************* */ - -int handleDefinitionComment( void* firsthandlerarg, uint32_t streamid, - const char* comment, OTF_KeyValueList *list ); - -int handleDefTimerResolution( void* firsthandlerarg, - uint32_t 
streamid, uint64_t ticksPerSecond, OTF_KeyValueList *list ); - -int handleDefProcess( void* firsthandlerarg, uint32_t streamid, - uint32_t deftoken, const char* name, uint32_t paren, OTF_KeyValueList *list ); - -int handleDefProcessGroup( void* firsthandlerarg, uint32_t streamid, - uint32_t deftoken, const char* name, uint32_t n, uint32_t* array, OTF_KeyValueList *list ); - -int handleDefAttributeList( void* userData, uint32_t stream, - uint32_t attr_token, uint32_t num, OTF_ATTR_TYPE* array, OTF_KeyValueList* list ); - -int handleDefProcessOrGroupAttributes( void* userData, uint32_t stream, - uint32_t proc_token, uint32_t attr_token, OTF_KeyValueList* list ); - -int handleDefFunction( void* firsthandlerarg, uint32_t streamid, - uint32_t deftoken, const char* name, uint32_t group, uint32_t scltoken, OTF_KeyValueList *list ); - -int handleDefFunctionGroup( void* firsthandlerarg, uint32_t streamid, - uint32_t deftoken, const char* name, OTF_KeyValueList *list ); - -int handleDefCollectiveOperation( void* firsthandlerarg, uint32_t streamid, - uint32_t collOp, const char* name, uint32_t type, OTF_KeyValueList *list ); - -int handleDefCounter( void* firsthandlerarg, uint32_t streamid, - uint32_t deftoken, const char* name, uint32_t properties, - uint32_t countergroup, const char* unit, OTF_KeyValueList *list ); - -int handleDefCounterGroup( void* firsthandlerarg, uint32_t streamid, - uint32_t deftoken, const char* name, OTF_KeyValueList *list ); - -int handleDefScl( void* firsthandlerarg, uint32_t streamid, - uint32_t deftoken, uint32_t sclfile, uint32_t sclline, OTF_KeyValueList *list ); - -int handleDefSclFile( void* firsthandlerarg, uint32_t streamid, - uint32_t deftoken, const char* filename, OTF_KeyValueList *list ); - -int handleDefCreator( void* firsthandlerarg, uint32_t streamid, - const char* creator, OTF_KeyValueList *list ); - -int handleDefFile( void* firsthandlerarg, uint32_t stream, uint32_t token, - const char* name, uint32_t group, OTF_KeyValueList *list 
); - -int handleDefFileGroup( void* firsthandlerarg, uint32_t stream, - uint32_t token, const char* name, OTF_KeyValueList *list ); - -int handleDefKeyValue( void *fcbx, uint32_t streamid, uint32_t token, - OTF_Type type, const char *name, const char *desc, OTF_KeyValueList *list); - -int handleDefTimeRange( void* fcbx, - uint32_t streamid, - uint64_t minTime, - uint64_t maxTime, - OTF_KeyValueList* list ); - -int handleDefCounterAssignments( void* fcbx, - uint32_t streamid, - uint32_t counter_token, - uint32_t number_of_members, - const uint32_t* procs_or_groups, - OTF_KeyValueList* list ); - -/* *** Event handler *** ****************************************** */ - -int handleNoOp( void* firsthandlerarg, uint64_t time, - uint32_t process, OTF_KeyValueList *list ); - -int handleEventComment( void* firsthandlerarg, uint64_t time, - uint32_t process, const char* comment, OTF_KeyValueList *list ); - -int handleCounter( void* firsthandlerarg, uint64_t time, - uint32_t process, uint32_t counter_token, uint64_t value, OTF_KeyValueList *list ); - -int handleEnter( void* firsthandlerarg, uint64_t time, - uint32_t statetoken, uint32_t cpuid, uint32_t scltoken, OTF_KeyValueList *list ); - -int handleCollectiveOperation( void* firsthandlerarg, uint64_t time, - uint32_t process, uint32_t functionToken, uint32_t communicator, - uint32_t rootprocess, uint32_t sent, uint32_t received, - uint64_t duration, uint32_t scltoken, OTF_KeyValueList *list ); - -int handleBeginCollectiveOperation( void* fcb, uint64_t time, uint32_t process, - uint32_t collOp, uint64_t matchingId, uint32_t procGroup, - uint32_t rootprocess, uint64_t sent, uint64_t received, - uint32_t scltoken, OTF_KeyValueList *list ); - -int handleEndCollectiveOperation( void* fcb, uint64_t time, uint32_t process, - uint64_t matchingId, OTF_KeyValueList *list ); - -int handleRecvMsg( void* firsthandlerarg, uint64_t time, - uint32_t receiver, uint32_t sender, uint32_t communicator, - uint32_t msgtype, uint32_t msglength, - 
uint32_t scltoken, OTF_KeyValueList *list ); - -int handleSendMsg( void* firsthandlerarg, uint64_t time, - uint32_t sender, uint32_t receiver, uint32_t communicator, - uint32_t msgtype, uint32_t msglength, uint32_t scltoken, OTF_KeyValueList *list ); - -int handleLeave( void* firsthandlerarg, uint64_t time, - uint32_t statetoken, uint32_t cpuid, uint32_t scltoken, OTF_KeyValueList *list ); - -int handleBeginProcess( void* firsthandlerarg, uint64_t time, - uint32_t process, OTF_KeyValueList *list ); - -int handleEndProcess( void* firsthandlerarg, uint64_t time, - uint32_t process, OTF_KeyValueList *list ); - -int handleFileOperation( void* firsthandlerarg, uint64_t time, uint32_t fileid, - uint32_t process, uint64_t handleid, uint32_t operation, uint64_t bytes, - uint64_t duration, uint32_t source, OTF_KeyValueList *list ); - -int handleBeginFileOperation( void* fcb, uint64_t time, uint32_t process, - uint64_t matchingId, uint32_t scltoken, OTF_KeyValueList *list ); - -int handleEndFileOperation( void* fcb, uint64_t time, uint32_t process, - uint32_t fileid, uint64_t matchingId, uint64_t handleId, uint32_t operation, - uint64_t bytes, uint32_t scltoken, OTF_KeyValueList *list ); - -int handleRMAPut( void* firsthandlerarg, uint64_t time, uint32_t process, - uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag, - uint64_t bytes, uint32_t scltoken, OTF_KeyValueList *list ); - -int handleRMAPutRemoteEnd( void* firsthandlerarg, uint64_t time, - uint32_t process, uint32_t origin, uint32_t target, - uint32_t communicator, uint32_t tag, uint64_t bytes, - uint32_t scltoken, OTF_KeyValueList *list ); - -int handleRMAGet( void* firsthandlerarg, uint64_t time, uint32_t process, - uint32_t origin, uint32_t target, uint32_t communicator, uint32_t tag, - uint64_t bytes, uint32_t scltoken, OTF_KeyValueList *list); - -int handleRMAEnd( void* firsthandlerarg, uint64_t time, uint32_t process, - uint32_t remote, uint32_t communicator, uint32_t tag, uint32_t scltoken, 
OTF_KeyValueList *list ); +typedef struct { + int my_rank; + int num_ranks; + int ranks_alive; + double tmp_progress; +#ifdef OTFMERGE_MPI + MPI_Datatype buftype; +#endif /* OTFMERGE_MPI */ +} GlobalData; -/* *** Handlers for OTF snapshot records ****************************** */ +/* function declarations */ + +double update_progress( ProgressInfo* info, GlobalData* data, int cur_ostream, + int num_ostreams ); + +int finish_everything( char *infile, char* outfile, ProgressInfo* info, + RankData* rank_data, int ret ); + +void setDefinitionHandlerArray( OTF_HandlerArray* handlers, + OTF_WStream* wstream); + +void setEventHandlerArray( OTF_HandlerArray* handlers, OTF_WStream* wstream ); -int handleSnapshotComment( void *firsthandlerarg, uint64_t time, - uint32_t process, const char* comment, OTF_KeyValueList *list ); - -int handleEnterSnapshot( void *firsthandlerarg, uint64_t time, - uint64_t originaltime, uint32_t function, uint32_t process, - uint32_t source, OTF_KeyValueList *list ); +/* handlers */ -int handleSendSnapshot( void *firsthandlerarg, uint64_t time, uint64_t originaltime, - uint32_t sender, uint32_t receiver, uint32_t procGroup, uint32_t tag, - uint32_t length, uint32_t source, OTF_KeyValueList *list ); - -int handleOpenFileSnapshot( void* firsthandlerarg, uint64_t time, - uint64_t originaltime, uint32_t fileid, uint32_t process, - uint64_t handleid, uint32_t source, OTF_KeyValueList *list ); +int handleDefinitionComment( void *userData, uint32_t stream, + const char *comment, OTF_KeyValueList *list ); -int handleBeginCollopSnapshot( void *fcb, uint64_t time, - uint64_t originaltime, uint32_t process, uint32_t collOp, - uint64_t matchingId, uint32_t procGroup, uint32_t rootProc, - uint64_t sent, uint64_t received, uint32_t scltoken, OTF_KeyValueList *list); - -int handleBeginFileOpSnapshot( void *userData, uint64_t time, - uint64_t originaltime, uint32_t process, uint64_t matchingId, - uint32_t scltoken, OTF_KeyValueList *list); - -/* *** Summary 
handler *** ****************************************** */ +int handleDefTimerResolution( void *userData, uint32_t stream, + uint64_t ticksPerSecond, OTF_KeyValueList *list ); -int handleSummaryComment( void* firsthandlerarg, uint64_t time, - uint32_t process, const char* comment, OTF_KeyValueList *list ); - -int handleFunctionSummary( void* firsthandlerarg, - uint64_t time, uint32_t function, uint32_t process, - uint64_t count, uint64_t excltime, uint64_t incltime, OTF_KeyValueList *list ); +int handleDefProcess( void *userData, uint32_t stream, uint32_t process, + const char *name, uint32_t parent, OTF_KeyValueList *list ); -int handleFunctionGroupSummary( void* firsthandlerarg, - uint64_t time, uint32_t functiongroup, uint32_t process, - uint64_t count, uint64_t excltime, uint64_t incltime, OTF_KeyValueList *list ); +int handleDefProcessGroup( void *userData, uint32_t stream, uint32_t procGroup, + const char *name, uint32_t numberOfProcs, const uint32_t *procs, + OTF_KeyValueList *list ); -int handleMessageSummary( void* firsthandlerarg, - uint64_t time, uint32_t process, uint32_t peer, - uint32_t comm, uint32_t tag, uint64_t number_sent, uint64_t number_recvd, - uint64_t bytes_sent, uint64_t bytes_recved, OTF_KeyValueList *list ); +int handleDefAttributeList( void *userData, uint32_t stream, + uint32_t attr_token, uint32_t num, OTF_ATTR_TYPE *array, + OTF_KeyValueList *list ); -int handleCollopSummary( void* firsthandlerarg, - uint64_t time, uint32_t process, uint32_t comm, uint32_t collective, - uint64_t number_sent, uint64_t number_recvd, uint64_t bytes_sent, - uint64_t bytes_recved, OTF_KeyValueList *list ); +int handleDefProcessOrGroupAttributes( void *userData, uint32_t stream, + uint32_t proc_token, uint32_t attr_token, OTF_KeyValueList *list); +int handleDefFunction( void *userData, uint32_t stream, uint32_t func, + const char *name, uint32_t funcGroup, uint32_t source, + OTF_KeyValueList *list ); -int handleFileOperationSummary( void* firsthandlerarg, 
uint64_t time, uint32_t fileid, - uint32_t process, uint64_t nopen, uint64_t nclose, uint64_t nread, - uint64_t nwrite, uint64_t nseek, uint64_t bytesread, uint64_t byteswrite, OTF_KeyValueList *list ); +int handleDefFunctionGroup( void *userData, uint32_t stream, uint32_t funcGroup, + const char *name, OTF_KeyValueList *list ); -int handleFileGroupOperationSummary( void* firsthandlerarg, uint64_t time, - uint32_t groupid, uint32_t process, uint64_t nopen, uint64_t nclose, - uint64_t nread, uint64_t nwrite, uint64_t nseek, uint64_t bytesread, - uint64_t byteswrite, OTF_KeyValueList *list ); +int handleDefCollectiveOperation(void *userData, uint32_t stream, + uint32_t collOp, const char *name, uint32_t type, + OTF_KeyValueList *list ); +int handleDefCounter( void *userData, uint32_t stream, uint32_t counter, + const char *name, uint32_t properties, uint32_t counterGroup, + const char *unit, OTF_KeyValueList *list ); -/* *** Marker handler *** ******************************************* */ +int handleDefCounterGroup( void *userData, uint32_t stream, + uint32_t counterGroup, const char *name, OTF_KeyValueList *list ); +int handleDefScl( void *userData, uint32_t stream, uint32_t source, + uint32_t sourceFile, uint32_t line, OTF_KeyValueList *list ); -int handleDefMarker( void *userData, uint32_t stream, - uint32_t token, const char* name, uint32_t type, OTF_KeyValueList *list ); +int handleDefSclFile( void *userData, uint32_t stream, uint32_t sourceFile, + const char *name, OTF_KeyValueList *list ); -int handleMarker( void *userData, uint64_t time, - uint32_t process, uint32_t token, const char* text, OTF_KeyValueList *list ); - +int handleDefCreator( void *userData, uint32_t stream, const char *creator, + OTF_KeyValueList *list ); -/* *** Misc handlers *** ******************************************** */ +int handleDefVersion( void *userData, uint32_t stream, uint8_t major, + uint8_t minor, uint8_t sub, const char *string ); -int handleUnknown( void* fcb, uint64_t 
time, uint32_t process, const char* record ); +int handleDefFile( void *userData, uint32_t stream, uint32_t token, + const char *name, uint32_t group, OTF_KeyValueList *list ); +int handleDefFileGroup( void *userData, uint32_t stream, uint32_t token, + const char *name, OTF_KeyValueList *list ); -#endif /* OTF_handleH */ +int handleDefKeyValue( void *userData, uint32_t stream, uint32_t token, + OTF_Type type, const char *name, const char *desc, + OTF_KeyValueList *list ); + +int handleDefTimeRange( void* userData, uint32_t stream, uint64_t minTime, + uint64_t maxTime, OTF_KeyValueList *list ); + +int handleDefCounterAssignments( void* userData, uint32_t stream, + uint32_t counter_token, uint32_t number_of_members, + const uint32_t* procs_or_groups, OTF_KeyValueList *list ); + +int handleDefProcessSubstitutes( void* userData, uint32_t stream, + uint32_t representative, uint32_t numberOfProcs, const uint32_t* procs, + OTF_KeyValueList *list ); + +int handleDefMarker( void *userData, uint32_t stream, uint32_t token, + const char *name, uint32_t type, OTF_KeyValueList *list ); + +int handleUnknownRecord( void *userData, uint64_t time, uint32_t process, + const char *record ); + +#endif /* HANDLER_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/hash.c b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/hash.c deleted file mode 100644 index 32c5233e05..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/hash.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
- Authors: Andreas Knuepfer, Holger Brunst, Ronny Brendel, Thomas Kriebitzsch -*/ - -#include "hash.h" - -hashtabT* hash_new( ) { - - - int i; - hashtabT* ret; - - ret = ( hashtabT* ) malloc( sizeof( hashtabT ) * HASH_SIZE ); - - for( i = 0; i < HASH_SIZE; i++) { - - ret[i].entry.streamid = 0; - - ret[i].entry.ticksPerSecond = 0; - - ret[i].entry.functions = NULL; - ret[i].entry.nfunctions = 0; - ret[i].entry.sfunctions = 0; - - ret[i].entry.functiongroups = NULL; - ret[i].entry.nfunctiongroups = 0; - ret[i].entry.sfunctiongroups = 0; - - ret[i].entryvecsize = -1; - ret[i].p_entryvec = 0; - } - - return ret; -} - - -void hash_delete( hashtabT *hash ) { - - - int i; - int a; - int b; - - for( i = 0; i < HASH_SIZE; i++) { - - /* del functions (free namestrings) */ - if( hash[i].entry.functions != NULL ) { - - for( a= 0; a < hash[i].entry.nfunctions; ++a ) { - if( NULL != hash[i].entry.functions[a].name ) { - free( hash[i].entry.functions[a].name ); - } - } - free( hash[i].entry.functions ); - } - - /* del functiongroups (free namestrings) */ - if( hash[i].entry.functiongroups != NULL ) { - - for( a= 0; a < hash[i].entry.nfunctiongroups; ++a ) { - if( NULL != hash[i].entry.functiongroups[a].name ) { - free( hash[i].entry.functiongroups[a].name ); - } - } - free( hash[i].entry.functiongroups ); - } - - - if ( hash[i].entryvecsize > 0 ) { - - for( a= 0; a < hash[i].entryvecsize ; ++a ) { - - /* del functions (free namestrings) */ - if( hash[i].p_entryvec[a].functions != NULL ) { - - for( b= 0; b < hash[i].p_entryvec[a].nfunctions; ++b ) { - if( NULL != hash[i].p_entryvec[a].functions[b].name ) { - free( hash[i].p_entryvec[a].functions[b].name ); - } - } - free( hash[i].p_entryvec[a].functions ); - } - - /* del functiongroups (free namestrings) */ - if( hash[i].p_entryvec[a].functiongroups != NULL ) { - - for( b= 0; b < hash[i].p_entryvec[a].nfunctiongroups; ++b ) { - if( NULL != hash[i].p_entryvec[a].functiongroups[b].name ) { - free( 
hash[i].p_entryvec[a].functiongroups[b].name ); - } - } - free( hash[i].p_entryvec[a].functiongroups ); - } - } - - free( hash[i].p_entryvec ); - } - } - - free( hash ); -} - - -void hash_add( hashtabT *hash, uint32_t entry ) { - - uint32_t hashkey = entry; - - HASH_GET_KEY( hashkey ); - - if ( hash[hashkey].entryvecsize == -1 ) { - - hash[hashkey].entry.streamid = entry; - - hash[hashkey].entry.ticksPerSecond= 0; - hash[hashkey].entry.functions= NULL; - hash[hashkey].entry.nfunctions= 0; - hash[hashkey].entry.sfunctions= 0; - hash[hashkey].entry.functiongroups= NULL; - hash[hashkey].entry.nfunctiongroups= 0; - hash[hashkey].entry.sfunctiongroups= 0; - - hash[hashkey].entryvecsize = 0; - - } else { /* realloc the entryvector and insert the new entry */ - - hash[hashkey].p_entryvec = (streaminfoT *) realloc( hash[hashkey].p_entryvec, - sizeof( streaminfoT ) * ( hash[hashkey].entryvecsize + 1) ); - - hash[hashkey].p_entryvec[hash[hashkey].entryvecsize].streamid = entry;; - - hash[hashkey].p_entryvec[hash[hashkey].entryvecsize].ticksPerSecond= 0; - hash[hashkey].p_entryvec[hash[hashkey].entryvecsize].functions= NULL; - hash[hashkey].p_entryvec[hash[hashkey].entryvecsize].nfunctions= 0; - hash[hashkey].p_entryvec[hash[hashkey].entryvecsize].sfunctions= 0; - hash[hashkey].p_entryvec[hash[hashkey].entryvecsize].functiongroups= NULL; - hash[hashkey].p_entryvec[hash[hashkey].entryvecsize].nfunctiongroups= 0; - hash[hashkey].p_entryvec[hash[hashkey].entryvecsize].sfunctiongroups= 0; - - hash[hashkey].entryvecsize++; - - } -} - -streaminfoT* hash_search( hashtabT *hash, uint32_t entry) { - - int i; - uint32_t hashkey = entry; - - HASH_GET_KEY( hashkey ); - - if ( hash[hashkey].entry.streamid == entry ) { - return &hash[hashkey].entry; - } else { - for( i = 0; i < hash[hashkey].entryvecsize; i++ ) { - - if ( hash[hashkey].p_entryvec[i].streamid == entry ) - return &(hash[hashkey].p_entryvec[i]); - } - } - - return NULL; -} diff --git 
a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/hash.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/hash.h deleted file mode 100644 index 66240a8dd7..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/hash.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Holger Brunst, Ronny Brendel, Thomas Kriebitzsch -*/ - -#ifndef HASH_H -#define HASH_H - -#include "handler.h" - -/* *** macros ****************************************/ - -/* 0x100 = 4096 */ -#define HASH_SIZE 0x1000 - -#define HASH_GET_KEY(key) \ -{ \ - key += ~(key << 15); \ - key ^= (key >> 10); \ - key += (key << 3); \ - key ^= (key >> 6); \ - key += ~(key << 11); \ - key ^= (key >> 16); \ - key &= HASH_SIZE - 1; \ -} - -/* initialize the hash */ -hashtabT* hash_new( void ); - -/* free all mem of the hash */ -void hash_delete( hashtabT *hash ); - -/* add an entry to the hash */ -void hash_add( hashtabT *hash, uint32_t entry ); - -/* search an entry in the hash */ -streaminfoT* hash_search( hashtabT *hash, uint32_t entry); - - - -#endif /* HASH_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/mpi/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/mpi/Makefile.am new file mode 100644 index 0000000000..3fc3641e47 --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/mpi/Makefile.am @@ -0,0 +1,16 @@ +if AMBUILDBINARIES +bin_PROGRAMS = otfmerge-mpi +endif + +OTFMERGESRCDIR = $(srcdir)/.. 
+include $(srcdir)/../Makefile.common + +CC = $(MPICC) + +INCLUDES = $(COMMONINCLUDES) $(MPI_INCLUDE_LINE) + +otfmerge_mpi_CFLAGS = -DOTFMERGE_MPI $(COMMONCFLAGS) $(MPICFLAGS) +otfmerge_mpi_LDADD = $(COMMONLDADD) $(MPI_LIB_LINE) +otfmerge_mpi_DEPENDENCIES = $(COMMONDEPENDENCIES) +otfmerge_mpi_SOURCES = $(COMMONSOURCES) + diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/otfmerge.c b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/otfmerge.c index 9df348be28..ab26e11428 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/otfmerge.c +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/otfmerge.c @@ -1,821 +1,1135 @@ /* This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Holger Brunst, Ronny Brendel, Thomas Kriebitzsch + Authors: Johannes Spazier */ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - - -#include -#include -#include -#include -#include - -#include "otf.h" - #include "handler.h" -#include "hash.h" - - -#define OTFMERGE_STRING "otfmerge" - -#define FINISH_EVERYTHING hash_delete( fcb.hash); fcb.hash= NULL; \ - OTF_Reader_close( reader ); \ - OTF_Writer_close( fcb.writer ); \ - OTF_FileManager_close( manager ); \ - OTF_HandlerArray_close( handlers ); \ - -#define SHOW_HELPTEXT { \ - int l = 0; while( Helptext[l] ) { printf( "%s", Helptext[l++] ); } } - -static const char* Helptext[] = { -" \n", -" otfmerge - converter program of OTF library. 
\n", -" \n", -" otfmerge [Options] \n", -" \n", -" options: \n", -" -h, --help show this help message \n", -" -V show OTF version \n", -" -n set number of streams for output \n", -" set this to 0 for using one stream per process\n", -" standard is 1 \n", -" -f set max number of filehandles available \n", -" -o namestub of the output file (default 'out') \n", -" -rb set buffersize of the reader \n", -" -wb set buffersize of the writer \n", -" -stats cover statistics too \n", -" -snaps cover snaphots too \n", -" -z write compressed output \n", -" zlevel reaches from 0 to 9 where 0 is no \n", -" compression and 9 is the highest level \n", -" -l write long OTF format \n", -" -p show progress \n", -" \n", -" \n", NULL }; -void initProgressDisplay( void ); -void finishProgressDisplay( void ); -void updateProgressDisplay( uint32_t i, uint64_t max, uint64_t cur ); -int main ( int argc, char** argv ) { +#define fprintf_root \ + if( my_rank == 0 ) fprintf + +#define FINISH_EVERYTHING(ret) \ + finish_everything( infile, outfile, info, &rank_data, ret ) + +#define SHOW_HELPTEXT if( my_rank == 0 ) { \ + int l = 0; \ + while( helptext[l] ) { fprintf( stdout, "%s", helptext[l++] ); } \ +} + +/* name of program executable */ +#ifdef OTFMERGE_MPI +# define EXENAME "otfmerge-mpi" +#else /* OTFMERGE_MPI */ +# define EXENAME "otfmerge" +#endif /* OTFMERGE_MPI */ + + +static const char* helptext[] = { + "\n", + " "EXENAME" - change the number of streams for an existing trace. \n", + " \n", + " Syntax: "EXENAME" [options] \n", + " \n", + " options: \n", + " -h, --help show this help message \n", + " -V show OTF version \n", + " -p show progress \n", + " -n set number of streams for output \n", + " set this to 0 for using one stream per process\n", + " (default: 1) \n", + " -f max. 
number of filehandles available per rank \n", + " -o namestub of the output file \n", + " (default: out) \n", + " -rb set buffersize of the reader (for each rank) \n", + " -wb set buffersize of the writer (for each rank) \n", + " -z write compressed output \n", + " zlevel reaches from 0 to 9 where 0 is no \n", + " compression and 9 is the highest level \n", + " --stats cover statistics too \n", + " --snaps cover snapshots too \n", + " --long write long OTF format \n", + "\n", + NULL +}; + + +int main(int argc, char **argv) { + + /* for all processes */ + int i, j; + int my_rank = 0; + int num_ranks = 1; + uint64_t ret_read; + int show_progress = 0; + int max_fhandles = 100; + char *outfile = NULL; + char *infile = NULL; + int rbufsize = 1024 * 1024; + int wbufsize = 1024 * 1024; + int format = OTF_WSTREAM_FORMAT_SHORT; + int read_stats = 0; + int read_snaps = 0; + OTF_FileCompression compression= 0; + RankData rank_data = { 0 ,NULL }; + ProgressInfo *info = NULL; + GlobalData global_data; + + /* only for root process (0) */ + int num_cpus; /* number of cpus in input otf-file */ + int *cpus; /* global array that contains all cpu-ids */ + int offset; + int *p; + int num_ostreams = 1; + char *outfile_otf = NULL; + FILE *master_file = NULL; + OutStream *ostreams = NULL; + /* progress related */ + uint64_t total_bytes = 0; + uint64_t cur_bytes = 0; + uint64_t cur_bytes_ges = 0; + uint64_t min, max, cur; + struct timeval tv; - OTF_Reader* reader = NULL; - OTF_HandlerArray* handlers; - OTF_FileManager* manager; - OTF_MasterControl* mc; - OTF_MapEntry* mapentry; - - int read_stats= 0; - int read_snaps= 0; - int nstreams = 1; - int nfiles = 200; - int longformat = 0; - int showprogress= 0; - int i; + /* OTF related */ + OTF_Reader* reader = NULL; + OTF_WStream* wstream = NULL; + OTF_HandlerArray* handlers = NULL; + OTF_MasterControl* master = NULL; + OTF_FileManager* manager = NULL; + OTF_MapEntry* entry = NULL; - int readerbuffersize = 1024 * 1024; - int writerbuffersize = 
1024 * 1024; - OTF_FileCompression compression= 0; - - fcbT fcb; - - char* infile= NULL; - char* outfile= NULL; +#ifdef OTFMERGE_MPI + /* MPI related */ + MPI_Status status; - uint64_t minbytes; - uint64_t maxbytes; - uint64_t curbytes; - uint64_t minbytestmp; - uint64_t maxbytestmp; - uint64_t curbytestmp; - uint64_t recordsperupdate; - uint64_t totalbytes; - uint32_t progress_counter= 0; + int array_of_blocklengths[2]; + MPI_Aint array_of_displacements[2]; + MPI_Datatype array_of_types[2]; - uint64_t retde; - uint64_t retma; - uint64_t retev; - uint64_t retst; - uint64_t retsn; + MPI_Aint first_var_address; + MPI_Aint second_var_address; - fcb.error= 0; - if ( 1 >= argc ) { + /* start MPI */ + MPI_Init( &argc, &argv ); - SHOW_HELPTEXT; - return 0; - } + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + MPI_Comm_size( MPI_COMM_WORLD, &num_ranks ); +#endif /* OTFMERGE_MPI */ - for ( i = 1; i < argc; i++ ) { + /* store some important things in a global structure */ + global_data.my_rank = my_rank; + global_data.num_ranks = num_ranks; + global_data.ranks_alive = num_ranks - 1; + /* argument handling */ + if( 1 >= argc ) { - if ( ( 0 == strcmp( "-i", argv[i] ) ) && ( i+1 < argc ) ) { + SHOW_HELPTEXT - infile= argv[i+1]; - ++i; +#ifdef OTFMERGE_MPI + MPI_Finalize(); +#endif /* OTFMERGE_MPI */ - } else if ( ( 0 == strcmp( "-n", argv[i] ) ) && ( i+1 < argc ) ) { + return 0; + } - nstreams = atoi( argv[i+1] ); - ++i; - - } else if ( ( 0 == strcmp( "-rb", argv[i] ) ) && ( i+1 < argc ) ) { - - readerbuffersize = atoi( argv[i+1] ); - ++i; + for ( i = 1; i < argc; i++ ) { - } else if ( ( 0 == strcmp( "-wb", argv[i] ) ) && ( i+1 < argc ) ) { + if( ( 0 == strcmp( "-o", argv[i] ) ) && ( i+1 < argc ) ) { - writerbuffersize = atoi( argv[i+1] ); - ++i; - - } else if ( ( 0 == strcmp( "-f", argv[i] ) ) && ( i+1 < argc ) ) { + /* must be free'd at the end */ + outfile = OTF_stripFilename( argv[i+1] ); + ++i; - nfiles = atoi( argv[i+1] ); - ++i; + } else if( ( 0 == strcmp( "-n", argv[i] ) ) 
&& ( i+1 < argc ) ) { - } else if ( ( 0 == strcmp( "-o", argv[i] ) ) && ( i+1 < argc ) ) { + num_ostreams = atoi( argv[i+1] ); + ++i; - outfile= argv[i+1]; - ++i; + } else if( 0 == strcmp( "-h", argv[i] ) || + 0 == strcmp( "--help", argv[i] ) ) { - } else if ( 0 == strcmp( "-stats", argv[i] ) ) { + SHOW_HELPTEXT - read_stats= 1; + return FINISH_EVERYTHING(0); - } else if ( 0 == strcmp( "-snaps", argv[i] ) ) { + } else if( 0 == strcmp( "-V", argv[i] ) ) { - read_snaps= 1; + fprintf_root( stdout, "%u.%u.%u \"%s\"\n", + OTF_VERSION_MAJOR, OTF_VERSION_MINOR, + OTF_VERSION_SUB, OTF_VERSION_STRING ); - } else if ( ( 0 == strcmp( "-z", argv[i] ) ) && ( i+1 < argc ) ) { + return FINISH_EVERYTHING(0); - compression= atoi( argv[i+1] ); + } else if( 0 == strcmp( "-p", argv[i] ) ) { - ++i; + show_progress = 1; - } else if ( 0 == strcmp( "-l", argv[i] ) ) { + } else if( ( 0 == strcmp( "-f", argv[i] ) ) && ( i+1 < argc ) ) { - longformat = 1; + max_fhandles = atoi( argv[i+1] ); + ++i; - } else if ( 0 == strcmp( "-p", argv[i] ) ) { + } else if( ( 0 == strcmp( "-rb", argv[i] ) ) && ( i+1 < argc ) ) { - showprogress= 1; + rbufsize = atoi( argv[i+1] ); + ++i; - } else if ( 0 == strcmp( "--help", argv[i] ) || - 0 == strcmp( "-h", argv[i] ) ) { + } else if( ( 0 == strcmp( "-wb", argv[i] ) ) && ( i+1 < argc ) ) { - SHOW_HELPTEXT; - return 0; + wbufsize = atoi( argv[i+1] ); + ++i; - } else if ( 0 == strcmp( "-V", argv[i] ) ) { - - printf( "%u.%u.%u \"%s\"\n", OTF_VERSION_MAJOR, OTF_VERSION_MINOR, - OTF_VERSION_SUB, OTF_VERSION_STRING); - exit( 0 ); + } else if( ( 0 == strcmp( "-z", argv[i] ) ) && ( i+1 < argc ) ) { - } else { + compression = atoi( argv[i+1] ); + ++i; - if ( '-' != argv[i][0] ) { + } else if( 0 == strcmp( "--long", argv[i] ) ) { - infile= argv[i]; + format = OTF_WSTREAM_FORMAT_LONG; - } else{ + } else if( 0 == strcmp( "--snaps", argv[i] ) ) { - fprintf( stderr, "ERROR: Unknown option: '%s'\n", argv[i] ); - exit(1); - } - } - } - - if ( nfiles < 1 ) { - - fprintf( stderr, 
"ERROR: less than 1 filehandle is not permitted\n" ); - exit(1); - } - if ( nstreams < 0 ) { - - fprintf( stderr, "ERROR: less than 0 stream is not permitted\n" ); - exit(1); - } - if ( NULL == infile ) { - - fprintf( stderr, "ERROR: no input file specified\n" ); - exit(1); - } - if ( NULL == outfile ) { + read_snaps = 1; - /* - fprintf( stderr, "ERROR: no output file has been specified\n" ); - exit(1); - */ + } else if( 0 == strcmp( "--stats", argv[i] ) ) { - outfile= "out.otf"; - } + read_stats = 1; - handlers = OTF_HandlerArray_open(); + } else { - manager= OTF_FileManager_open( nfiles ); - if( NULL == manager) { - fprintf( stderr, "Error: Unable to initialize File Manager. aborting\n" ); - exit(1); - } - - reader = OTF_Reader_open( infile, manager ); - - if ( NULL == reader ) { - - fprintf( stderr, "Error: Unable to open '%s'. aborting\n", infile ); - OTF_FileManager_close( manager ); - OTF_HandlerArray_close( handlers ); - exit(1); - } - - OTF_Reader_setBufferSizes( reader, readerbuffersize ); - - fcb.writer = OTF_Writer_open( outfile, nstreams, manager ); - OTF_Writer_setBufferSizes( fcb.writer, writerbuffersize ); - OTF_Writer_setCompression( fcb.writer, compression ); - if( longformat ) - OTF_Writer_setFormat( fcb.writer, OTF_WSTREAM_FORMAT_LONG ); - else - OTF_Writer_setFormat( fcb.writer, OTF_WSTREAM_FORMAT_SHORT ); - - mc = OTF_Reader_getMasterControl( reader ); - - /* set your own handler functions */ - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefinitionComment, - OTF_DEFINITIONCOMMENT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFINITIONCOMMENT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefTimerResolution, - OTF_DEFTIMERRESOLUTION_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFTIMERRESOLUTION_RECORD); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefProcess, - OTF_DEFPROCESS_RECORD ); - 
OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFPROCESS_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefProcessGroup, - OTF_DEFPROCESSGROUP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFPROCESSGROUP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefAttributeList, - OTF_DEFATTRLIST_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFATTRLIST_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefProcessOrGroupAttributes, - OTF_DEFPROCESSORGROUPATTR_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFPROCESSORGROUPATTR_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefFunction, - OTF_DEFFUNCTION_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFFUNCTION_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefFunctionGroup, - OTF_DEFFUNCTIONGROUP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFFUNCTIONGROUP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefCollectiveOperation, - OTF_DEFCOLLOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFCOLLOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefCounter, - OTF_DEFCOUNTER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFCOUNTER_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefCounterGroup, - OTF_DEFCOUNTERGROUP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFCOUNTERGROUP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefScl, - OTF_DEFSCL_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFSCL_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) 
handleDefSclFile, - OTF_DEFSCLFILE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFSCLFILE_RECORD ); - -/* OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefversion, - OTF_DEFVERSION_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFVERSION_RECORD ); -*/ - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefCreator, - OTF_DEFCREATOR_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFCREATOR_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefFile, - OTF_DEFFILE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFFILE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefFileGroup, - OTF_DEFFILEGROUP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFFILEGROUP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefKeyValue, - OTF_DEFKEYVALUE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_DEFKEYVALUE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefTimeRange, - OTF_DEFTIMERANGE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, - OTF_DEFTIMERANGE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleDefCounterAssignments, - OTF_DEFCOUNTERASSIGNMENTS_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, - OTF_DEFCOUNTERASSIGNMENTS_RECORD ); - - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleNoOp, - OTF_NOOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_NOOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEventComment, - OTF_EVENTCOMMENT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_EVENTCOMMENT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) 
handleCounter, - OTF_COUNTER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_COUNTER_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEnter, - OTF_ENTER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_ENTER_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleCollectiveOperation, - OTF_COLLOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_COLLOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleBeginCollectiveOperation, - OTF_BEGINCOLLOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_BEGINCOLLOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEndCollectiveOperation, - OTF_ENDCOLLOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_ENDCOLLOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleRecvMsg, - OTF_RECEIVE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_RECEIVE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleSendMsg, - OTF_SEND_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_SEND_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleLeave, - OTF_LEAVE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_LEAVE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleBeginProcess, - OTF_BEGINPROCESS_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_BEGINPROCESS_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEndProcess, - OTF_ENDPROCESS_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_ENDPROCESS_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleFileOperation, - OTF_FILEOPERATION_RECORD ); - 
OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_FILEOPERATION_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleBeginFileOperation, - OTF_BEGINFILEOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_BEGINFILEOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEndFileOperation, - OTF_ENDFILEOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_ENDFILEOP_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleRMAPut, - OTF_RMAPUT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_RMAPUT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleRMAPutRemoteEnd, - OTF_RMAPUTRE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_RMAPUTRE_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleRMAGet, - OTF_RMAGET_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_RMAGET_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleRMAEnd, - OTF_RMAEND_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_RMAEND_RECORD ); - - - /* snapshot records */ - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleSnapshotComment, - OTF_SNAPSHOTCOMMENT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_SNAPSHOTCOMMENT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleEnterSnapshot, - OTF_ENTERSNAPSHOT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_ENTERSNAPSHOT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleSendSnapshot, - OTF_SENDSNAPSHOT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_SENDSNAPSHOT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleOpenFileSnapshot, - OTF_OPENFILESNAPSHOT_RECORD 
); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_OPENFILESNAPSHOT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleBeginCollopSnapshot, - OTF_BEGINCOLLOPSNAPSHOT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_BEGINCOLLOPSNAPSHOT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleBeginFileOpSnapshot, - OTF_BEGINFILEOPSNAPSHOT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, - OTF_BEGINFILEOPSNAPSHOT_RECORD ); - - /* summary records */ - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleSummaryComment, - OTF_SUMMARYCOMMENT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_SUMMARYCOMMENT_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleFunctionSummary, - OTF_FUNCTIONSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_FUNCTIONSUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleFunctionGroupSummary, - OTF_FUNCTIONGROUPSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_FUNCTIONGROUPSUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleMessageSummary, - OTF_MESSAGESUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_MESSAGESUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleCollopSummary, - OTF_COLLOPSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_COLLOPSUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleFileOperationSummary, - OTF_FILEOPERATIONSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_FILEOPERATIONSUMMARY_RECORD ); - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleFileGroupOperationSummary, - OTF_FILEGROUPOPERATIONSUMMARY_RECORD ); - 
OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_FILEGROUPOPERATIONSUMMARY_RECORD ); - - /* marker record types */ - - OTF_HandlerArray_setHandler( handlers, (OTF_FunctionPointer*) handleDefMarker, OTF_DEFMARKER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, OTF_DEFMARKER_RECORD ); - - OTF_HandlerArray_setHandler( handlers, (OTF_FunctionPointer*) handleMarker, OTF_MARKER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, &fcb, OTF_MARKER_RECORD ); - - /* misc records */ - - OTF_HandlerArray_setHandler( handlers, - (OTF_FunctionPointer*) handleUnknown, - OTF_UNKNOWN_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handlers, - &fcb, OTF_UNKNOWN_RECORD ); - - /* ask the mastercontrol for the number of streams and create the - * streaminfos array - */ - fcb.nstreaminfos = 0; - - while ( 0 != OTF_MasterControl_getEntryByIndex( mc, fcb.nstreaminfos )) { - - fcb.nstreaminfos++; - } - - fcb.hash = hash_new (); - - /* global stream yourself, because he isnt in the mapping */ - hash_add( fcb.hash, 0 ); - - /* add all streams to the hash */ - for( i = 0; i < fcb.nstreaminfos; i++ ) { - - mapentry = OTF_MasterControl_getEntryByIndex( mc, i ); - - hash_add( fcb.hash, mapentry->argument ); - } - - /* read definitions */ - retde= OTF_Reader_readDefinitions( reader, handlers ); - if( OTF_READ_ERROR == retde || 1 == fcb.error ) { - fprintf( stderr, "Error while reading definitions. aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - - /* read markers */ - retma= OTF_Reader_readMarkers( reader, handlers ); - if( OTF_READ_ERROR == retma || 1 == fcb.error ) { - fprintf( stderr, "Error while reading marker records. aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - - - if ( 0 == showprogress ) { - - /* do not show the progress */ - - /* read events */ - retev= OTF_Reader_readEvents( reader, handlers ); - if( OTF_READ_ERROR == retev ) { - fprintf( stderr, "Error while reading events. 
aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - - /* read stats */ - if ( 1 == read_stats ) { - - retst= OTF_Reader_readStatistics( reader, handlers ); - if( OTF_READ_ERROR == retst ) { - fprintf( stderr, "Error while reading statistics. aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - - } - - /* read snaps */ - if ( 1 == read_snaps ) { - - retsn= OTF_Reader_readSnapshots( reader, handlers ); - if( OTF_READ_ERROR == retsn ) { - fprintf( stderr, "Error while reading snapshots. aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - - } - - } else { - - /* show progress */ - - initProgressDisplay(); - - /* calculate how many records will be read */ - minbytes= 0; - curbytes= 0; - maxbytes= 0; - - OTF_Reader_setRecordLimit( reader, 0 ); - - retev= OTF_Reader_readEvents( reader, handlers ); - if( OTF_READ_ERROR == retev ) { - fprintf( stderr, "Error while reading events. aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - - if ( 1 == read_stats ) { - retst= OTF_Reader_readStatistics( reader, handlers ); - if( OTF_READ_ERROR == retst ) { - fprintf( stderr, "Error while reading statistics. aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - } - if ( 1 == read_snaps ) { - retsn= OTF_Reader_readSnapshots( reader, handlers ); - if( OTF_READ_ERROR == retsn ) { - fprintf( stderr, "Error while reading snapshots. 
aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - } - - OTF_Reader_eventBytesProgress( reader, &minbytestmp, &curbytestmp, &maxbytestmp ); - - minbytes+= minbytestmp; - maxbytes+= maxbytestmp; - - if ( 1 == read_stats ) { - - OTF_Reader_statisticBytesProgress( reader, &minbytestmp, &curbytestmp, &maxbytestmp ); - minbytes+= minbytestmp; - maxbytes+= maxbytestmp; - } - - /* read snaps */ - if ( 1 == read_snaps ) { - - OTF_Reader_snapshotBytesProgress( reader, &minbytestmp, &curbytestmp, &maxbytestmp ); - minbytes+= minbytestmp; - maxbytes+= maxbytestmp; - } - - curbytes= 0; - totalbytes= maxbytes - minbytes; - - - /* fixed number of records per update in order to provide - frequent update */ - recordsperupdate= 100000; - OTF_Reader_setRecordLimit( reader, recordsperupdate ); - - while ( 0 != ( retev= OTF_Reader_readEvents( reader, handlers ) ) ) { - - if( OTF_READ_ERROR == retev ) { - fprintf( stderr, "Error while reading events. aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - - OTF_Reader_eventBytesProgress( reader, - &minbytestmp, &curbytestmp, &maxbytestmp ); - - curbytes += curbytestmp - minbytestmp - curbytes; - - updateProgressDisplay( progress_counter++, totalbytes, curbytes ); - } - - /* read stats */ - while ( ( 1 == read_stats ) && - ( 0 != ( retst= OTF_Reader_readStatistics( reader, handlers ) ) ) ) { - - if( OTF_READ_ERROR == retst ) { - fprintf( stderr, "Error while reading statistics. aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - - OTF_Reader_statisticBytesProgress( reader, - &minbytestmp, &curbytestmp, &maxbytestmp ); - - curbytes += curbytestmp - minbytestmp - curbytes; - - updateProgressDisplay( progress_counter++, totalbytes, curbytes ); - } - - /* read snaps */ - while ( ( 1 == read_snaps ) && - ( 0 != ( retsn= OTF_Reader_readSnapshots( reader, handlers ) ) ) ) { - - if( OTF_READ_ERROR == retsn ) { - fprintf( stderr, "Error while reading snapshots. 
aborting\n" ); - FINISH_EVERYTHING; - exit(1); - } - - OTF_Reader_snapshotBytesProgress( reader, - &minbytestmp, &curbytestmp, &maxbytestmp ); - - curbytes += curbytestmp - minbytestmp - curbytes; - - updateProgressDisplay( progress_counter++, totalbytes, curbytes ); - } - - finishProgressDisplay(); - } - - FINISH_EVERYTHING; - - return 0; + if( '-' != argv[i][0] ) { + + /* must be free'd at the end */ + infile = OTF_stripFilename( argv[i] ); + + } else { + + fprintf_root( stderr, "Error: unknown option: '%s'\n", + argv[i] ); + + return FINISH_EVERYTHING(1); + + } + + } + + } + + if( ! outfile ) { + + outfile = strdup("out"); + + } else if( outfile[ strlen(outfile) -1 ] == '/' ) { + + strncat( outfile, "out", 3 ); + + } + + /* must be free'd at the end */ + outfile_otf = OTF_getFilename( outfile, 0, OTF_FILETYPE_MASTER, 0, NULL); + + /* check for neccessary options */ + if( infile == NULL ) { + + fprintf_root( stderr, "Error: no input file given.\n"); + + return FINISH_EVERYTHING(1); + + } + + if( max_fhandles < 1 ) { + + fprintf_root( stderr, + "Error: less than 1 filehandle is not permitted.\n" ); + + return FINISH_EVERYTHING(1); + + } + + if( num_ostreams < 0 ) { + + fprintf_root( stderr, + "Error: the number of streams must not be negative.\n" ); + + return FINISH_EVERYTHING(1); + + } + + if( wbufsize < 0 || rbufsize < 0 ) { + + fprintf_root( stderr, + "Error: buffersize must be greater or equal 0.\n" ); + + return FINISH_EVERYTHING(1); + + } + + if( my_rank == 0 ) { + + /* read master of input file */ + manager = OTF_FileManager_open( max_fhandles ); + + if( NULL == manager ) { + + fprintf( stderr, "Error: unable to initialize file manager.\n" ); + + return FINISH_EVERYTHING(1); + + } + + master = OTF_MasterControl_new( manager ); + OTF_MasterControl_read( master, infile ); + + /* get the total number of processes in the otf master file */ + num_cpus = OTF_MasterControl_getrCount( master ); + + /* set one stream per process */ + if( num_ostreams == 0 || 
num_ostreams > num_cpus ) { + + num_ostreams = num_cpus; + + } + + /* allocate memory */ + ostreams = (OutStream*) malloc( num_ostreams * sizeof(OutStream) ); + cpus = (int*) malloc( num_cpus * sizeof(int) ); + p = cpus; + + /* allocate memory for the info array */ + info = (ProgressInfo*) malloc( num_ranks * sizeof(ProgressInfo) ); + + /* fill the global cpus-array */ + i = 0; + while( 1 ) { + + entry = OTF_MasterControl_getEntryByIndex( master, i ); + + if( entry == NULL ) { + + break; + } + + for( j = 0; (uint32_t)j < entry->n; j++ ) { + + *p++ = entry->values[j]; + + } + + i++; + + } + + /* open new master file for output */ + master_file = fopen( outfile_otf, "w"); + + if( NULL == master_file ) { + + fprintf( stderr, "Error: unable to open file \"%s\".\n", + outfile_otf); + + free( cpus ); + free( ostreams ); + + OTF_MasterControl_close( master ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + + } + + /* fill all ostreams with data and write the new master file */ + offset = 0; + for( i = 0; i < num_ostreams; i++ ) { + + ostreams[i].id = i + 1; + ostreams[i].num_cpus = ( num_cpus / num_ostreams ) + + ( i < ( num_cpus % num_ostreams) ? 
1 : 0 ); + + ostreams[i].cpus = + (int*) malloc( ostreams[i].num_cpus * sizeof(int) ); + + /* append stream-id to new otf master file */ + fprintf( master_file, "%x:", ostreams[i].id); + + for( j = 0; j < ostreams[i].num_cpus; j++ ) { + + ostreams[i].cpus[j] = cpus[offset + j]; + + /* append cpu-id to master file */ + fprintf( master_file, "%x", ostreams[i].cpus[j]); + + if( (j + 1) < ostreams[i].num_cpus ) { + + fprintf( master_file, ","); + + } + + } + + fprintf( master_file, "\n" ); + offset += ostreams[i].num_cpus; + + } + + /* close new master file */ + fclose( master_file ); + + /* free global cpus-array, because it is not needed anymore */ + if( cpus ) { + + free(cpus); + cpus = NULL; + + } + + offset = 0; + /* send needed data to all ranks */ + for( i = (num_ranks - 1); i >= 0; i-- ) { + + /* get number of output-streams, rank i has to handle */ + rank_data.num_ostreams = + ( num_ostreams / num_ranks) + + ( i < ( num_ostreams % num_ranks) ? 1 : 0 ); + +#ifdef OTFMERGE_MPI + if( i > 0) { + + /* send number of output-streams to rank i */ + MPI_Ssend( &(rank_data.num_ostreams), 1, MPI_INT, i, 0, + MPI_COMM_WORLD); + + } else +#endif /* OTFMERGE_MPI */ + { + + /* save number of output-streams for rank 0 in rank_data */ + rank_data.ostreams = + (OutStream*) malloc( rank_data.num_ostreams * + sizeof(OutStream) ); + + } + + info[i].num_cpus = 0; + /* go through all output-streams of rank i */ + for( j = 0; j < rank_data.num_ostreams; j++ ) { + +#ifdef OTFMERGE_MPI + if( i > 0 ) { + + /* send data to rank */ + MPI_Ssend( &(ostreams[offset + j].id), 1, MPI_INT, i, 0, + MPI_COMM_WORLD); + MPI_Ssend( &(ostreams[offset + j].num_cpus), 1, MPI_INT, i, 0, + MPI_COMM_WORLD); + MPI_Ssend( ostreams[offset + j].cpus, + ostreams[offset + j].num_cpus, MPI_INT, i, 0, + MPI_COMM_WORLD); + + } else +#endif /* OTFMERGE_MPI */ + { + + /* save data for rank 0 */ + rank_data.ostreams[j].id = ostreams[offset + j].id; + rank_data.ostreams[j].num_cpus = + ostreams[offset + j].num_cpus; + 
rank_data.ostreams[j].cpus = + (int*) malloc( rank_data.ostreams[j].num_cpus * + sizeof(int)); + memcpy(rank_data.ostreams[j].cpus, ostreams[offset + j].cpus, + rank_data.ostreams[j].num_cpus * (sizeof(int))); + + } + + info[i].num_cpus += ostreams[offset + j].num_cpus; + + } + + offset += rank_data.num_ostreams; + + } + + /* can be free'd here because all MPI_Ssends are finished --> they are synchron */ + for( i = 0; i < num_ostreams; i++ ) { + + if( ostreams[i].cpus ) { + + free( ostreams[i].cpus ); + ostreams[i].cpus = NULL; + + } + + } + + if( ostreams ) { + + free( ostreams ); + ostreams = NULL; + + } + + /* initialize the info array */ + for( i = 0; i < num_ranks; i++ ) { + + info[i].percent = (double) info[i].num_cpus / (double) num_cpus; +#ifdef OTFMERGE_MPI + info[i].request = MPI_REQUEST_NULL; +#endif /* OTFMERGE_MPI */ + info[i].value.progress = 0.0; + info[i].value.is_alive = 1; + + } + + /* close master */ + OTF_MasterControl_close( master ); + OTF_FileManager_close( manager ); + + } +#ifdef OTFMERGE_MPI + else { /* my_rank != 0 */ + + info = (ProgressInfo*) malloc( 1 * sizeof(ProgressInfo) ); + + info[0].request = MPI_REQUEST_NULL; + info[0].value.progress = 0.0; + info[0].value.is_alive = 1; + + /* receive number of output-streams for this rank */ + MPI_Recv( &(rank_data.num_ostreams), 1, MPI_INT, 0, 0, MPI_COMM_WORLD, + &status); + + /* allocate memory for output-streams */ + rank_data.ostreams = + (OutStream*) malloc( rank_data.num_ostreams * sizeof(OutStream) ); + + /* go through all output streams */ + for( i = 0; i < rank_data.num_ostreams; i++ ) { + + /* receive id of output-stream and number of cpus in this stream */ + MPI_Recv( &(rank_data.ostreams[i].id), 1, MPI_INT, 0, 0, + MPI_COMM_WORLD, &status); + MPI_Recv( &(rank_data.ostreams[i].num_cpus), 1, MPI_INT, 0, 0, + MPI_COMM_WORLD, &status); + + /* allocate memory for cpus in stream */ + rank_data.ostreams[i].cpus = + (int*) malloc( rank_data.ostreams[i].num_cpus * sizeof(int) ); + + /* 
receive all cpu-ids */ + MPI_Recv( rank_data.ostreams[i].cpus, + rank_data.ostreams[i].num_cpus, MPI_INT, 0, 0, + MPI_COMM_WORLD, &status ); + + } + + } + + if( show_progress ) { + + /* create new mpi datatype to transfer the progress */ + /* struct { + double progress; + uint8_t is_alive; + }; + */ + + array_of_blocklengths[0] = 1; + array_of_blocklengths[1] = 1; + + MPI_Address( &(info[0].value.progress), &first_var_address ); + MPI_Address( &(info[0].value.is_alive), &second_var_address ); + + array_of_displacements[0] = (MPI_Aint) 0; + array_of_displacements[1] = second_var_address - first_var_address; + + array_of_types[0] = MPI_DOUBLE; + array_of_types[1] = MPI_BYTE; + + MPI_Type_struct( 2, array_of_blocklengths, array_of_displacements, + array_of_types, &(global_data.buftype) ); + + MPI_Type_commit( &(global_data.buftype) ); + + } +#endif /* OTFMERGE_MPI */ + + manager = OTF_FileManager_open( max_fhandles ); + if( NULL == manager ) { + + fprintf( stderr, "Error: unable to initialize file manager.\n" ); + + return FINISH_EVERYTHING(1); + + } + + /* the root process should read the definitions now */ + if( my_rank == 0 ) { + + wstream = OTF_WStream_open( outfile, 0, manager ); + + OTF_WStream_setBufferSizes( wstream, wbufsize ); + OTF_WStream_setCompression( wstream, compression ); + OTF_WStream_setFormat( wstream, format ); + + handlers = OTF_HandlerArray_open(); + + setDefinitionHandlerArray( handlers, wstream ); + + reader = OTF_Reader_open( infile, manager); + + if( reader == NULL) { + + fprintf( stderr, "Error: unable to open file %s.\n", infile ); + + OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + } + + OTF_Reader_setBufferSizes( reader, rbufsize ); + + if( OTF_READ_ERROR == + OTF_Reader_readDefinitions( reader, handlers ) ) { + + fprintf( stderr, "Error: while reading definitions from file %s\n", + infile ); + + OTF_Reader_close( reader ); + 
OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + } + + if( OTF_READ_ERROR == OTF_Reader_readMarkers( reader, handlers ) ) { + + fprintf( stderr, "Error: while reading markers from file %s\n", infile ); + + OTF_Reader_close( reader ); + OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + } + + /* close everything */ + OTF_HandlerArray_close( handlers ); + OTF_Reader_close( reader ); + OTF_WStream_close( wstream ); + + } + + for( i = 0; i < rank_data.num_ostreams; i++ ) { + + total_bytes = 0; + cur_bytes = 0; + cur_bytes_ges = 0; + + wstream = + OTF_WStream_open( outfile, rank_data.ostreams[i].id, manager ); + + OTF_WStream_setBufferSizes( wstream, wbufsize ); + OTF_WStream_setCompression( wstream, compression ); + OTF_WStream_setFormat( wstream, format ); + + handlers = OTF_HandlerArray_open(); + + setEventHandlerArray( handlers, wstream ); + + reader = OTF_Reader_open( infile, manager); + if( reader == NULL) { + + fprintf_root( stderr, "Error: unable to open file %s.\n", infile ); + + OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + } + + OTF_Reader_setBufferSizes( reader, rbufsize ); + + OTF_Reader_setProcessStatusAll ( reader, 0 ); + + for( j = 0; j < rank_data.ostreams[i].num_cpus; j++ ) { + + OTF_Reader_setProcessStatus( reader, rank_data.ostreams[i].cpus[j], + 1 ); + + } + + if( show_progress ) { + + OTF_Reader_setRecordLimit( reader, 0 ); + + if( OTF_READ_ERROR == OTF_Reader_readEvents( reader, handlers ) ) { + + fprintf( stderr, "Error: while reading events from file %s\n", + infile ); + + OTF_Reader_close( reader ); + OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + + } + + if( read_snaps ) { + + if( 
OTF_READ_ERROR == + OTF_Reader_readSnapshots( reader, handlers ) ) { + + fprintf( stderr, + "Error: while reading snaphots from file %s\n", + infile ); + + OTF_Reader_close( reader ); + OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + + } + + } + + if( read_stats ) { + + if( OTF_READ_ERROR == + OTF_Reader_readStatistics( reader, handlers ) ) { + + fprintf( stderr, + "Error: while reading statistics from file %s\n", + infile ); + + OTF_Reader_close( reader ); + OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + + } + + } + + OTF_Reader_eventBytesProgress( reader, &min, &cur, &max ); + /* (min - max) is erroneous because with small traces min == max + --> division by zero */ + total_bytes += max; /* max - min */ + + if( read_snaps ) { + + OTF_Reader_snapshotBytesProgress( reader, &min, &cur, &max ); + total_bytes += max; /* max - min */ + + } + + if( read_stats ) { + + OTF_Reader_statisticBytesProgress( reader, &min, &cur, &max ); + total_bytes += max; /* max - min */ + + } + + OTF_Reader_setRecordLimit( reader, 100000 ); + + } + + while( 0 != ( ret_read = OTF_Reader_readEvents( reader, handlers ) ) ) { + + if( ret_read == OTF_READ_ERROR) { + + fprintf( stderr, "Error: while reading events from file %s\n", + infile ); + + OTF_Reader_close( reader ); + OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + + } + + if( show_progress ) { + + OTF_Reader_eventBytesProgress( reader, &min, &cur, &max ); + + cur_bytes = cur; /* cur - min */ + + /* calculate rank specific progress for the current stream */ + global_data.tmp_progress = + (double) ( (double) cur_bytes / (double) total_bytes ); + + update_progress( info, &global_data, i, rank_data.num_ostreams ); + + } + + } + + cur_bytes_ges = cur_bytes; + + /* read 
snapshots */ + if( read_snaps ) { + + while( 0 != ( ret_read = + OTF_Reader_readSnapshots( reader, handlers ) ) ) { + + if( ret_read == OTF_READ_ERROR) { + + fprintf( stderr, + "Error: while reading snapshots from file %s\n", + infile ); + + OTF_Reader_close( reader ); + OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + + } + + if( show_progress ) { + + OTF_Reader_snapshotBytesProgress( reader, &min, &cur, &max ); + + cur_bytes = cur; /* cur - min */ + + /* calculate rank specific progress for the + current stream */ + global_data.tmp_progress = + (double) ( (double) (cur_bytes + cur_bytes_ges) / + (double) total_bytes ); + + update_progress( info, &global_data, i, rank_data.num_ostreams ); + } + + } + + } + + cur_bytes_ges += cur_bytes; + + /* read statistics */ + if( read_stats ) { + + while( 0 != ( ret_read = + OTF_Reader_readStatistics( reader, handlers ) ) ) { + + if( ret_read == OTF_READ_ERROR) { + + fprintf( stderr, + "Error: while reading statistics from file %s\n", + infile ); + + OTF_Reader_close( reader ); + OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + + } + + if( show_progress ) { + + OTF_Reader_statisticBytesProgress( reader, &min, &cur, + &max ); + + cur_bytes = cur; /* cur - min */ + + /* calculate rank specific progress for the + current stream */ + global_data.tmp_progress = + (double) ( (double) (cur_bytes + cur_bytes_ges) / + (double) total_bytes ); + + update_progress( info, &global_data, i, rank_data.num_ostreams ); + } + + } + + } + + /* read markers */ + while( 0 != ( ret_read = + OTF_Reader_readMarkers( reader, handlers ) ) ) { + + if( ret_read == OTF_READ_ERROR) { + + fprintf( stderr, "Error: while reading markers from file %s\n", infile ); + + OTF_Reader_close( reader ); + OTF_HandlerArray_close( handlers ); + OTF_WStream_close( wstream ); + 
OTF_FileManager_close( manager ); + + return FINISH_EVERYTHING(1); + + } + + } + + /* close everything */ + OTF_HandlerArray_close( handlers ); + OTF_Reader_close( reader ); + OTF_WStream_close( wstream ); + + } + + if( show_progress ) { + + /* wait for other processes to finish */ + if( my_rank == 0 ) { + + /* set own progress to 100 % */ + global_data.tmp_progress = 1.0; + + /* check every 0.2 sec for new progress until all ranks + have finished */ + while( 1 ) { + + /* update_progress() returns 0 if all ranks finished */ + if( ! update_progress( info, &global_data, 0, 1) ) { + + break; + + } + + /* sleep 0.2 s --> select is used because of portability */ + tv.tv_sec = 0; + tv.tv_usec = 200000; + select(0, NULL, NULL, NULL, &tv); + + } + + printf("%7.2f %% done\n", 100.0); + fflush( stdout ); + +#ifdef OTFMERGE_MPI + /* clear all open requests in info array */ + for( i = 1; i < num_ranks; i++ ) { + + if( info[i].request != MPI_REQUEST_NULL ) { + + MPI_Cancel( &(info[i].request) ); + + } + + } +#endif /* OTFMERGE_MPI */ + + } + +#ifdef OTFMERGE_MPI + if( my_rank != 0 ) { + + /* rank != 0 has finished and sends a last message to ranks 0 */ + + /* first wait until the previous msg was received by rank 0 */ + MPI_Wait( &(info[0].request), &status ); + /* fill buffer with valid values */ + info[0].value.progress = 100.0; + info[0].value.is_alive = 0; + /* send message and wait until the buffer is free for reuse */ + MPI_Isend( &(info[0].value.progress), 1, global_data.buftype, 0, 0, + MPI_COMM_WORLD, &(info[0].request)); + MPI_Wait( &(info[0].request), &status ); + + } +#endif /* OTFMERGE_MPI */ + + } + + OTF_FileManager_close( manager ); + + /* clear everything and exit */ + return FINISH_EVERYTHING(0); } -void initProgressDisplay() { +double update_progress( ProgressInfo* info, GlobalData *data, int cur_ostream, + int num_ostreams) { + static double progress = 0.0; + static int tmp = 0; + char signs[2] = {' ','.'}; - printf( " %7.2f %%\r", 0.0 ); - fflush( stdout ); 
+#ifdef OTFMERGE_MPI + MPI_Status status; + int flag = 0; + int j; + + if( data->my_rank != 0 ) { + + /* check if previous msg was received by rank 0 already + --> if not, do nothing in this function; + else calculate new progress and send the result to root later on */ + MPI_Test( &(info[0].request), &flag, &status ); + + } + + /* calculate progress if necessary */ + if( data->my_rank == 0 || flag ) +#endif /* OTFMERGE_MPI */ + { + + info[0].value.progress = + data->tmp_progress / (double)num_ostreams + (double)cur_ostream * + ( 1.0 / (double)num_ostreams ); + info[0].value.progress *= 100.0; + + } + + /* show progress */ + if( data->my_rank == 0) { + + /* set the roots progress as the global progress first + (in the rigth proportion) */ + progress = info[0].value.progress * info[0].percent; + +#ifdef OTFMERGE_MPI + /* listen to all ranks for new messages */ + for( j = 1; j < data->num_ranks; j++ ) { + + /* check if a new MPI_Irecv is necessary/if the previous msg + was received */ + if( MPI_REQUEST_NULL == info[j].request ) { + + /* irecv with derived datatype --> double progress, + uint8_t is_alive */ + MPI_Irecv( &(info[j].buf.progress), 1, data->buftype, j, 0, + MPI_COMM_WORLD, &(info[j].request) ); + + } + + /* test if current msg was received */ + MPI_Test( &(info[j].request), &flag, &status ); + + if( flag ) { + + /* got new values */ + + /* MPI_REQUEST_NULL indicates that a new MPI_Irecv + is necessary */ + info[j].request = MPI_REQUEST_NULL; + /* the receive-buffer must be copied because its value is + needed later on and the buffer itself is locked by MPI_Irecv */ + info[j].value.progress = info[j].buf.progress; + + /* check if it was the last msg from rank j + --> the second field of the buffer (is_alive) would be 0 */ + if( ! 
info[j].buf.is_alive ) { + + /* decrease the number of still living ranks */ + data->ranks_alive--; + + } + + } + + /* add the progress of rank j proportionally to the + global progress */ + progress += info[j].value.progress * info[j].percent; + + } +#endif /* OTFMERGE_MPI */ + + /* print progress */ + printf("%7.2f %% %c\r", progress, signs[tmp]); + fflush(stdout); + + tmp ^= 1; + + } +#ifdef OTFMERGE_MPI + else { /* data->my_rank != 0 */ + + /* flag is only set if the send-buffer can be used again and a msg is + necessary therefore */ + if( flag ) { + + /* send in synchronous mode --> this is because with MPI_Test we + want to know if the root has started a matching receive operation + already and not only if we can reuse the send-buffer */ + MPI_Issend( &(info[0].value.progress), 1, data->buftype, 0, 0, + MPI_COMM_WORLD, &(info[0].request)); + + } + + } +#endif /* OTFMERGE_MPI */ + + /* returns 0 if all ranks have finished */ + return data->ranks_alive; } -void finishProgressDisplay() { +int finish_everything( char *infile, char* outfile, ProgressInfo* info, + RankData* data, int ret ) { + int i; - printf( " %7.2f %% done\n", 100.0 ); - fflush( stdout ); + if( infile ) { + + free( infile ); + + } + + if( outfile ) { + + free( outfile ); + + } + + if( info ) { + + free( info ); + + } + + if( data->ostreams ) { + + for( i = 0; i < data->num_ostreams; i++ ) { + + if( data->ostreams[i].cpus ) { + + free( data->ostreams[i].cpus ); + + } + + } + + free( data->ostreams ); + + } + +#ifdef OTFMERGE_MPI + if( ret == 0 ) { + + MPI_Finalize(); + + } else { + + MPI_Abort( MPI_COMM_WORLD, ret ); + + } +#endif /* OTFMERGE_MPI */ + + return ret; } - - -void updateProgressDisplay( uint32_t i, uint64_t max, uint64_t cur ) { - - -/* static char animation[]= {"-", "\\", "|", "/" }; */ - static char* animation[]= { "", "." 
}; - - -/* printf( "%llu / %llu \n", cur, max ); */ - - printf( " %7.2f %% %s \r", - ( ((double) cur) * 100.0 / ((double) max) ), - animation[ i % ( sizeof(animation) / sizeof(animation[0]) ) ] ); - fflush( stdout ); -} - diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/otfmerge_vs08.vcproj b/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/otfmerge_vs08.vcproj deleted file mode 100644 index 39ad50ab4d..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfmerge/otfmerge_vs08.vcproj +++ /dev/null @@ -1,386 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/Makefile.am deleted file mode 100644 index 44723403af..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/Makefile.am +++ /dev/null @@ -1,29 +0,0 @@ -INCLUDES = \ - -I$(top_builddir)/otflib -I$(top_srcdir)/otflib \ - -I$(top_builddir)/otfauxlib -I$(top_srcdir)/otfauxlib \ - $(MPI_INCLUDE_LINE) - -if AMBUILDBINARIES -bin_PROGRAMS = \ - otfprofile-mpi -endif - -CXX = $(MPICXX) - -otfprofile_mpi_CXXFLAGS = $(MPICXXFLAGS) -otfprofile_mpi_LDADD = $(top_builddir)/otflib/libotf.la $(MATHLIB) $(MPI_LIB_LINE) -otfprofile_mpi_DEPENDENCIES = $(top_builddir)/otflib/libotf.la -otfprofile_mpi_SOURCES = \ - collect_data.h \ - create_latex.h \ - datastructs.h \ - otfprofile-mpi.h \ - reduce_data.h \ - summarize_data.h \ - collect_data.cpp \ - create_latex.cpp \ - otfprofile-mpi.cpp \ - reduce_data.cpp \ - summarize_data.cpp \ - summarize_data.h - diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.cpp deleted file mode 100644 index 29c4c908ee..0000000000 --- 
a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.cpp +++ /dev/null @@ -1,906 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz -*/ - -using namespace std; - -#include -#include -#include -#include -#include -#include - -#include "otf.h" -#include "OTF_Platform.h" - -#include "collect_data.h" -#include "otfprofile-mpi.h" -#include "summarize_data.h" -#include "reduce_data.h" -#include "create_latex.h" - - -/* define the following macro to synchronize the error indicator with all - worker ranks - - This enforces that all ranks will be terminated by calling MPI_Abort if - anyone fails. This is necessary to work around a bug that appears at least - with Open MPI where calling MPI_Abort on one task doesn't terminate all - other ranks. */ -#define SYNC_ERROR - -/* define the following macro to print result data to stdout */ -/*#define SHOW_RESULTS*/ - - -/* parse command line options -return 0 if succeeded, 1 if help text or version showed, -1 if failed */ -static int parse_command_line( int argc, char** argv, AllData& alldata ); - -/* assign trace processes to analysis processes explicitly in order to allow -sophisticated grouping of MPI ranks/processes/threads/GPU threads/etc. 
-in the future, return true if succeeded */ -static bool assign_procs_to_ranks( AllData& alldata ); - -#ifdef SHOW_RESULTS -/* show results on stdout */ -static void show_results( const AllData& alldata ); -#endif /* SHOW_RESULTS */ - -/* show helptext */ -static void show_helptext( void ); - - -int main( int argc, char** argv ) { - - int ret= 0; - - /* start MPI */ - - int my_rank; - int num_ranks; - - MPI_Init(&argc, &argv); - - MPI_Comm_rank(MPI_COMM_WORLD, &my_rank ); - MPI_Comm_size(MPI_COMM_WORLD, &num_ranks ); - - AllData alldata( my_rank, num_ranks ); - - do { - - /* step 0: parse command line options */ - if ( 0 != ( ret= parse_command_line( argc, argv, alldata ) ) ) { - - if ( 1 == ret ) { - - ret= 0; - - } else { /* -1 == ret */ - - ret= 1; - - } - - break; - - } - - VerbosePrint( alldata, 1, true, "initializing\n" ); - - MPI_Barrier( MPI_COMM_WORLD ); - - /* step 1: assign trace processes to analysis processes */ - if ( !assign_procs_to_ranks( alldata ) ) { - - ret= 1; - break; - - } - - MPI_Barrier( MPI_COMM_WORLD ); - - if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) { - - alldata.measureBlockMap[ "analyze data" ].start(); - - } - - /* step 2: collect data by reading input trace file */ - if ( !CollectData( alldata ) ) { - - ret= 1; - break; - - } - - MPI_Barrier( MPI_COMM_WORLD ); - - /* step 3: summarize data; every analysis rank summarizes it's local - data independently */ - if ( !SummarizeData( alldata ) ) { - - ret= 1; - break; - - } - - MPI_Barrier( MPI_COMM_WORLD ); - - /* step 4: reduce data to master */ - if ( !ReduceData( alldata ) ) { - - ret= 1; - break; - - } - - MPI_Barrier( MPI_COMM_WORLD ); - - if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) { - - alldata.measureBlockMap[ "analyze data" ].stop(); - - } - - /* step 5: produce outputs */ - - /* step 5.1: write CSV data */ - - /* the master coordinates the length of each workers CSV text output, - then every worker writes the own portion of the result CSV file - (or 
couple of files) */ - - /* do later */ - - /* MPI_Barrier( MPI_COMM_WORLD ); */ - - /* the master generates the result data from the global data - reduced above */ - - if ( 0 == my_rank ) { - -#ifdef SHOW_RESULTS - /* step 5.2: show result data on stdout */ - show_results( alldata ); -#endif /* SHOW_RESULTS */ - - alldata.measureBlockMap[ "produce output" ].start(); - - /* step 5.3: generate PGF output */ - if ( !CreateTex( alldata ) ) { - - ret= 1; - break; - - } - - alldata.measureBlockMap[ "produce output" ].stop(); - - } - - } while( false ); - - /* either finalize or abort on error */ - - if ( 0 == ret ) { - - /* show runtime measurement results */ - if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) { - - cout << "runtime measurement results:" << endl; - for ( map < string, MeasureBlock >::const_iterator it= - alldata.measureBlockMap.begin(); - it != alldata.measureBlockMap.end(); it++ ) { - - cout << " " << it->first << ": " << it->second.duration() - << "s" << endl; - } - - } - - MPI_Finalize(); - - VerbosePrint( alldata, 1, true, "done\n" ); - - } else { - - MPI_Abort( MPI_COMM_WORLD, ret ); - - } - - return ret; -} - - -static int parse_command_line( int argc, char** argv, AllData& alldata ) { - - int ret= 0; - - Params& params= alldata.params; - - /* parse command line options */ - - enum { ERR_OK, ERR_OPT_UNKNOWN, ERR_ARG_MISSING, ERR_ARG_INVALID }; - int parse_error= ERR_OK; - - int i; - - for ( i = 1; i < argc; i++ ) { - - /* -h, --help */ - if ( 0 == strcmp( "-h", argv[i] ) || - 0 == strcmp( "--help", argv[i] ) ) { - - if ( 0 == alldata.myRank ) { - - show_helptext(); - - } - - ret= 1; - break; - - /* -V */ - } else if ( 0 == strcmp( "-V", argv[i] ) ) { - - if ( 0 == alldata.myRank ) { - - printf( "%u.%u.%u \"%s\"\n", - OTF_VERSION_MAJOR, OTF_VERSION_MINOR, OTF_VERSION_SUB, - OTF_VERSION_STRING ); - - } - - ret= 1; - break; - - /* -v */ - } else if ( 0 == strcmp( "-v", argv[i] ) ) { - - params.verbose_level++; - - /* -p */ - } else if ( 0 == 
strcmp( "-p", argv[i] ) ) { - - params.progress= true; - - /* -f */ - } else if ( 0 == strcmp( "-f", argv[i] ) ) { - - if ( i == argc - 1 ) { - - parse_error= ERR_ARG_MISSING; - break; - - } - - int tmp= atoi( argv[i+1] ); - if ( 0 >= tmp ) { - - parse_error= ERR_ARG_INVALID; - break; - } - - params.max_file_handles= tmp; - i++; - - /* -b */ - } else if ( 0 == strcmp( "-b", argv[i] ) ) { - - if ( i == argc - 1 ) { - - parse_error= ERR_ARG_MISSING; - break; - - } - - int tmp= atoi( argv[i+1] ); - if ( 0 >= tmp ) { - - parse_error= ERR_ARG_INVALID; - break; - } - - params.buffer_size= tmp; - i++; - - /* -o */ - } else if ( 0 == strcmp( "-o", argv[i] ) ) { - - if ( i == argc - 1 ) { - - parse_error= ERR_ARG_MISSING; - break; - - } - - params.output_file_prefix= argv[++i]; - - /* --stat */ - } else if ( 0 == strcmp( "--stat", argv[i] ) ) { - - params.read_from_stats= true; - -#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4 - - /* --nopdf */ - } else if ( 0 == strcmp( "--nopdf", argv[i] ) ) { - - params.create_pdf= false; - -#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */ - - /* input file or unknown option */ - } else { - - if ( 0 == params.input_file_prefix.length() ) { - - char* tmp= OTF_stripFilename( argv[i] ); - if ( tmp ) { - - params.input_file_prefix= tmp; - free( tmp ); - - } - - } else { - - parse_error= ERR_OPT_UNKNOWN; - break; - - } - - } - - } - - /* show specific message on error */ - if ( ERR_OK != parse_error ) { - - if ( 0 == alldata.myRank ) { - - switch( parse_error ) { - - case ERR_OPT_UNKNOWN: - - cerr << "ERROR: Unknown option '" << argv[i] << "'." - << endl; - break; - - case ERR_ARG_MISSING: - - cerr << "ERROR: Expected argument for option '" << argv[i] - << "'." << endl; - break; - - case ERR_ARG_INVALID: - - cerr << "ERROR: Invalid argument for option '" << argv[i] - << "'." 
<< endl; - break; - - default: - - break; - - } - - } - - ret= -1; - - /* show help text if no input trace file is given */ - } else if ( 0 == params.input_file_prefix.length() ) { - - if ( 0 == alldata.myRank ) { - - show_helptext(); - - } - - ret= 1; - - } - - return ret; -} - - -static bool assign_procs_to_ranks( AllData& alldata ) { - - bool error= false; - - OTF_FileManager* manager= NULL; - OTF_MasterControl* master= NULL; - - if ( 0 == alldata.myRank ) { - - /* rank 0 reads OTF master control of input trace file */ - - manager= OTF_FileManager_open( 1 ); - assert( manager ); - - master= OTF_MasterControl_new( manager ); - assert( master ); - - int master_read_ret= - OTF_MasterControl_read( master, - alldata.params.input_file_prefix.c_str() ); - - /* that's the first access to the input trace file; show tidy error - message if failed */ - if ( 0 == master_read_ret ) { - - cerr << "ERROR: Unable to open file '" - << alldata.params.input_file_prefix << ".otf' for reading." - << endl; - error= true; - } - } - - /* broadcast error indicator to workers because Open MPI had all - ranks except rank 0 waiting endlessly in the MPI_Recv, when the '.otf' file - was absent. 
*/ - if ( SyncError( alldata, error, 0 ) ) { - - return false; - - } - - if ( 0 == alldata.myRank ) { - - do { - - /* fill the global array of processes */ - - alldata.myProcessesNum= OTF_MasterControl_getrCount( master ); - alldata.myProcessesList= - (uint32_t*)malloc( alldata.myProcessesNum * sizeof(uint32_t) ); - assert( alldata.myProcessesList ); - - uint32_t i= 0; - uint32_t j= 0; - - while( true ) { - - OTF_MapEntry* entry = - OTF_MasterControl_getEntryByIndex( master, i ); - - if( NULL == entry) break; - - for ( uint32_t k= 0; k< entry->n; k++ ) { - - alldata.myProcessesList[j]= entry->values[k]; - j++; - } - - i++; - } - assert( alldata.myProcessesNum == j ); - - /* DEBUG */ - /*cerr << "processes in trace: "; - for ( uint32_t k= 0; k < alldata.myProcessesNum; k++ ) { - - cerr << alldata.myProcessesList[k] << " "; - } - cerr << endl;*/ - - - /* now we may re-arrange the process list for a better layout - - note that this layout is optimal to re-use OTF streams - if there are multiple processes per stream - - one may read the OTF definitions to know how to re-arrange */ - - /* get number of ranks per worker, send to workers */ - - /* remaining ranks and remaining workers */ - uint32_t r_ranks= alldata.myProcessesNum; - uint32_t r_workers= alldata.numRanks; - - uint32_t pos= 0; - bool warn_for_empty= true; - for ( int w= 0; w < (int)alldata.numRanks; w++ ) { - - uint32_t n= ( ( r_ranks / r_workers ) * r_workers < r_ranks) ? 
- ( r_ranks / r_workers +1 ) : ( r_ranks / r_workers ); - - if ( ( 0 == n ) && warn_for_empty ) { - - cerr << "Warning: more analysis ranks than trace processes, " - << "ranks " << w << " to " << alldata.numRanks -1 - << " are unemployed" << endl; - - warn_for_empty= false; - } - - if ( 0 == w ) { - - /* for master itself simply truncate processesList, - don't send and receive */ - alldata.myProcessesNum= n; - - } else { - - MPI_Send( &n, 1, MPI_INT, w, 2, MPI_COMM_WORLD ); - - MPI_Send( alldata.myProcessesList + pos, n, MPI_INT, - w, 3, MPI_COMM_WORLD ); - - } - - pos += n; - r_ranks -= n; - r_workers -= 1; - } - - } while( false ); - - /* close OTF master control and file manager */ - OTF_MasterControl_close( master ); - OTF_FileManager_close( manager ); - - } else { /* 0 != my_rank */ - - /* workers receive number and sub-list of their ranks to process */ - - alldata.myProcessesNum= 0; - - MPI_Status status; - - MPI_Recv( &alldata.myProcessesNum, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, - &status ); - - alldata.myProcessesList= (uint32_t*)malloc( - alldata.myProcessesNum * sizeof(uint32_t) ); - assert( alldata.myProcessesList ); - - MPI_Recv( alldata.myProcessesList, alldata.myProcessesNum, MPI_INT, 0, - 3, MPI_COMM_WORLD, &status ); - - } - - /* DEBUG */ - /*cerr << " worker " << my_rank << " handles: "; - for ( uint32_t v= 0; v < alldata.myProcessesNum; v++ ) { - - cerr << alldata.myProcessesList[v] << " "; - } - cerr << endl;*/ - - return !error; -} - - -#ifdef SHOW_RESULTS - -static void show_results( const AllData& alldata ) { - -# define PRINT_MIN_MAX_AVG(v,u) (v.cnt) << " x avg " << ((double)(v.sum))/(v.cnt) << "(" << (v.min) << "-" << (v.max) << ") " << u - - cout << endl << " global data per function: " << endl; - { - map< uint64_t, FunctionData >::const_iterator it= alldata.functionMapGlobal.begin(); - map< uint64_t, FunctionData >::const_iterator itend= alldata.functionMapGlobal.end(); - while ( itend != it ) { - - cout << " global function " << it->first << 
" -> " ; - if ( it->second.count.cnt ) { - cout << "\t"<< - " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") << - " exc: " << PRINT_MIN_MAX_AVG(it->second.excl_time,"[s]") << - " inc: " << PRINT_MIN_MAX_AVG(it->second.incl_time,"[s]") << endl; - } - - it++; - } - } - - cout << endl << " global counter data per function: " << endl; - { - map< Pair, FunctionData, ltPair >::const_iterator it= alldata.counterMapGlobal.begin(); - map< Pair, FunctionData, ltPair >::const_iterator itend= alldata.counterMapGlobal.end(); - while ( itend != it ) { - - cout << " global counter " << it->first.a << " per function " << it->first.b << " -> " << endl; - if ( it->second.count.cnt ) { - cout << "\t"<< - " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]"); - cout << " exc: "; - if ( it->second.excl_time.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.excl_time,"[#]"); - } else { - cout << "0 [#]"; - } - cout << " inc: "; - if ( it->second.incl_time.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.incl_time,"[#]"); - } else { - cout << "0 [#]"; - } - cout << endl; - } - - it++; - } - } - - cout << endl << " global message data per cluster pair: " << endl; - { - map< Pair, MessageData >::const_iterator it= alldata.messageMapPerClusterPair.begin(); - map< Pair, MessageData >::const_iterator itend= alldata.messageMapPerClusterPair.end(); - while ( itend != it ) { - - if ( it->second.count_send.cnt ) { - cout << "\tsent " << it->first.a << " --> " << it->first.b << - " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]"); - cout << " byt: "; - if ( it->second.bytes_send.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]"); - } else { - cout << "0 [b]"; - } - cout << " dur: "; - if ( it->second.duration_send.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[s]"); - } else { - cout << "0 [s]"; - } - cout << endl; - } - if ( it->second.count_recv.cnt ) { - cout << "\trecv " << it->first.a << " <-- " << it->first.b << - " cnt: " << 
PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]"); - cout << " byt: "; - if ( it->second.bytes_recv.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]"); - } else { - cout << "0 [b]"; - } - cout << " dur: "; - if ( it->second.duration_recv.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[s]"); - } else { - cout << "0 [s]"; - } - cout << endl; - } - - it++; - } - } - - cout << endl << " global message data per cluster: " << endl; - { - map< uint64_t, MessageData >::const_iterator it= alldata.messageMapPerCluster.begin(); - map< uint64_t, MessageData >::const_iterator itend= alldata.messageMapPerCluster.end(); - while ( itend != it ) { - - cout << " msg of cluster " << it->first << " -> " << endl; - if ( it->second.count_send.cnt ) { - cout << "\tsent" << - " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]"); - cout << " byt: "; - if ( it->second.bytes_send.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]"); - } else { - cout << "0 [b]"; - } - cout << " dur: "; - if ( it->second.duration_send.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[s]"); - } else { - cout << "0 [s]"; - } - cout << endl; - } - if ( it->second.count_recv.cnt ) { - cout << "\trecv" << - " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]"); - cout << " byt: "; - if ( it->second.bytes_recv.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]"); - } else { - cout << "0 [b]"; - } - cout << " dur: "; - if ( it->second.duration_recv.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[s]"); - } else { - cout << "0 [s]"; - } - cout << endl; - } - - it++; - } - } - - cout << endl << " global message speed per length: " << endl; - { - map< Pair, MessageSpeedData, ltPair >::const_iterator it= alldata.messageSpeedMapPerLength.begin(); - map< Pair, MessageSpeedData, ltPair >::const_iterator itend= alldata.messageSpeedMapPerLength.end(); - while ( itend != it ) { - - cout << " msg of speed-bin " << it->first.a << " 
length-bin " << it->first.b << " -> "; - if ( it->second.count.cnt ) { - cout << "\t" << - " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]") << endl; - } - - it++; - } - } - - cout << endl << " global collective data per cluster: " << endl; - { - map< Pair, CollectiveData, ltPair >::const_iterator it= alldata.collectiveMapPerCluster.begin(); - map< Pair, CollectiveData, ltPair >::const_iterator itend= alldata.collectiveMapPerCluster.end(); - while ( itend != it ) { - - cout << " collop of class " << it->first.a << " cluster " << it->first.b << " -> " << endl; - if ( it->second.count_send.cnt ) { - cout << "\tsent" << - " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]"); - cout << " byt: "; - if ( it->second.bytes_send.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]"); - } else { - cout << "0 [b]"; - } - cout << " dur: "; - if ( it->second.duration_send.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[s]"); - } else { - cout << "0 [s]"; - } - cout << endl; - } - if ( it->second.count_recv.cnt ) { - cout << "\trecv" << - " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]"); - cout << " byt: "; - if ( it->second.bytes_recv.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]"); - } else { - cout << "0 [b]"; - } - cout << " dur: "; - if ( it->second.duration_recv.cnt ) { - cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[s]"); - } else { - cout << "0 [s]"; - } - cout << endl; - } - - it++; - } - } -} - -#endif /* SHOW_RESULTS */ - - -static void show_helptext() { - - cout << endl - << " otfprofile-mpi - generate a profile of a trace in LaTeX format." << endl - << endl - << " Syntax: otfprofile-mpi [options] " << endl - << endl - << " options:" << endl - << " -h, --help show this help message" << endl - << " -V show OTF version" << endl - << " -v increase output verbosity" << endl - << " (can be used more than once)" << endl - << " -p show progress" << endl - << " -f max. 
number of filehandles available per rank" << endl - << " (default: " << Params::DEFAULT_MAX_FILE_HANDLES << ")" << endl - << " -b set buffersize of the reader" << endl - << " (default: " << Params::DEFAULT_BUFFER_SIZE << ")" << endl - << " -o specify the prefix of output file(s)" << endl - << " (default: " << Params::DEFAULT_OUTPUT_FILE_PREFIX() << ")" << endl - << " --stat read only summarized information, no events" << endl -#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4 - << " --nopdf do not produce PDF output" << endl -#else /* PDFTEX && HAVE_PGFPLOTS_1_4 */ - << endl - << " PDF creation requires the PGFPLOTS package version >1.4" << endl - << " http://sourceforge.net/projects/pgfplots/ " << endl -#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */ - << endl; -} - - -void VerbosePrint( AllData& alldata, uint8_t level, bool root_only, - const char* fmt, ... ) { - - if ( alldata.params.verbose_level >= level ) { - - va_list ap; - - va_start( ap, fmt ); - - /* either only rank 0 print the message */ - if ( root_only ) { - - if ( 0 == alldata.myRank ) { - - vprintf( fmt, ap ); - } - - /* or all ranks print the message */ - } else { - - char msg[1024]; - - /* prepend current rank to message */ - snprintf( msg, sizeof( msg ) -1, "[%u] ", alldata.myRank ); - vsnprintf( msg + strlen( msg ), sizeof( msg ) -1, fmt, ap ); - - /* print message */ - printf( "%s ", msg ); - - } - - va_end( ap ); - - } -} - - -bool SyncError( AllData& alldata, bool& error, uint32_t root ) { - -#ifdef SYNC_ERROR - - if ( 1 < alldata.numRanks ) { - - int buf= ( error ) ? 
1 : 0; - - /* either broadcast the error indicator from one rank (root) - or reduce them from all */ - - if ( root != (uint32_t)-1 ) { - - MPI_Bcast( &buf, 1, MPI_INT, (int)root, MPI_COMM_WORLD ); - - error= ( 1 == buf ); - - } else { - - int recv_buf; - - MPI_Allreduce( &buf, &recv_buf, 1, MPI_INT, MPI_MAX, - MPI_COMM_WORLD ); - - error= ( 1 == recv_buf ); - - } - - } - -#endif /* SYNC_ERROR */ - - return error; -} - - -uint64_t Logi( uint64_t x, uint64_t b ) { - - assert( b > 1 ); - - uint64_t c= 1; - uint64_t i= 0; - - while( c <= x ) { - - c*= b; - i++; - } - - return i; -} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.h deleted file mode 100644 index ea8de61f0b..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz -*/ - -#ifndef OTFPROFILE_MPI_H -#define OTFPROFILE_MPI_H - - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif /* HAVE_CONFIG_H */ - -#include "datastructs.h" - - -/* print verbose message to stdout - (if root_only is true only rank 0 will print the message) */ -void VerbosePrint( AllData& alldata, uint8_t level, bool root_only, - const char* fmt, ... 
); - -/* synchronize error indicator with all worker ranks - (either broadcast from one rank (root) or reduce from all) */ -bool SyncError( AllData& alldata, bool& error, uint32_t root= (uint32_t)-1 ); - -/* logarithm to base b for unsigned 64-bit integer x */ -uint64_t Logi( uint64_t x, uint64_t b= 2 ); - - -#endif /* OTFPROFILE_MPI_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.cpp deleted file mode 100644 index 67936488b1..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.cpp +++ /dev/null @@ -1,649 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz -*/ - -using namespace std; - -#include -#include - -#include "otfprofile-mpi.h" -#include "reduce_data.h" - - -/* fence between statistics parts within the buffer for consistency checking */ -enum { FENCE= 0xDEADBEEF }; - - -/* pack the local alldata into a buffer, return buffer */ -static char* pack_worker_data( AllData& alldata, uint32_t sizes[10] ) { - - uint64_t fence= FENCE; - - /* get the sizes of all parts that need to be transmitted */ - - sizes[1]= alldata.functionMapGlobal.size(); /* map< uint64_t, FunctionData > functionMapGlobal; */ - sizes[2]= alldata.counterMapGlobal.size(); /* map< Pair, CounterData, ltPair > counterMapGlobal; */ - sizes[3]= alldata.messageMapPerClusterPair.size(); /* map< Pair, MessageData, ltPair > messageMapPerClusterPair; */ - sizes[4]= alldata.messageMapPerCluster.size(); /* map< uint64_t, MessageData > messageMapPerCluster; */ - sizes[5]= alldata.messageSpeedMapPerLength.size(); /* map< Pair, MessageSpeedData, ltPair > messageSpeedMapPerLength; */ - sizes[6]= alldata.collectiveMapPerCluster.size(); /* map< Pair, CollectiveData, ltPair > collectiveMapPerCluster; */ - sizes[7]= 0; - sizes[8]= 0; - sizes[9]= 0; - - /* get bytesize multiplying all pieces 
*/ - - uint32_t bytesize= 0; - int s1, s2; - - MPI_Pack_size( 7, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); - bytesize += s1; - - MPI_Pack_size( sizes[1] * 7, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); - MPI_Pack_size( sizes[1] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); - bytesize += s1 + s2; - - MPI_Pack_size( sizes[2] * 8, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); - MPI_Pack_size( sizes[2] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); - bytesize += s1 + s2; - - MPI_Pack_size( sizes[3] * 20, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); - MPI_Pack_size( sizes[3] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); - bytesize += s1 + s2; - - MPI_Pack_size( sizes[4] * 19, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); - MPI_Pack_size( sizes[4] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); - bytesize += s1 + s2; - - MPI_Pack_size( sizes[5] * 6, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); - bytesize += s1; - - MPI_Pack_size( sizes[6] * 20, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); - MPI_Pack_size( sizes[6] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); - bytesize += s1 + s2; - - /* get the buffer */ - sizes[0]= bytesize; - char* buffer= alldata.guaranteePackBuffer( bytesize ); - - /* pack parts */ - int position= 0; - - /* extra check that doesn't cost too much */ - MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - /* pack functionMapGlobal */ - { - map< uint64_t, FunctionData >::const_iterator it= alldata.functionMapGlobal.begin(); - map< uint64_t, FunctionData >::const_iterator itend= alldata.functionMapGlobal.end(); - for ( ; it != itend; ++it ) { - - MPI_Pack( (void*) &it->first, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, 
MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.excl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.excl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.excl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.excl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.incl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.incl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - } - alldata.functionMapGlobal.clear(); - } - - /* extra check that doesn't cost too much */ - MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - /* pack counterMapGlobal */ - { - map< Pair, CounterData, ltPair >::const_iterator it= alldata.counterMapGlobal.begin(); - map< Pair, CounterData, ltPair >::const_iterator itend= alldata.counterMapGlobal.end(); - for ( ; it != itend; ++it ) { - - MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->first.b, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, 
MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.excl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.excl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.excl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.excl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.incl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.incl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - } - alldata.counterMapGlobal.clear(); - } - - /* extra check that doesn't cost too much */ - MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - /* pack messageMapPerClusterPair */ - { - map< Pair, MessageData, ltPair >::const_iterator it= alldata.messageMapPerClusterPair.begin(); - map< Pair, MessageData, ltPair >::const_iterator itend= alldata.messageMapPerClusterPair.end(); - for ( ; it != itend; ++it ) { - - MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->first.b, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.count_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_send.sum, 1, MPI_LONG_LONG_INT, 
buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.count_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.bytes_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_send.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.bytes_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.duration_send.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_send.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_send.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_send.cnt, 1, MPI_LONG_LONG_INT, buffer, 
bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.duration_recv.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_recv.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_recv.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - } - alldata.messageMapPerClusterPair.clear(); - } - - /* extra check that doesn't cost too much */ - MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - /* pack messageMapPerCluster */ - { - map< uint64_t, MessageData >::const_iterator it= alldata.messageMapPerCluster.begin(); - map< uint64_t, MessageData >::const_iterator itend= alldata.messageMapPerCluster.end(); - for ( ; it != itend; ++it ) { - - MPI_Pack( (void*) &it->first, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.count_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_send.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.count_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - 
MPI_Pack( (void*) &it->second.bytes_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_send.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.bytes_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.duration_send.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_send.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_send.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.duration_recv.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_recv.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_recv.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - } - alldata.messageMapPerCluster.clear(); - } - - /* extra check that doesn't cost too much */ - MPI_Pack( (void*) &fence, 1, 
MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - /* pack messageSpeedMapPerLength */ - { - map< Pair, MessageSpeedData, ltPair >::const_iterator it= alldata.messageSpeedMapPerLength.begin(); - map< Pair, MessageSpeedData, ltPair >::const_iterator itend= alldata.messageSpeedMapPerLength.end(); - for ( ; it != itend; ++it ) { - - MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->first.b, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - } - alldata.messageSpeedMapPerLength.clear(); - } - - /* extra check that doesn't cost too much */ - MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - /* pack collectiveMapPerCluster */ - { - map< Pair, CollectiveData, ltPair >::const_iterator it= alldata.collectiveMapPerCluster.begin(); - map< Pair, CollectiveData, ltPair >::const_iterator itend= alldata.collectiveMapPerCluster.end(); - for ( ; it != itend; ++it ) { - - MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->first.b, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.count_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_send.sum, 1, MPI_LONG_LONG_INT, buffer, 
bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.count_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.count_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.bytes_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_send.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.bytes_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.duration_send.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_send.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_send.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, 
&position, MPI_COMM_WORLD ); - - MPI_Pack( (void*) &it->second.duration_recv.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_recv.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_recv.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); - MPI_Pack( (void*) &it->second.duration_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - } - alldata.collectiveMapPerCluster.clear(); - } - - /* extra check that doesn't cost too much */ - MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); - - return buffer; -} - - -/* prepare alldata for unpack, return buffer of sufficient size */ -static char* prepare_worker_data( AllData& alldata, uint32_t sizes[10] ) { - - uint32_t bytesize= sizes[0]; - - return alldata.guaranteePackBuffer( bytesize ); -} - -/* unpack the received worker data and add it to the local alldata */ -static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) { - - uint64_t fence; - - /* unpack parts */ - int position= 0; - char* buffer= alldata.getPackBuffer( ); - - /* extra check that doesn't cost too much */ - fence= 0; - MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - assert( FENCE == fence ); - - /* unpack functionMapGlobal */ - for ( uint32_t i= 0; i < sizes[1]; i++ ) { - - uint64_t func; - FunctionData tmp; - - MPI_Unpack( buffer, sizes[0], &position, &func, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, &tmp.count.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.count.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.count.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.count.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - 
MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - alldata.functionMapGlobal[ func ].add( tmp ); - } - - /* extra check that doesn't cost too much */ - fence= 0; - MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - assert( FENCE == fence ); - - /* unpack counterMapGlobal */ - for ( uint32_t i= 0; i < sizes[2]; i++ ) { - - uint64_t a; - uint64_t b; - CounterData tmp; - - MPI_Unpack( buffer, sizes[0], &position, &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &b, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, &tmp.count.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.count.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.count.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.count.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.sum, 1, MPI_DOUBLE, 
MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - alldata.counterMapGlobal[ Pair( a, b ) ].add( tmp ); - } - - /* extra check that doesn't cost too much */ - fence= 0; - MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - assert( FENCE == fence ); - - /* unpack messageMapPerClusterPair */ - for ( uint32_t i= 0; i < sizes[3]; i++ ) { - - uint64_t a; - uint64_t b; - MessageData tmp; - - MPI_Unpack( buffer, sizes[0], &position, (void*) &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &b, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( 
buffer, sizes[0], &position, (void*) &tmp.bytes_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - alldata.messageMapPerClusterPair[ Pair(a,b) ].add( tmp ); - } - - /* extra check that doesn't cost too much */ - fence= 0; - MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - assert( FENCE 
== fence ); - - /* unpack messageMapPerCluster */ - for ( uint32_t i= 0; i < sizes[4]; i++ ) { - - uint64_t a; - MessageData tmp; - - MPI_Unpack( buffer, sizes[0], &position, (void*) &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.cnt, 1, 
MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - alldata.messageMapPerCluster[ a ].add( tmp ); - } - - /* extra check that doesn't cost too much */ - fence= 0; - MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - assert( FENCE == fence ); - - /* unpack messageSpeedMapPerLength */ - for ( uint32_t i= 0; i < sizes[5]; i++ ) { - - uint64_t a; - uint64_t b; - MessageSpeedData tmp; - - MPI_Unpack( buffer, sizes[0], &position, (void*) &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &b, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - alldata.messageSpeedMapPerLength[ Pair(a,b) ].add( tmp ); - } - - /* extra 
check that doesn't cost too much */ - fence= 0; - MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - assert( FENCE == fence ); - - /* unpack collectiveMapPerCluster */ - for ( uint32_t i= 0; i < sizes[6]; i++ ) { - - uint64_t a; - uint64_t b; - CollectiveData tmp; - - MPI_Unpack( buffer, sizes[0], &position, (void*) &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &b, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], 
&position, (void*) &tmp.bytes_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); - MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - alldata.collectiveMapPerCluster[ Pair(a,b) ].add( tmp ); - } - - /* extra check that doesn't cost too much */ - fence= 0; - MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - assert( FENCE == fence ); - -} - - -bool ReduceData( AllData& alldata ) { - - bool ret= true; - - if ( 1 < alldata.numRanks ) { - - VerbosePrint( alldata, 1, true, "reducing data\n" ); - - /* implement reduction myself because MPI and C++ STL don't play with - each other */ - - /* how many rounds until master has all the data? 
*/ - uint32_t num_rounds= Logi( alldata.numRanks ) -1; - uint32_t round_no= 0; - uint32_t round= 1; - while ( round < alldata.numRanks ) { - - round_no++; - - if ( 1 == alldata.params.verbose_level ) { - - VerbosePrint( alldata, 1, true, " round %u / %u\n", - round_no, num_rounds ); - } - - uint32_t peer= alldata.myRank ^ round; - - /* if peer rank is not there, do nothing but go on */ - if ( peer >= alldata.numRanks ) { - - round= round << 1; - continue; - } - - /* send to smaller peer, receive from larger one */ - uint32_t sizes[10]; - char* buffer; - - if ( alldata.myRank < peer ) { - - MPI_Status status; - - MPI_Recv( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD, - &status ); - - // DEBUG - //cout << " round " << round << " recv " << peer << "--> " << - //alldata.myRank << " with " << - //sizes[0] << " bytes, " << - //sizes[1] << ", " << - //sizes[2] << ", " << - //sizes[3] << ", " << - //sizes[4] << "" << endl << flush; - - buffer= prepare_worker_data( alldata, sizes ); - - VerbosePrint( alldata, 2, false, - "round %u / %u: receiving %u bytes from rank %u\n", - round_no, num_rounds, sizes[0], peer ); - - MPI_Recv( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD, - &status ); - - unpack_worker_data( alldata, sizes ); - - } else { - - buffer= pack_worker_data( alldata, sizes ); - - // DEBUG - //cout << " round " << round << " send " << alldata.myRank << - //" --> " << peer << " with " << - //sizes[0] << " bytes, " << - //sizes[1] << ", " << - //sizes[2] << ", " << - //sizes[3] << ", " << - //sizes[4] << "" << endl << flush; - - VerbosePrint( alldata, 2, false, - "round %u / %u: sending %u bytes to rank %u\n", - round_no, num_rounds, sizes[0], peer ); - - MPI_Send( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD ); - - MPI_Send( buffer, sizes[0], MPI_PACKED, peer, 5, - MPI_COMM_WORLD ); - - /* every work has to send off its data at most once, - after that, break from the collective reduction operation */ - break; - } - - round= round << 1; - - } - - 
alldata.freePackBuffer(); - - } - - return ret; -} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.cpp deleted file mode 100644 index ea42e5d3bb..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.cpp +++ /dev/null @@ -1,319 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz -*/ - -using namespace std; - -#include -#include - -#include "summarize_data.h" - - -static void get_clustering( AllData& alldata ) { - - uint32_t r_processes= alldata.allProcesses.size(); - uint32_t r_clusters= Clustering::MAX_CLUSTERS; - - set< Process, ltProcess >::iterator pos= alldata.allProcesses.begin(); - - for ( uint32_t c= 0; - c < Clustering::MAX_CLUSTERS && 0 < r_processes; c++ ) { - - uint32_t n= - ( ( r_processes / r_clusters ) * r_clusters < r_processes ) ? - ( r_processes / r_clusters + 1 ) : ( r_processes / r_clusters ); - - for ( uint32_t i= 0; i < n; i++ ) { - - bool inserted= alldata.clustering.insert( c+1, pos->process ); - assert( inserted ); - - pos++; - r_processes--; - - } - - r_clusters--; - - } -} - - -static void share_clustering( AllData& alldata ) { - - MPI_Barrier( MPI_COMM_WORLD ); - - char* buffer; - int buffer_size= 0; - int buffer_pos= 0; - - if ( 0 == alldata.myRank ) { - - /* get size needed to send clustering information to workers */ - - int size; - - /* alldata.clustering.clustersToProcesses.size() + firsts */ - MPI_Pack_size( 1 + alldata.clustering.clustersToProcesses.size(), - MPI_LONG_LONG_INT, MPI_COMM_WORLD, &size ); - buffer_size+= size; - - /* alldata.clustering.clustersToProcesses.second.size() + second */ - for ( map< uint64_t, set >::const_iterator it= - alldata.clustering.clustersToProcesses.begin(); - it != alldata.clustering.clustersToProcesses.end(); it++ ) { - - MPI_Pack_size( 1 + it->second.size(), 
MPI_LONG_LONG_INT, - MPI_COMM_WORLD, &size ); - buffer_size+= size; - - } - - } - - /* broadcast buffer size */ - MPI_Bcast( &buffer_size, 1, MPI_INT, 0, MPI_COMM_WORLD ); - - /* allocate buffer */ - buffer= new char[ buffer_size ]; - assert( buffer ); - - /* pack clustering information to buffer */ - - if ( 0 == alldata.myRank ) { - - /* alldata.clustering.clustersToProcesses.size() */ - uint64_t clust_proc_map_size= - alldata.clustering.clustersToProcesses.size(); - MPI_Pack( &clust_proc_map_size, 1, MPI_LONG_LONG_INT, buffer, - buffer_size, &buffer_pos, MPI_COMM_WORLD ); - - /* alldata.clustering.clustersToProcesses */ - for ( map< uint64_t, set >::const_iterator it= - alldata.clustering.clustersToProcesses.begin(); - it != alldata.clustering.clustersToProcesses.end(); it++ ) { - - /* alldata.clustering.clustersToProcesses.first */ - uint64_t cluster= it->first; - MPI_Pack( &cluster, 1, MPI_LONG_LONG_INT, buffer, buffer_size, - &buffer_pos, MPI_COMM_WORLD ); - - /* alldata.clustering.clustersToProcesses.second.size() */ - uint64_t processes_size= it->second.size(); - MPI_Pack( &processes_size, 1, MPI_LONG_LONG_INT, buffer, buffer_size, - &buffer_pos, MPI_COMM_WORLD ); - - /* alldata.clustering.clustersToProcesses.second */ - for ( set::const_iterator it2= it->second.begin(); - it2 != it->second.end(); it2++ ) { - - uint64_t process= *it2; - MPI_Pack( &process, 1, MPI_LONG_LONG_INT, buffer, buffer_size, - &buffer_pos, MPI_COMM_WORLD ); - - } - - } - - } - - /* broadcast definitions buffer */ - MPI_Bcast( buffer, buffer_size, MPI_PACKED, 0, MPI_COMM_WORLD ); - - /* unpack clustering information from buffer */ - - if ( 0 != alldata.myRank ) { - - /* alldata.clustering.clustersToProcesses.size() */ - uint64_t clust_proc_map_size; - MPI_Unpack( buffer, buffer_size, &buffer_pos, &clust_proc_map_size, 1, - MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - /* alldata.clustering.clustersToProcesses */ - for ( uint64_t i= 0; i < clust_proc_map_size; i++ ) { - - /* 
alldata.clustering.clustersToProcesses.first */ - uint64_t cluster; - MPI_Unpack( buffer, buffer_size, &buffer_pos, &cluster, 1, - MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - /* alldata.clustering.clustersToProcesses.second.size() */ - uint64_t processes_size; - MPI_Unpack( buffer, buffer_size, &buffer_pos, &processes_size, 1, - MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - /* alldata.clustering.clustersToProcesses.second */ - for ( uint64_t j= 0; j < processes_size; j++ ) { - - uint64_t process; - MPI_Unpack( buffer, buffer_size, &buffer_pos, &process, 1, - MPI_LONG_LONG_INT, MPI_COMM_WORLD ); - - bool inserted= alldata.clustering.insert( cluster, process ); - assert( inserted ); - - } - - } - - } - - delete[] buffer; -} - - -bool SummarizeData( AllData& alldata ) { - - bool error= false; - - /* rank 0 gets clustering information */ - - if ( 0 == alldata.myRank ) { - - get_clustering( alldata ); - - } - - /* share clustering information to workers */ - - if ( 1 < alldata.numRanks ) { - - share_clustering( alldata ); - - } - - /* macro to set min, max to sum before summarizing */ -# define MINMAX2SUM(v) \ - if( 0 != (v).cnt ) { \ - (v).cnt = 1; \ - (v).min= (v).max= (v).sum; \ - } else { \ - (v).cnt = 0; \ - /* (v).min= OTF_UINT64_MAX; (v).max= 0; \ - ^^^ this is set already by the constructor and never touched \ - if (v).cnt == 0. Therefore, it is ignored when computing min/max \ - further on. 
*/ \ - } - - /* summarize map ( func x rank ) to map ( func ) */ - { - map< Pair, FunctionData, ltPair >::iterator it= alldata.functionMapPerRank.begin(); - map< Pair, FunctionData, ltPair >::iterator itend= alldata.functionMapPerRank.end(); - while ( itend != it ) { - - alldata.functionMapGlobal[ it->first.a ].add( it->second ); - it++; - } - alldata.functionMapPerRank.clear(); - } - - /* summarize map ( counter x func x rank ) to map ( counter x func ) */ - { - map< Triple, CounterData, ltTriple >::iterator it= alldata.counterMapPerFunctionRank.begin(); - map< Triple, CounterData, ltTriple >::iterator itend= alldata.counterMapPerFunctionRank.end(); - while ( itend != it ) { - - alldata.counterMapGlobal[ Pair( it->first.a, it->first.b ) ].add( it->second ); - it++; - } - alldata.counterMapPerFunctionRank.clear(); - } - - /* will be generated from messageMapPerRankPair, is only used to generate - messageMapPerCluster */ - map< uint64_t, MessageData > message_map_per_rank; - - /* summarize map ( rank x rank ) to map ( cluster x cluster ) */ - { - map< Pair, MessageData, ltPair >::iterator it= alldata.messageMapPerRankPair.begin(); - map< Pair, MessageData, ltPair >::iterator itend= alldata.messageMapPerRankPair.end(); - while ( itend != it ) { - - uint64_t cluster_a= it->first.a; - uint64_t cluster_b= it->first.b; - - message_map_per_rank[ cluster_a ].add( it->second ); - - if ( alldata.clustering.enabled ) { - - cluster_a= alldata.clustering.process2cluster( it->first.a ); - assert( 0 != cluster_a ); - cluster_b= alldata.clustering.process2cluster( it->first.b ); - assert( 0 != cluster_b ); - - } - - MINMAX2SUM( it->second.count_send ); - MINMAX2SUM( it->second.count_recv ); - MINMAX2SUM( it->second.bytes_send ); - MINMAX2SUM( it->second.bytes_recv ); - MINMAX2SUM( it->second.duration_send ); - MINMAX2SUM( it->second.duration_recv ); - - alldata.messageMapPerClusterPair[ Pair( cluster_a, cluster_b ) ].add( it->second ); - it++; - } - 
alldata.messageMapPerRankPair.clear(); - } - - /* summarize map ( rank ) to map ( cluster ) */ - { - map< uint64_t, MessageData >::iterator it= message_map_per_rank.begin(); - map< uint64_t, MessageData >::iterator itend= message_map_per_rank.end(); - while ( itend != it ) { - - uint64_t cluster= it->first; - - if ( alldata.clustering.enabled ) { - - cluster= alldata.clustering.process2cluster( it->first ); - assert( 0 != cluster ); - - } - - MINMAX2SUM( it->second.count_send ); - MINMAX2SUM( it->second.count_recv ); - MINMAX2SUM( it->second.bytes_send ); - MINMAX2SUM( it->second.bytes_recv ); - MINMAX2SUM( it->second.duration_send ); - MINMAX2SUM( it->second.duration_recv ); - - alldata.messageMapPerCluster[ cluster ].add( it->second ); - it++; - } - message_map_per_rank.clear(); - } - - /* summarize map ( class x rank ) to map ( class x cluster ) */ - { - map< Pair, CollectiveData, ltPair >::iterator it= alldata.collectiveMapPerRank.begin(); - map< Pair, CollectiveData, ltPair >::iterator itend= alldata.collectiveMapPerRank.end(); - while ( itend != it ) { - - const uint64_t& op_class= it->first.a; - uint64_t cluster= it->first.b; - - if ( alldata.clustering.enabled ) { - - cluster= alldata.clustering.process2cluster( it->first.b ); - assert( 0 != cluster ); - - } - - MINMAX2SUM( it->second.count_send ); - MINMAX2SUM( it->second.count_recv ); - MINMAX2SUM( it->second.bytes_send ); - MINMAX2SUM( it->second.bytes_recv ); - MINMAX2SUM( it->second.duration_send ); - MINMAX2SUM( it->second.duration_recv ); - - alldata.collectiveMapPerCluster[ Pair( op_class, cluster ) ].add( it->second ); - it++; - } - alldata.collectiveMapPerRank.clear(); - } - - return !error; -} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/CSVParse.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/CSVParse.cpp deleted file mode 100644 index 4e581bd5b7..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/CSVParse.cpp +++ /dev/null @@ -1,768 +0,0 @@ -/* - This is part of 
the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#include "OTF_Platform.h" - -#include "CSVParse.h" - - -void Glob_Maps::set_trace_count(uint32_t t_count) -{ - trace_count = t_count; -} - -uint32_t Glob_Maps::set_func(string func_name) -{ - uint32_t id = func_id; - GlobalMaps::iterator it = funcmap.find(func_name); - if(it == funcmap.end()) - { - funcmap.insert(pair (func_name, id)); - func_id++; - return id; - } - else - { - return it->second; - } -} - -uint32_t Glob_Maps::set_funcgroup(string fg_name) -{ - uint32_t id = funcgroup_id; - GlobalMaps::iterator it = funcgroupmap.find(fg_name); - if(it == funcgroupmap.end()) - { - funcgroupmap.insert(pair (fg_name, id)); - funcgroup_id++; - return id; - } - else - { - return it->second; - } -} - -uint32_t Glob_Maps::set_counter(string counter_name) -{ - uint32_t id = counter_id; - GlobalMaps::iterator it = countermap.find(counter_name); - if(it == countermap.end()) - { - countermap.insert(pair (counter_name, id)); - counter_id++; - return id; - } - else - { - return it->second; - } -} - -uint32_t Glob_Maps::set_collop(string collop_name) -{ - uint32_t id = collop_id; - GlobalMaps::iterator it = collopmap.find(collop_name); - if(it == collopmap.end()) - { - collopmap.insert(pair (collop_name, id)); - collop_id++; - return id; - } - else - { - return it->second; - } -} - -uint32_t Glob_Maps::get_trace_count() -{ - return trace_count; -} - -uint32_t Glob_Maps::get_func(string func_name) -{ - GlobalMaps::iterator it = funcmap.find(func_name); - if(it == funcmap.end()) - { - return 0; - } - else - { - return it->second; - } -} - -uint32_t Glob_Maps::get_funcgroup(string fg_name) -{ - GlobalMaps::iterator it = funcgroupmap.find(fg_name); - if(it == funcgroupmap.end()) - { - return 0; - } - else - { - return it->second; - } -} - -uint32_t Glob_Maps::get_counter(string counter_name) -{ - GlobalMaps::iterator it = countermap.find(counter_name); - if(it 
== countermap.end()) - { - return 0; - } - else - { - return it->second; - } -} - -uint32_t Glob_Maps::get_collop(string collop_name) -{ - GlobalMaps::iterator it = collopmap.find(collop_name); - if(it == collopmap.end()) - { - return 0; - } - else - { - return it->second; - } -} - -bool check_value(char* value, const char* place) -{ - if(value == NULL) - { - cerr << "Error by reading the csv file." << endl; - cerr << "An expected value is missing, for the Token " << place << "." << endl; - return false; - } - else - { - return true; - } -} - -int parse_csv(Summary_Container& sum_container, const char* file, Glob_Maps& glob_maps) -{ - typedef map LocalMaps; - - LocalMaps localfuncmap; - LocalMaps localfuncgroupmap; - LocalMaps localcountermap; - LocalMaps localcollopmap; - - static uint32_t trace_nr = 0; - trace_nr++; - - fstream in; - in.open(file, ios::in); - if(!in) - { - cerr << "Error, can't find : " << file << endl; - return 1; - } - char buffer[255]; - char* value; - uint32_t check = 0; - while(!in.eof()) - { - in.getline(buffer,255); - value = strtok(buffer,";"); - if(value == NULL) - { - continue; - } - - if(0 == strcmp("ticks", value)) - { - value = strtok(NULL,";"); - if(!check_value(value, "ticks")) // read ticks - { - return 1; - } - sum_container.adddef_Ticks(trace_nr, (uint64_t) atol(value)); - } - if(0 == strcmp("progtime", value)) - { - value = strtok(NULL,";"); - if(!check_value(value, "progtime")) // read progtime - { - return 1; - } - sum_container.set_ProgTime(trace_nr, (uint64_t) atol(value)); - } - else if(0 == strcmp("proctime", value)) // read proctime - { - value = strtok(NULL,";"); - if(!check_value(value, "proctime")) - { - return 1; - } - uint32_t proc_id = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "proctime")) - { - return 1; - } - sum_container.addvalues_ProcTime(trace_nr, proc_id, (uint64_t) atol(value)); - } - else if(0 == strcmp("proc", value)) // read proc - { - value = strtok(NULL,";"); - 
if(!check_value(value, "proc")) - { - return 1; - } - Process_Def_Key p_def_key(trace_nr, (uint32_t) atoi(value)); - value = strtok(NULL,";"); - if(!check_value(value, "proc")) - { - return 1; - } - sum_container.adddef_Proc(p_def_key, strdup(value)); - } - else if(0 == strcmp("fg", value)) // read fg - { - value = strtok(NULL,";"); - if(!check_value(value, "fg")) - { - return 1; - } - uint32_t fg_id = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "fg")) - { - return 1; - } - const char* name_char = strdup(value); - string name; - name.assign(name_char); - localfuncgroupmap.insert(make_pair(fg_id, name)); - fg_id = glob_maps.set_funcgroup(name); - - FG_Def_Key fg_def_key(trace_nr, fg_id); - sum_container.adddef_FG(fg_def_key, name_char); - - } - else if(0 == strcmp("funcdef", value)) // read funcdef - { - value = strtok(NULL,";"); - if(!check_value(value, "funcdef")) - { - return 1; - } - uint32_t func_id = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "funcdef")) - { - return 1; - } - const char* name_char = strdup(value); - string name; - name.assign(name_char); - localfuncmap.insert(make_pair(func_id, name)); - func_id = glob_maps.set_func(name); - Function_Def_Key f_def_key(trace_nr, func_id); - - value = strtok(NULL,";"); - if(!check_value(value, "funcdef")) - { - return 1; - } - LocalMaps::iterator it = localfuncgroupmap.find((uint32_t) atoi(value)); - if(it == localfuncgroupmap.end()) - { - cerr << "Error by getting values for function. No function group name found" - << ", for the given identifier. A failure in the csv file could be" - << " the reason." << endl; - } - else - { - uint32_t fg_id = glob_maps.get_funcgroup(it->second); - if(fg_id == 0) - { - cerr << "No function group entry found." - << "This could be a failure in the csv file." 
<< endl; - continue; - } - Function_Def f_def(name_char, fg_id); - sum_container.adddef_Function(f_def_key, f_def); - } - } - else if(0 == strcmp("func", value)) // read func - { - value = strtok(NULL,";"); - if(!check_value(value, "func")) - { - return 1; - } - uint32_t func_id = (uint32_t) atoi(value); - LocalMaps::iterator it = localfuncmap.find(func_id); - if(it == localfuncmap.end()) - { - cerr << "Error by getting values for function. No function name found" - << ", for the given identifier. A failure in the csv file could be" - << " the reason." << endl; - continue; - } - - func_id = glob_maps.get_func(it->second); - - value = strtok(NULL,";"); - if(!check_value(value, "func")) - { - return 1; - } - uint32_t proc_id = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "func")) - { - return 1; - } - uint64_t invoc = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "func")) - { - return 1; - } - uint64_t excl_time = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "func")) - { - return 1; - } - sum_container.addvalues_Function(trace_nr,func_id, proc_id, invoc, - excl_time, (uint64_t) atol(value)); - } - else if(0 == strcmp("counterdef", value)) // read counterdef - { - value = strtok(NULL,";"); - if(!check_value(value, "counterdef")) - { - return 1; - } - uint32_t counter_id = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "counterdef")) - { - return 1; - } - const char* name_char = strdup(value); - string name; - name.assign(name_char); - localcountermap.insert(pair(counter_id, name)); - counter_id = glob_maps.set_counter(name); - - Counter_Def_Key c_def_key(trace_nr, counter_id); - value = strtok(NULL,";"); - if(!check_value(value, "counterdef")) - { - return 1; - } - Counter_Def c_def(name_char, strdup(value)); - sum_container.adddef_Counter(c_def_key,c_def); - } - else if(0 == strcmp("counter", value)) // read counter - { - value = 
strtok(NULL,";"); - if(!check_value(value, "counter")) - { - return 1; - } - uint32_t func_id = (uint32_t) atoi(value); - LocalMaps::iterator it = localfuncmap.find(func_id); - if(it == localfuncmap.end()) - { - cerr << "Error by getting values for counter. No function name found" - << ", for the given identifier. A failure in the csv file could be" - << " the reason." << endl; - continue; - } - else - { - func_id = glob_maps.get_func(it->second); - } - value = strtok(NULL,";"); - if(!check_value(value, "counter")) - { - return 1; - } - uint32_t proc_id = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "counter")) - { - return 1; - } - uint32_t counter_id = (uint32_t) atoi(value); - LocalMaps::iterator it2 = localcountermap.find(counter_id); - if(it2 == localcountermap.end()) - { - cerr << "Error by getting values for counter. No counter name found" - << ", for the given identifier. A failure in the csv file could be" - << " the reason." << endl; - continue; - } - else - { - counter_id = glob_maps.get_counter(it2->second); - } - value = strtok(NULL,";"); - if(!check_value(value, "counter")) - { - return 1; - } - uint64_t excl_value = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "counter")) - { - return 1; - } - uint64_t incl_value = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "counter")) - { - return 1; - } - if(0 == strcmp("VALID", value)) - { - sum_container.addvalues_Counter(trace_nr, func_id, proc_id, counter_id, - excl_value, incl_value, VALID); - } - else - { - sum_container.addvalues_Counter(trace_nr, func_id, proc_id, counter_id, - excl_value, incl_value, INVALID); - } - } - else if(0 == strcmp("p2p", value)) // read p2p - { - value = strtok(NULL,";"); - if(!check_value(value, "p2p")) - { - return 1; - } - uint32_t sender = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "p2p")) - { - return 1; - } - uint32_t receiver = (uint32_t) 
atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "p2p")) - { - return 1; - } - uint32_t bin_1 = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "p2p")) - { - return 1; - } - uint32_t bin_2 = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "p2p")) - { - return 1; - } - uint64_t invoc = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "p2p")) - { - return 1; - } - uint64_t length = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "p2p")) - { - return 1; - } - sum_container.addvalues_P2P(trace_nr, sender, receiver, bin_1, bin_2, invoc, - length, (uint64_t) atol(value)); - } - else if(0 == strcmp("collopdef", value)) // read collopdef - { - value = strtok(NULL,";"); - if(!check_value(value, "collopdef")) - { - return 1; - } - uint32_t collop_id = (uint32_t) atoi(value); - - value = strtok(NULL,";"); - if(!check_value(value, "collopdef")) - { - return 1; - } - const char* name_char = strdup(value); - string name; - name.assign(name_char); - localcollopmap.insert(pair(collop_id, name)); - collop_id = glob_maps.set_collop(name); - - CollOp_Def_Key collop_def_key(trace_nr, collop_id); - value = strtok(NULL,";"); - if(!check_value(value, "collopdef")) - { - return 1; - } - uint32_t type; - if(0 == strcmp("Barrier", value)) - { - type = OTF_COLLECTIVE_TYPE_BARRIER; - } - else if(0 == strcmp("ONE2ALL", value)) - { - type = OTF_COLLECTIVE_TYPE_ONE2ALL; - } - else if(0 == strcmp("ALL2ONE", value)) - { - type = OTF_COLLECTIVE_TYPE_ALL2ONE; - } - else if(0 == strcmp("ALL2ALL", value)) - { - type = OTF_COLLECTIVE_TYPE_ALL2ALL; - } - else - { - type = OTF_COLLECTIVE_TYPE_UNKNOWN; - } - CollOp_Def collop_def(name_char, type); - sum_container.adddef_CollOp(collop_def_key, collop_def); - } - else if(0 == strcmp("collop", value)) // read collop - { - value = strtok(NULL,";"); - if(!check_value(value, "collop")) - { - return 1; - } - uint32_t proc = 
(uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "collop")) - { - return 1; - } - uint32_t collop_id = (uint32_t) atoi(value); - LocalMaps::iterator it = localcollopmap.find(collop_id); - if(it == localcollopmap.end()) - { - cerr << "Error by getting values for Collective Operations." - << "No Collective Operation name found" - << ", for the given identifier. A failure in the csv file could be" - << " the reason." << endl; - continue; - } - - collop_id = glob_maps.get_collop(it->second); - - value = strtok(NULL,";"); - if(!check_value(value, "collop")) - { - return 1; - } - uint64_t invoc_s = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "collop")) - { - return 1; - } - uint64_t invoc_r = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "collop")) - { - return 1; - } - uint64_t sent = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "collop")) - { - return 1; - } - uint64_t received = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "collop")) - { - return 1; - } - sum_container.addvalues_CollOp(trace_nr, proc, collop_id, invoc_s, invoc_r, sent, - received, (uint64_t) atol(value)); - } - else if(0 == strcmp("bin1", value)) // read bin1 - { - value = strtok(NULL,";"); - if(!check_value(value, "bin1")) - { - return 1; - } - uint32_t bin = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "bin1")) - { - return 1; - } - uint64_t min = (uint64_t) atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "bin1")) - { - return 1; - } - sum_container.setdef_Bin1(trace_nr, bin, min, (uint64_t) atol(value)); - } - else if(0 == strcmp("bin2", value)) // read bin2 - { - value = strtok(NULL,";"); - if(!check_value(value, "bin2")) - { - return 1; - } - uint32_t bin = (uint32_t) atoi(value); - value = strtok(NULL,";"); - if(!check_value(value, "bin2")) - { - return 1; - } - uint64_t min = (uint64_t) 
atol(value); - value = strtok(NULL,";"); - if(!check_value(value, "bin2")) - { - return 1; - } - sum_container.setdef_Bin2(trace_nr, bin, min, (uint64_t) atol(value)); - } - else - { - if(check < 20) - { - ++check; - } - else - { - cerr << "Error in csv_parse().Too much unknown token in csv file : " - << file << endl; - return 1; - } - } - } - in.close(); - sum_container.adddef_Trace(trace_nr, file); - glob_maps.set_trace_count(trace_nr); - - return 0; -} - -int Glob_Maps::special_synchronize(Summary_Container& sum_container) -{ - GlobalMaps::iterator it; - it = funcmap.begin(); - uint32_t differences[4] = {0,0,0,0}; // 1 := func_def ; 2:= fg_Def ; ... - while(it != funcmap.end()) - { - for(uint32_t i = 1; i <= trace_count; i++) - { - Function_Def_Key f_def_key(i, it->second); - if(!sum_container.find_Function(f_def_key)) - { - cerr << "Function " << it->first << " couldn\'t found in trace " - << sum_container.get_Trace_name(i) << endl; - for(uint32_t u = 1; u <= trace_count; u++) - { - Function_Def_Key f2_def_key(u, it->second); - if(sum_container.find_Function(f2_def_key)) - { - Function_Def f_def = sum_container.get_Function_Def(u, it->second); - sum_container.adddef_Function(f_def_key, f_def); - differences[0] += 1; - cerr << "Problem fixed" << endl; - break; - } - } - } - } - ++it; - } - it = funcgroupmap.begin(); - while(it != funcgroupmap.end()) - { - for(uint32_t i = 1; i <= trace_count; i++) - { - FG_Def_Key fg_def_key(i, it->second); - if(!sum_container.find_FG(fg_def_key)) - { - cerr << "Function Group " << it->first << " couldn\'t found in trace " - << sum_container.get_Trace_name(i) << endl; - sum_container.adddef_FG(fg_def_key, strdup(it->first.c_str())); - differences[1] += 1; - cerr << "Problem fixed" << endl; - } - } - ++it; - } - it = countermap.begin(); - while(it != countermap.end()) - { - for(uint32_t i = 1; i <= trace_count; i++) - { - Counter_Def_Key c_def_key(i, it->second); - if(!sum_container.find_Counter(c_def_key)) - { - cerr << "Counter 
" << it->first << " couldn\'t found in trace " - << sum_container.get_Trace_name(i) << endl; - for(uint32_t u = 1; u <= trace_count; u++) - { - Counter_Def_Key c2_def_key(u, it->second); - if(sum_container.find_Counter(c2_def_key)) - { - Counter_Def c_def = sum_container.get_Counter_Def(u, it->second); - sum_container.adddef_Counter(c_def_key, c_def); - differences[2] += 1; - cerr << "Problem fixed" << endl; - break; - } - } - } - } - ++it; - } - - it = collopmap.begin(); - while(it != collopmap.end()) - { - for(uint32_t i = 1; i <= trace_count; i++) - { - CollOp_Def_Key co_def_key(i, it->second); - if(!sum_container.find_CollOp(co_def_key)) - { - cerr << "Collective Operation " << it->first << " couldn\'t found in trace " - << sum_container.get_Trace_name(i) << endl; - for(uint32_t u = 1; u <= trace_count; u++) - { - CollOp_Def_Key co2_def_key(u, it->second); - if(sum_container.find_CollOp(co2_def_key)) - { - CollOp_Def co_def = sum_container.get_CollOp_Def(u, it->second); - sum_container.adddef_CollOp(co_def_key, co_def); - differences[3] += 1; - cerr << "Problem fixed" << endl; - break; - } - } - } - } - ++it; - } - - cout << endl; - cout << "Differences in Func_Def : " << differences[0] << endl; - cout << "Differences in Func_Group_Def : " << differences[1] << endl; - cout << "Differences in Counter_Def : " << differences[2] << endl; - cout << "Differences in CollOp_Def : " << differences[3] << endl; - - return 0; -} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/CSVParse.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/CSVParse.h deleted file mode 100644 index f1ed0dd8e7..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/CSVParse.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
- Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#ifndef CSVPARSER_H -#define CSVPARSER_H - -#include -#include -#include -#include - -#include "OTF_inttypes.h" - -#include "DataStructure.h" - - -using namespace std; - -// to synchronize the data of the different csv files - -class Glob_Maps -{ - typedef map GlobalMaps; - - public: - Glob_Maps(); - int special_synchronize(Summary_Container& sum_container); - void set_trace_count(uint32_t t_count); - uint32_t get_trace_count(); - // the return value is the identifier - uint32_t set_func(string func_name); - uint32_t set_funcgroup(string fg_name); - uint32_t set_counter(string counter_name); - uint32_t set_collop(string collop_name); - - uint32_t get_func(string func_name); - uint32_t get_funcgroup(string fg_name); - uint32_t get_counter(string counter_name); - uint32_t get_collop(string collop_name); - - private: - uint32_t trace_count; - uint32_t func_id; - uint32_t funcgroup_id; - uint32_t counter_id; - uint32_t collop_id; - GlobalMaps funcmap; - GlobalMaps funcgroupmap; - GlobalMaps countermap; - GlobalMaps collopmap; -}; - -inline Glob_Maps::Glob_Maps() -{ - trace_count = 0; - func_id = 1; - funcgroup_id = 1; - counter_id = 1; - collop_id = 1; -} - -int parse_csv(Summary_Container& sum_container, const char* file, Glob_Maps& glob_maps); - -#endif /* CSVPARSER_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/DataStructure.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/DataStructure.cpp deleted file mode 100644 index 4eec0576b4..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/DataStructure.cpp +++ /dev/null @@ -1,1694 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
- Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#include "DataStructure.h" - -/************************************** operator< **************************************/ - -bool operator<(const FG_Def_Key& c1, const FG_Def_Key& c2 ) -{ - if(c1.trace != c2.trace) - return (c1.trace < c2.trace); - else if(c1.ident != c2.ident) - return (c1.ident < c2.ident); - else - return false; -} - -bool operator<(const Process_Def_Key& c1, const Process_Def_Key& c2) -{ - if(c1.trace != c2.trace) - return (c1.trace < c2.trace); - else if(c1.ident != c2.ident) - return (c1.ident < c2.ident); - else - return false; -} - -bool operator<(const Function_Def_Key& c1, const Function_Def_Key& c2) -{ - if(c1.trace != c2.trace) - return (c1.trace < c2.trace); - else if(c1.ident != c2.ident) - return (c1.ident < c2.ident); - else - return false; -} - -bool operator<(const Counter_Def_Key& c1, const Counter_Def_Key& c2) -{ - if(c1.trace != c2.trace) - return (c1.trace < c2.trace); - else if(c1.ident != c2.ident) - return (c1.ident < c2.ident); - else - return false; -} - -bool operator<(const CollOp_Def_Key& c1, const CollOp_Def_Key& c2) -{ - if(c1.trace != c2.trace) - return (c1.trace < c2.trace); - else if(c1.ident != c2.ident) - return (c1.ident < c2.ident); - else - return false; -} - -bool operator<(const Bin_1_Key& c1, const Bin_1_Key& c2) -{ - if(c1.trace != c2.trace) - return (c1.trace < c2.trace); - else if(c1.ident != c2.ident) - return (c1.ident < c2.ident); - else - return false; -} - -bool operator<(const Bin_2_Key& c1, const Bin_2_Key& c2) -{ - if(c1.trace != c2.trace) - return (c1.trace < c2.trace); - else if(c1.ident != c2.ident) - return (c1.ident < c2.ident); - else - return false; -} - -bool operator<(const Function_Key& f_key1,const Function_Key& f_key2) -{ - if(f_key1.trace != f_key2.trace) - return (f_key1.trace < f_key2.trace); - else if(f_key1.func != f_key2.func) - return (f_key1.func < f_key2.func); - else if(f_key1.proc != f_key2.proc) - return 
(f_key1.proc < f_key2.proc); - else - return false; -} - -bool operator<(const Counter_Key& c_key1, const Counter_Key& c_key2) -{ - if(c_key1.trace != c_key2.trace) - return (c_key1.trace < c_key2.trace); - else if(c_key1.func != c_key2.func) - return (c_key1.func < c_key2.func); - else if(c_key1.proc != c_key2.proc) - return (c_key1.proc < c_key2.proc); - else if(c_key1.counter != c_key2.counter) - return (c_key1.counter < c_key2.counter); - else - return false; -} - -bool operator<(const P2P_Key& p2p_key1, const P2P_Key& p2p_key2) -{ - if(p2p_key1.trace != p2p_key2.trace) - return (p2p_key1.trace < p2p_key2.trace); - else if(p2p_key1.sender != p2p_key2.sender) - return (p2p_key1.sender < p2p_key2.sender); - else if(p2p_key1.receiver != p2p_key2.receiver) - return (p2p_key1.receiver < p2p_key2.receiver); - else if(p2p_key1.bin_1 != p2p_key2.bin_1) - return (p2p_key1.bin_1 < p2p_key2.bin_1); - else if(p2p_key1.bin_2 != p2p_key2.bin_2) - return (p2p_key1.bin_2 < p2p_key2.bin_2); - else - return false; -} - -bool operator<(const CollOp_Key& coll_key1, const CollOp_Key& coll_key2) -{ - if(coll_key1.trace != coll_key2.trace) - return (coll_key1.trace < coll_key2.trace); - else if(coll_key1.proc != coll_key2.proc) - return (coll_key1.proc < coll_key2.proc); - else if(coll_key1.collop != coll_key2.collop) - return (coll_key1.collop < coll_key2.collop); - else - return false; -} - - -/************************************** operator== **************************************/ - - -bool operator==(const FG_Def_Key& c1, const FG_Def_Key& c2 ) -{ - if(((0 == c1.trace) || (0 == c2.trace) || (c1.trace == c2.trace)) && - ((0 == c1.ident) || (0 == c2.ident) || (c1.ident == c2.ident))) - return true; - else - return false; -} - -bool operator==(const Process_Def_Key& c1, const Process_Def_Key& c2) -{ - if(((0 == c1.trace) || (0 == c2.trace) || (c1.trace == c2.trace)) && - ((0 == c1.ident) || (0 == c2.ident) || (c1.ident == c2.ident))) - return true; - else - return false; -} - -bool 
operator==(const Function_Def_Key& c1, const Function_Def_Key& c2) -{ - if(((0 == c1.trace) || (0 == c2.trace) || (c1.trace == c2.trace)) && - ((0 == c1.ident) || (0 == c2.ident) || (c1.ident == c2.ident))) - return true; - else - return false; -} - -bool operator==(const Counter_Def_Key& c1, const Counter_Def_Key& c2) -{ - if(((0 == c1.trace) || (0 == c2.trace) || (c1.trace == c2.trace)) && - ((0 == c1.ident) || (0 == c2.ident) || (c1.ident == c2.ident))) - return true; - else - return false; -} - -bool operator==(const CollOp_Def_Key& c1, const CollOp_Def_Key& c2) -{ - if(((0 == c1.trace) || (0 == c2.trace) || (c1.trace == c2.trace)) && - ((0 == c1.ident) || (0 == c2.ident) || (c1.ident == c2.ident))) - return true; - else - return false; -} - -bool operator==(const Bin_1_Key& c1, const Bin_1_Key& c2) -{ - if(((0 == c1.trace) || (0 == c2.trace) || (c1.trace == c2.trace)) && - ((0 == c1.ident) || (0 == c2.ident) || (c1.ident == c2.ident))) - return true; - else - return false; -} - -bool operator==(const Bin_2_Key& c1, const Bin_2_Key& c2) -{ - if(((0 == c1.trace) || (0 == c2.trace) || (c1.trace == c2.trace)) && - ((0 == c1.ident) || (0 == c2.ident) || (c1.ident == c2.ident))) - return true; - else - return false; -} - - - -bool operator==(const Function_Key& f_key1,const Function_Key& f_key2) -{ - if(((0 == f_key1.trace) || (0 == f_key2.trace) || (f_key1.trace == f_key2.trace)) && - ((0 == f_key1.func) || (0 == f_key2.func) || (f_key1.func == f_key2.func)) && - ((0 == f_key1.proc) || (0 == f_key2.proc) || (f_key1.proc == f_key2.proc))) - return true; - else - return false; -} - -bool operator==(const Counter_Key& c_key1,const Counter_Key& c_key2) -{ - if(((0 == c_key1.trace) || (0 == c_key2.trace) || (c_key1.trace == c_key2.trace)) && - ((0 == c_key1.func) || (0 == c_key2.func) || (c_key1.func == c_key2.func)) && - ((0 == c_key1.proc) || (0 == c_key2.proc) || (c_key1.proc == c_key2.proc)) && - ((0 == c_key1.counter) || (0 == c_key2.counter) || (c_key1.counter == 
c_key2.counter))) - return true; - else - return false; -} - -bool operator==(const P2P_Key& p2p_key1,const P2P_Key& p2p_key2) -{ - if(((0 == p2p_key1.trace) || (0 == p2p_key2.trace) || (p2p_key1.trace == p2p_key2.trace)) && - ((0 == p2p_key1.sender) || (0 == p2p_key2.sender) || (p2p_key1.sender == p2p_key2.sender)) && - ((0 == p2p_key1.receiver) || (0 == p2p_key2.receiver) || (p2p_key1.receiver == p2p_key2.receiver)) && - ((0 == p2p_key1.bin_1) || (0 == p2p_key2.bin_1) || (p2p_key1.bin_1 == p2p_key2.bin_1)) && - ((0 == p2p_key1.bin_2) || (0 == p2p_key2.bin_2) || (p2p_key1.bin_2 == p2p_key2.bin_2))) - return true; - else - return false; -} - -bool operator==(const CollOp_Key& co_key1,const CollOp_Key& co_key2) -{ - if(((0 == co_key1.trace) || (0 == co_key2.trace) || (co_key1.trace == co_key2.trace)) && - ((0 == co_key1.proc) || (0 == co_key2.proc) || (co_key1.proc == co_key2.proc)) && - ((0 == co_key1.collop) || (0 == co_key2.collop) || (co_key1.collop == co_key2.collop))) - return true; - else - return false; -} - - -/************************************** operator+= **************************************/ - -Function_Value& Function_Value::operator+=(const Function_Value& f_value) -{ - invoc += f_value.invoc; - excl_time += f_value.excl_time; - incl_time += f_value.incl_time; - - return *this; -} - -Counter_Value& Counter_Value::operator+=(const Counter_Value& c_value) -{ - if((valid == INVALID) || (c_value.valid == INVALID)) - valid = INVALID; - excl_value += c_value.excl_value; - incl_value += c_value.incl_value; - - return *this; -} - -P2P_Value& P2P_Value::operator+=(const P2P_Value& p2p_value) -{ - invoc += p2p_value.invoc; - length += p2p_value.length; - time += p2p_value.time; - - return *this; -} - -CollOp_Value& CollOp_Value::operator+=(const CollOp_Value& coll_value) -{ - invoc_send += coll_value.invoc_send; - invoc_receive += coll_value.invoc_receive; - length_send += coll_value.length_send; - length_receive += coll_value.length_receive; - time += 
coll_value.time; - - return *this; -} - -/************************************** operator= **************************************/ - -Function_Value& Function_Value::operator=(const Function_Value& f_value) -{ - invoc = f_value.invoc; - excl_time = f_value.excl_time; - incl_time = f_value.incl_time; - - return *this; -} - -Counter_Value& Counter_Value::operator=(const Counter_Value& c_value) -{ - valid = c_value.valid; - excl_value = c_value.excl_value; - incl_value = c_value.incl_value; - - return *this; -} - -P2P_Value& P2P_Value::operator=(const P2P_Value& p2p_value) -{ - invoc = p2p_value.invoc; - length = p2p_value.length; - time = p2p_value.time; - - return *this; -} - -CollOp_Value& CollOp_Value::operator=(const CollOp_Value& coll_value) -{ - invoc_send = coll_value.invoc_send; - invoc_receive = coll_value.invoc_receive; - length_send = coll_value.length_send; - length_receive = coll_value.length_receive; - time = coll_value.time; - - return *this; -} - -/****************************** Summary_Container methodes ******************************/ - -int Summary_Container::adddef_Function(Function_Def_Key f_def_key, Function_Def f_def) -{ - func_def_map.insert(make_pair(f_def_key, f_def)); - return 0; -} - -int Summary_Container::adddef_Counter(Counter_Def_Key c_def_key, Counter_Def c_def) -{ - counter_def_map.insert(make_pair(c_def_key, c_def)); - return 0; -} - -int Summary_Container::adddef_FG(FG_Def_Key fg_def_key, const char* name) -{ - if(name == NULL) - fg_def_map.insert(make_pair(fg_def_key, - (const char*)"functiongroup")); - else - fg_def_map.insert(make_pair(fg_def_key, name)); - - return 0; -} - -int Summary_Container::adddef_Proc(Process_Def_Key p_def_key, const char* name) -{ - if(name == NULL) - proc_def_map.insert(make_pair(p_def_key, - (const char*)"process")); - else - proc_def_map.insert(make_pair(p_def_key, name)); - - return 0; -} - -int Summary_Container::adddef_CollOp(CollOp_Def_Key coll_def_key, CollOp_Def coll_def) -{ - 
collop_def_map.insert(make_pair(coll_def_key, coll_def)); - return 0; -} - -int Summary_Container::adddef_Ticks(uint32_t trace, uint64_t ticks) -{ - ticks_def_map.insert(make_pair(trace, ticks)); - return 0; -} - -int Summary_Container::adddef_Trace(uint32_t trace, const char* name) -{ - if(name == NULL) - trace_map.insert(make_pair(trace, - (const char*)"trace")); - else - trace_map.insert(make_pair(trace, name)); - - return 0; -} - -int Summary_Container::adddef_Bin(uint32_t trace) -{ - uint32_t i; - - /* Bin_1 */ - - uint32_t N = 24; - uint64_t border = 1; - uint64_t border_temp = 0; - for(i = 1; i < N; i++) - { - Bin_1_Key b_1_key(trace, i); - Bin_1_Value b_1_value(border_temp, border); - bin_1_map.insert(make_pair(b_1_key, b_1_value)); - border_temp = border + 1; - border <<= 1; - } - Bin_1_Key b_1_key(trace, N); - Bin_1_Value b_1_value(border_temp, border_temp); - // min_value == max_value -> for all values bigger than min_value - bin_1_map.insert(make_pair(b_1_key, b_1_value)); - - /* Bin_2 */ - - N = 16; - border = 4; - border_temp = 0; - for(i = 1; i < N; i++) - { - Bin_2_Key b_2_key(trace, i); - Bin_2_Value b_2_value(border_temp, border); - bin_2_map.insert(make_pair(b_2_key, b_2_value)); - border_temp = border + 1; - border <<= 2; - } - Bin_2_Key b_2_key(trace, N); - Bin_2_Value b_2_value(border_temp, border_temp); - // min_value == max_value -> for all values bigger than min_value - bin_2_map.insert(make_pair(b_2_key, b_2_value)); - - return 0; -} - -int Summary_Container::setdef_Bin1(uint32_t trace, uint32_t bin, uint64_t min, uint64_t max) -{ - if((bin <= 0) || (trace <= 0)) - { - return 1; - } - Bin_1_Key b_1_key(trace, bin); - Bin_1_Value b_1_value(min, max); - bin_1_map.insert(make_pair(b_1_key, b_1_value)); - return 0; -} - -int Summary_Container::setdef_Bin2(uint32_t trace, uint32_t bin, uint64_t min, uint64_t max) -{ - if((bin <= 0) || (trace <= 0)) - { - return 1; - } - Bin_2_Key b_2_key(trace, bin); - Bin_2_Value b_2_value(min, max); - 
bin_2_map.insert(make_pair(b_2_key, b_2_value)); - return 0; -} - -int Summary_Container::set_ProgTime(uint32_t trace, uint64_t time) -{ - progtime_map[trace] = time; - return 0; -} - -int Summary_Container::addvalues_Function(uint32_t trace, uint32_t func, uint32_t proc, - uint64_t invoc, uint64_t excl_time, - uint64_t incl_time) -{ - if((trace == 0) || (func == 0) || (proc == 0)) - { - cerr << "\nError in addvalues_Function, one or more key parameter were 0." << endl; - return 1; - } - Function_Key f_key(trace, func, proc); - Function_Value f_value(invoc, excl_time, incl_time); - function_map[f_key] += f_value; - - return 0; -} - -int Summary_Container::resetvalues_Function(uint32_t trace, uint32_t func, uint32_t proc, - uint64_t invoc, uint64_t excl_time, - uint64_t incl_time) -{ - if((trace == 0) || (func == 0) || (proc == 0)) - { - cerr << "\nError in resetvalues_Function, one or more key parameter were 0." << endl; - return 1; - } - Function_Key f_key(trace, func, proc); - Function_Value f_value(invoc, excl_time, incl_time); - function_map[f_key] = f_value; - return 0; -} - - -int Summary_Container::addvalues_Counter(uint32_t trace, uint32_t func, uint32_t proc, - uint32_t counter, uint64_t excl_value, - uint64_t incl_value, bool valid) -{ - if((trace == 0) || (func == 0) || (proc == 0) || (counter == 0)) - { - cerr << "\nError in addvalues_Counter, one or more key parameter were 0." << endl; - return 1; - } - Counter_Key c_key(trace, func, proc, counter); - Counter_Value c_value(valid, excl_value, incl_value); - counter_map[c_key] += c_value; - return 0; -} - -int Summary_Container::addvalues_P2P(uint32_t trace, uint32_t sender, uint32_t receiver, - uint32_t bin_1, uint32_t bin_2, uint64_t invoc, - uint64_t length, uint64_t time) -{ - if((trace == 0) || (sender == 0) || (receiver == 0) || (bin_1 == 0) || (bin_2 == 0)) - { - cerr << "\nError in addvalues_P2P, one or more key parameter were 0." 
<< endl; - return 1; - } - - P2P_Value p2p_value(invoc, length, time); - - p2p_map[trace][sender][receiver][bin_1][bin_2] += p2p_value; - - return 0; -} - -int Summary_Container::resetvalues_P2P(uint32_t trace, uint32_t sender, uint32_t receiver, - uint32_t bin_1, uint32_t bin_2, uint64_t invoc, - uint64_t length, uint64_t time) -{ - if((trace == 0) || (sender == 0) || (receiver == 0) || (bin_1 == 0) || (bin_2 == 0)) - { - cerr << "\nError in resetvalues_P2P, one or more key parameter were 0." << endl; - return 1; - } - P2P_Key p2p_key(trace, sender, receiver, bin_1, bin_2); - P2P_Value p2p_value(invoc, length, time); - - map::iterator iter; - iter = p2p_time_map.find(p2p_key); - if(iter == p2p_time_map.end()) p2p_time_map[p2p_key] = 0; - - if(p2p_time_map[p2p_key] == time) { - p2p_map[trace][sender][receiver][bin_1][bin_2] += p2p_value; - } else { - p2p_map[trace][sender][receiver][bin_1][bin_2] = p2p_value; - p2p_time_map[p2p_key] = time; - } - return 0; -} - -int Summary_Container::addvalues_CollOp(uint32_t trace, uint32_t proc, uint32_t collop, - uint64_t invoc_send, uint64_t invoc_receive, uint64_t length_send, - uint64_t length_receive, uint64_t time) -{ - if((trace == 0) || (proc == 0) || (collop == 0)) - { - cerr << "\nError in addvalues_CollOp, one or more key parameter were 0." << endl; - return 1; - } - CollOp_Key collop_key(trace, proc, collop); - CollOp_Value collop_value(invoc_send, invoc_receive, length_send, length_receive, time); - collop_map[collop_key] += collop_value; - return 0; -} - -int Summary_Container::resetvalues_CollOp(uint32_t trace, uint32_t proc, uint32_t collop, - uint64_t invoc_send, uint64_t invoc_receive, uint64_t length_send, uint64_t length_receive, uint64_t time) -{ - if((trace == 0) || (proc == 0) || (collop == 0)) - { - cerr << "\nError in resetvalues_CollOp, one or more key parameter were 0." 
<< endl; - return 1; - } - CollOp_Key collop_key(trace, proc, collop); - CollOp_Value collop_value(invoc_send, invoc_receive, length_send, length_receive, time); - collop_map[collop_key] = collop_value; - return 0; -} - -int Summary_Container::addvalues_ProcTime(uint32_t trace, uint32_t proc, uint64_t time) -{ - if((trace == 0) || (proc == 0)) - { - cerr << "\nError in addvalues_ProcTime, one or more key parameter were 0." << endl; - return 1; - } - Process_Def_Key proc_def_key(trace, proc); - proctime_map[proc_def_key] = time; - return 0; -} - -int Summary_Container::get_Function_Def_Key(uint32_t trace, vector& f_vector) -{ - FuncDefMap::iterator it = func_def_map.begin(); - while(it != func_def_map.end()) - { - if((((Function_Def_Key) it->first).get_trace() == trace) || (trace == 0)) - { - f_vector.push_back(((Function_Def_Key) it->first).get_ident()); - } - ++it; - } - return 0; -} - -int Summary_Container::get_Counter_Def_Key(uint32_t trace, vector& c_vector) -{ - CounterDefMap::iterator it = counter_def_map.begin(); - while(it != counter_def_map.end()) - { - if((((Counter_Def_Key) it->first).get_trace() == trace) || (trace == 0)) - { - c_vector.push_back(((Counter_Def_Key) it->first).get_ident()); - } - ++it; - } - return 0; -} - -int Summary_Container::get_Process_Def_Key(uint32_t trace, vector& p_vector) -{ - ProcDefMap::iterator it = proc_def_map.begin(); - while(it != proc_def_map.end()) - { - if((((Process_Def_Key) it->first).get_trace() == trace) || (trace == 0)) - { - p_vector.push_back(((Process_Def_Key) it->first).get_ident()); - } - ++it; - } - return 0; -} - -int Summary_Container::get_FG_Def_Key(uint32_t trace, vector& fg_vector) -{ - FGDefMap::iterator it = fg_def_map.begin(); - while(it != fg_def_map.end()) - { - if((((FG_Def_Key) it->first).get_trace() == trace) || (trace == 0)) - { - fg_vector.push_back(((FG_Def_Key) it->first).get_ident()); - } - ++it; - } - return 0; -} - -int Summary_Container::get_CollOp_Def_Key(uint32_t trace, vector& 
collop_vector) -{ - CollOpDefMap::iterator it = collop_def_map.begin(); - while(it != collop_def_map.end()) - { - if((((CollOp_Def_Key) it->first).get_trace() == trace) || (trace == 0)) - { - collop_vector.push_back(((CollOp_Def_Key) it->first).get_ident()); - } - ++it; - } - return 0; -} - -int Summary_Container::get_Bin1_Def_Key(uint32_t trace, vector& bin1_vector) -{ - Bin1Map::iterator it = bin_1_map.begin(); - while(it != bin_1_map.end()) - { - if((((Bin_1_Key) it->first).get_trace() == trace) || (trace == 0)) - { - bin1_vector.push_back(((Bin_1_Key) it->first).get_ident()); - } - ++it; - } - return 0; -} - -int Summary_Container::get_Bin2_Def_Key(uint32_t trace, vector& bin2_vector) -{ - Bin2Map::iterator it = bin_2_map.begin(); - while(it != bin_2_map.end()) - { - if((((Bin_2_Key) it->first).get_trace() == trace) || (trace == 0)) - { - bin2_vector.push_back(((Bin_2_Key) it->first).get_ident()); - } - ++it; - } - return 0; -} - -int Summary_Container::get_Trace(vector& trace_vector) -{ - TraceMap::iterator it = trace_map.begin(); - while(it != trace_map.end()) - { - trace_vector.push_back(it->first); - ++it; - } - return 0; -} - -Function_Def Summary_Container::get_Function_Def(uint32_t trace, uint32_t func) -{ - Function_Def_Key f_def_key(trace, func); - FuncDefMap::iterator it = func_def_map.find(f_def_key); - if(it == func_def_map.end()) - { - Function_Def f_def(NULL, 0); //No entry in map - return f_def; - } - else - return it->second; -} - -Counter_Def Summary_Container::get_Counter_Def(uint32_t trace, uint32_t counter) -{ - Counter_Def_Key c_def_key(trace, counter); - CounterDefMap::iterator it = counter_def_map.find(c_def_key); - if(it == counter_def_map.end()) - { - Counter_Def c_def(NULL, NULL); //No entry in map - return c_def; - } - else - return it->second; -} - -const char* Summary_Container::get_Process_Def(uint32_t trace, uint32_t proc) -{ - Process_Def_Key p_def_key(trace, proc); - ProcDefMap::iterator it = proc_def_map.find(p_def_key); - 
if(it == proc_def_map.end()) - return NULL; - else - return it->second; -} - -const char* Summary_Container::get_FG_Def(uint32_t trace, uint32_t fg) -{ - FG_Def_Key fg_def_key(trace, fg); - FGDefMap::iterator it = fg_def_map.find(fg_def_key); - if(it == fg_def_map.end()) - return NULL; - else - return it->second; -} - -const char* Summary_Container::get_Trace_name(uint32_t trace) -{ - TraceMap::iterator it = trace_map.find(trace); - if(it == trace_map.end()) - return NULL; - else - return it->second; -} - -CollOp_Def Summary_Container::get_CollOp_Def(uint32_t trace, uint32_t collop) -{ - CollOp_Def_Key collop_def_key(trace, collop); - CollOpDefMap::iterator it = collop_def_map.find(collop_def_key); - if(it == collop_def_map.end()) - { - CollOp_Def collop_def(NULL, 0); //No entry in map - return collop_def; - } - else - return it->second; -} - -uint32_t Summary_Container::get_CollOpType_Def(uint32_t trace, uint32_t collop) -{ - CollOp_Def_Key collop_def_key(trace, collop); - CollOpDefMap::iterator it = collop_def_map.find(collop_def_key); - if(it == collop_def_map.end()) - return 0; - else - return it->second.get_type(); -} - -Function_Value Summary_Container::get_Function(uint32_t trace, uint32_t func, uint32_t proc) -{ - Function_Key f_key(trace, func, proc); - Function_Value f_value; - FunctionMap::iterator it; - - if(trace == 0 || func == 0 || proc == 0) - { - it = function_map.begin(); - while(it != function_map.end()) - { - if(it->first == f_key) - f_value += it->second; - ++it; - } - return f_value; - } - else - { - it = function_map.find(f_key); - if(it == function_map.end()) - return f_value; - else - return f_value += it->second; - } -} - -Counter_Value Summary_Container::get_Counter(uint32_t trace, uint32_t func, uint32_t proc, - uint32_t counter) -{ - Counter_Key c_key(trace, func, proc, counter); - Counter_Value c_value; - CounterMap::iterator it; - - if(trace == 0 || func == 0 || proc == 0 || counter == 0) - { - it = counter_map.begin(); - while(it != 
counter_map.end()) - { - if(it->first == c_key) - c_value += it->second; - ++it; - } - return c_value; - } - else - { - it = counter_map.find(c_key); - if(it == counter_map.end()) - return c_value; - else - return c_value += it->second; - } -} - -P2P_Value Summary_Container::get_P2P(uint32_t trace, uint32_t sender, uint32_t receiver, - uint32_t bin_1, uint32_t bin_2) -{ - P2P_Value p2p_value; - map > > > >::iterator trace_it; - map > > >::iterator send_it; - map > >::iterator recv_it; - map >::iterator bin1_it; - map::iterator bin2_it; - - if( trace > 0 && bin_1 == 0 && bin_2 == 0) { - if( sender > 0 && receiver > 0) { - - bin1_it = p2p_map[trace][sender][receiver].begin(); - while(bin1_it != p2p_map[trace][sender][receiver].end() ) { - bin2_it = bin1_it->second.begin(); - while( bin2_it != bin1_it->second.end() ) { - p2p_value += bin2_it->second; - ++bin2_it; - } - ++bin1_it; - } - - return p2p_value; - - } else if( sender > 0) { - - recv_it = p2p_map[trace][sender].begin(); - while(recv_it != p2p_map[trace][sender].end() ) { - bin1_it = recv_it->second.begin(); - while(bin1_it != recv_it->second.end() ) { - bin2_it = bin1_it->second.begin(); - while( bin2_it != bin1_it->second.end() ) { - p2p_value += bin2_it->second; - ++bin2_it; - } - ++bin1_it; - } - ++recv_it; - } - - return p2p_value; - - } else if( receiver > 0) { - - send_it = p2p_map[trace].begin(); - while( send_it != p2p_map[trace].end() ) { - bin1_it = send_it->second[receiver].begin(); - while( bin1_it != send_it->second[receiver].end() ) { - bin2_it = bin1_it->second.begin(); - while( bin2_it != bin1_it->second.end() ) { - p2p_value += bin2_it->second; - ++bin2_it; - } - ++bin1_it; - } - ++send_it; - } - - return p2p_value; - - } - - } else if( trace > 0 && sender == 0 && receiver == 0 ) { - if( bin_1 > 0 && bin_2 > 0 ) { - - send_it = p2p_map[trace].begin(); - while(send_it != p2p_map[trace].end() ) { - recv_it = send_it->second.begin(); - while( recv_it != send_it->second.end() ) { - p2p_value += 
recv_it->second[bin_1][bin_2]; - ++recv_it; - } - ++send_it; - } - - return p2p_value; - - } else if( bin_1 > 0 ) { - - send_it = p2p_map[trace].begin(); - while(send_it != p2p_map[trace].end() ) { - recv_it = send_it->second.begin(); - while( recv_it != send_it->second.end() ) { - bin2_it = recv_it->second[bin_1].begin(); - while( bin2_it != recv_it->second[bin_1].end() ) { - p2p_value += bin2_it->second; - ++bin2_it; - } - ++recv_it; - } - ++send_it; - } - - return p2p_value; - - } - - } - - /* this case should never appear, however following part fits for each case (but it is slow) */ - /*cerr << "Unknown constellation while summarising P2P values." << endl;*/ - - for( trace_it = p2p_map.begin(); trace_it != p2p_map.end(); ++trace_it) { - for( send_it = trace_it->second.begin(); send_it != trace_it->second.end(); ++send_it ) { - for( recv_it = send_it->second.begin(); recv_it != send_it->second.end(); ++recv_it ) { - for( bin1_it = recv_it->second.begin(); bin1_it != recv_it->second.end(); ++bin1_it ) { - for( bin2_it = bin1_it->second.begin(); bin2_it != bin1_it->second.end(); ++bin2_it ) { - - } - } - } - } - } - - return p2p_value; -} - -CollOp_Value Summary_Container::get_CollOp(uint32_t trace, uint32_t proc, uint32_t collop) -{ - CollOp_Key collop_key(trace, proc, collop); - CollOp_Value collop_value; - CollOpMap::iterator it; - - if(trace == 0 || proc == 0 || collop == 0) - { - it = collop_map.begin(); - while(it != collop_map.end()) - { - if(it->first == collop_key) - collop_value += it->second; - ++it; - } - return collop_value; - } - else - { - it = collop_map.find(collop_key); - if(it == collop_map.end()) - return collop_value; - else - return collop_value += it->second; - } -} - -CollOp_Value Summary_Container::get_CollOpType(uint32_t trace, uint32_t proc, uint32_t type) -{ - if(type == 0) - { - cerr << "Sorry, type has to be greater then 0, because type has no wildcard option." 
- << endl; - } - - CollOp_Value collop_value; - CollOpMap::iterator it; - - if(trace == 0 || proc == 0) - { - it = collop_map.begin(); - while(it != collop_map.end()) - { - if(((it->first.trace == trace) || (trace == 0)) && - ((it->first.proc == proc) || (proc == 0)) && - (type == get_CollOpType_Def(it->first.trace,it->first.collop))) - { - collop_value += it->second; - } - ++it; - } - return collop_value; - } - else - { - vector collop_id; - get_CollOp_Def_Key(trace, collop_id); - vector::iterator it2 = collop_id.begin(); - while(it2 != collop_id.end()) - { - if(type == get_CollOpType_Def(trace, *it2)) - { - CollOp_Key collop_key(trace,proc,*it2); - it = collop_map.find(collop_key); - if(it != collop_map.end()) - { - collop_value += it->second; - } - } - ++it2; - } - return collop_value; - } -} - -uint64_t Summary_Container:: get_ProgTime(uint32_t trace) -{ - ProgTimeMap::iterator it = progtime_map.find(trace); - if(it == progtime_map.end()) - return 0; - else - return it->second; -} - -uint64_t Summary_Container::get_ProcTime(uint32_t trace, uint32_t proc) -{ - Process_Def_Key proc_def_key(trace, proc); - uint64_t time = 0; - ProcTimeMap::iterator it; - - if((trace == 0) || (proc == 0)) - { - it = proctime_map.begin(); - while(it != proctime_map.end()) - { - if(it->first == proc_def_key) - time += it->second; - ++it; - } - return time; - } - else - { - it = proctime_map.find(proc_def_key); - if(it == proctime_map.end()) - return time; - else - return time += it->second; - } -} -uint64_t Summary_Container::get_ticks(uint32_t trace) -{ - TicksDefMap::iterator it = ticks_def_map.find(trace); - if(it == ticks_def_map.end()) - return 0; - else - return it->second; -} - -uint32_t Summary_Container::get_bin_1(uint64_t length) -{ - uint32_t N = 24; - uint32_t i; - uint64_t border = 1; - for(i = 1; i < N; i++) - { - if(length <= border) - return i; - border <<= 1; - } - return N; // length is bigger than the last border -} - -uint32_t Summary_Container::get_bin_2(double 
speed) -{ - uint64_t sp = (uint64_t) speed; - uint32_t N = 16; - uint32_t i; - uint64_t border = 4; - for(i = 1; i < N; i++) - { - if(sp <= border) - return i; - border <<= 2; - } - return N; // speed is bigger than the last border -} - - -int Summary_Container::get_color_gray(double min, double max, double value, - float& red, float& green, float& blue) -{ - if((value == min) || (min == max)) - { - red = 0.9f; green = 0.9f; blue = 0.9f; - return 0; - } - if(value == max) - { - red = 1.0; green = 0.0; blue = 0.0; - return 0; - } - - double factor = (max - min) / 5.0; - uint32_t part = (uint32_t) (((value - min) * 5.0) / (max - min)); - double min_temp = min + (factor * part); - double max_temp = min + (factor * (part + 1.0)); - double part_temp; - - if(value == min_temp) - part_temp = 0.0; - else if(value == max_temp) - part_temp = 1.0; - else - part_temp = (value - min_temp) / (max_temp - min_temp); - if (part == 0) - part_temp = part_temp / 2; - switch(part) - { - case 0 : - red = (float) (0.9 - part_temp); - green = (float) (0.9 - part_temp); - blue = (float) (0.9 - part_temp); - break; - case 1 : - red = (float) (0.0); - green = (float) (part_temp); - blue = (float) (1.0); - break; - case 2 : - red = (float) (0.0); - green = (float) (1.0); - blue = (float) (1.0 - part_temp); - break; - case 3 : - red = (float) (part_temp); - green = (float) (1.0); - blue = (float) (0.0); - break; - case 4 : - red = (float) (1.0); - green = (float) (1.0 - part_temp); - blue = (float) (0.0); - break; - default : cerr << "Error in get_color(). Wrong part calculated." 
<< endl; return 1; - } - - return 0; -} - -int Summary_Container::get_color(double min, double max, double value, - float& red, float& green, float& blue) -{ - if((value == min) || (min == max)) - { - red = 0.0; green = 0.0; blue = 1.0; - return 0; - } - if(value == max) - { - red = 1.0; green = 0.0; blue = 0.0; - return 0; - } - - double factor = (max - min) / 4.0; - uint32_t part = (uint32_t) (((value - min) * 4.0) / (max - min)); - double min_temp = min + (factor * part); - double max_temp = min + (factor * (part + 1.0)); - double part_temp; - - if(value == min_temp) - part_temp = 0.0; - else if(value == max_temp) - part_temp = 1.0; - else - part_temp = (value - min_temp) / (max_temp - min_temp); - - switch(part) - { - case 0 : red = 0.0f; green = (float) part_temp; blue = 1.0f; break; - case 1 : red = 0.0f; green = 1.0f; blue = (float) (1.0 - part_temp); break; - case 2: red = (float) part_temp; green = 1.0f; blue = 0.0f; break; - case 3 : red = 1.0f; green = (float) (1.0 - part_temp); blue = 0.0f; break; - default : cerr << "Error in get_color(). Wrong part calculated." 
<< endl; return 1; - } - - return 0; -} - -int Summary_Container::get_gray(double min, double max, double value, - float& red, float& green, float& blue) -{ - if((value == min) || (min == max)) - { - red = 1.0; green = 1.0; blue = 1.0; - return 0; - } - if(value == max) - { - red = 0.0; green = 0.0; blue = 0.0; - return 0; - } - - double part = (value - min) / (max - min); - - red = (float) (1.0 - part); - green = (float) (1.0 - part); - blue = (float) (1.0 - part); - - return 0; -} - -int Summary_Container::get_color_gray(uint64_t min, uint64_t max, uint64_t value, - float& red, float& green, float& blue) -{ - return get_color((double) min, (double) max, (double) value, red, green, blue); -} - -int Summary_Container::get_color(uint64_t min, uint64_t max, uint64_t value, - float& red, float& green, float& blue) -{ - return get_color((double) min, (double) max, (double) value, red, green, blue); -} - -int Summary_Container::get_gray(uint64_t min, uint64_t max, uint64_t value, - float& red, float& green, float& blue) -{ - return get_color((double) min, (double) max, (double) value, red, green, blue); -} - -int Summary_Container::csv_Function(fstream& out, uint32_t trace) -{ - if(trace == 0) - { - cerr << "Error in csv_Function. 
Second parameter was 0" << endl; - return 1; - } - TicksDefMap::iterator it_t = ticks_def_map.find(trace); - FunctionMap::iterator it = function_map.begin(); - CounterDefMap::iterator it_c = counter_def_map.begin(); - CounterMap::iterator it_c2 = counter_map.begin(); - Counter_Value c_value; - - out << ";;;;;"; - while(it_c != counter_def_map.end()) - { - if(((Counter_Def_Key) it_c->first).get_trace() == trace) - { - out << ";" << it_c->second.get_name(); - out << ";" << it_c->second.get_name(); - } - ++it_c; - } - out << endl; - out << "Function;FunctionGroup;Process;Invocation;Exclusive Time;Inclusive Time"; - - it_c = counter_def_map.begin(); - while(it_c != counter_def_map.end()) - { - if(((Counter_Def_Key) it_c->first).get_trace() == trace) - { - out << ";Exclusive Value;Inclusive Value"; - } - ++it_c; - } - out << endl; - while(it != function_map.end()) - { - if(it->first.trace == trace) - { - out << get_Function_Def(trace, it->first.func).get_name() - << ";" << get_FG_Def(trace, get_Function_Def(trace, it->first.func).get_funcgroup_id()) - << ";" << get_Process_Def(trace, it->first.proc) - << ";" << it->second.invoc - << ";" << (double) it->second.excl_time / (double) it_t->second - << ";" << (double) it->second.incl_time / (double) it_t->second; - it_c = counter_def_map.begin(); - while(it_c != counter_def_map.end()) - { - Counter_Key c_key(trace,it->first.func,it->first.proc,((Counter_Def_Key) it_c->first).get_ident()); - it_c2 = counter_map.find(c_key); - if(it_c2 == counter_map.end()) - out << ";no value"; - else if(it_c2->second.valid == INVALID) - { - out << ";invalid value"; - } - else - { - out << ";" << it_c2->second.excl_value - << ";" << it_c2->second.incl_value; - } - ++it_c; - } - out << endl; - } - ++it; - } - out << endl; - - return 0; -} - -int Summary_Container::csv_P2P(fstream& out, uint32_t trace) -{ - if(trace == 0) - { - cerr << "Error in csv_P2P. 
Second parameter was 0" << endl; - return 1; - } - uint64_t temp = 1; - double duration; - TicksDefMap::iterator it_t = ticks_def_map.find(trace); - - map > > >::iterator send_iter; - map > >::iterator recv_iter; - map >::iterator bin1_iter; - map::iterator bin2_iter; - - out << "Process to;Process;Msg Length Field;Rate Field;Invocation;Msg Length;Duration;Rate" << endl; - - for( send_iter = p2p_map[trace].begin(); send_iter != p2p_map[trace].end(); ++send_iter ) { - for( recv_iter = send_iter->second.begin(); recv_iter != send_iter->second.end(); ++recv_iter ) { - for( bin1_iter = recv_iter->second.begin(); bin1_iter != recv_iter->second.end(); ++bin1_iter ) { - for( bin2_iter = bin1_iter->second.begin(); bin2_iter != bin1_iter->second.end(); ++bin2_iter ) { - - duration = (double) bin2_iter->second.time / (double) it_t->second; - out << get_Process_Def(trace,send_iter->first) - << ";" << get_Process_Def(trace,recv_iter->first) - << ";<" << (temp << bin1_iter->first) - << ";<" << (temp << (bin2_iter->first * 2)) - << ";" << bin2_iter->second.invoc - << ";" << bin2_iter->second.length - << ";" << duration - << ";" << (double) bin2_iter->second.length / duration << endl; - - } - } - } - } - - return 0; -} - -int Summary_Container::csv_CollOp(fstream& out, uint32_t trace) -{ - if(trace == 0) - { - cerr << "Error in csv_P2P. 
Second parameter was 0" << endl; - return 1; - } - double duration; - TicksDefMap::iterator it_t = ticks_def_map.find(trace); - CollOpMap::iterator it = collop_map.begin(); - out << "Process;Name;Type;Send Invocation;Send Msg Length;Receive Invocation" - << ";Receive Msg Length;Duration;Rate" << endl; - while(it != collop_map.end()) - { - if(it->first.trace == trace) - { - duration = (double) it->second.time / (double) it_t->second; - out << get_Process_Def(trace,it->first.proc) - << ";" << get_CollOp_Def(trace,it->first.collop).get_name(); - switch(get_CollOp_Def(trace,it->first.collop).get_type()) - { - case OTF_COLLECTIVE_TYPE_BARRIER : out << ";Barrier";break; - case OTF_COLLECTIVE_TYPE_ONE2ALL : out << ";ONE2ALL";break; - case OTF_COLLECTIVE_TYPE_ALL2ONE : out << ";ALL2ONE";break; - case OTF_COLLECTIVE_TYPE_ALL2ALL : out << ";ALL2ALL";break; - default : out << ";Unknown Type"; - } - out << ";" << it->second.invoc_send - << ";" << it->second.invoc_receive - << ";" << it->second.length_send - << ";" << it->second.length_receive - << ";" << duration - << ";" << (double) (it->second.length_send + it->second.length_receive) / duration - << endl; - } - ++it; - } - - return 0; -} - -int Summary_Container::csv_Data(fstream& out, uint32_t trace) -{ - if(trace == 0) - { - cerr << "Error in csv_P2P. 
Second parameter was 0" << endl; - return 1; - } - TicksDefMap::iterator it_t = ticks_def_map.find(trace); - ProgTimeMap::iterator it_pgt = progtime_map.find(trace); - ProcDefMap::iterator it_pd = proc_def_map.begin(); - FGDefMap::iterator it_fgd = fg_def_map.begin(); - FuncDefMap::iterator it_fd = func_def_map.begin(); - FunctionMap::iterator it_f = function_map.begin(); - CounterDefMap::iterator it_cd = counter_def_map.begin(); - CounterMap::iterator it_c = counter_map.begin(); - CollOpDefMap::iterator it_cod = collop_def_map.begin(); - CollOpMap::iterator it_co = collop_map.begin(); - Bin1Map::iterator it_b1 = bin_1_map.begin(); - Bin2Map::iterator it_b2 = bin_2_map.begin(); - ProcTimeMap::iterator it_pt = proctime_map.begin(); - - Counter_Value c_value; - - map > > >::iterator send_iter; - map > >::iterator recv_iter; - map >::iterator bin1_iter; - map::iterator bin2_iter; - - - if(it_t != ticks_def_map.end()) - out << "ticks;" << it_t->second << endl; - else - out << "ticks;1" << endl; - - if(it_pgt != progtime_map.end()) - out << "progtime;" << it_pgt->second << endl; - else - out << "progtime;0" << endl; - - while(it_pt != proctime_map.end()) - { - if(((Process_Def_Key) it_pt->first).get_trace() == trace) - { - out << "proctime" - << ";" << ((Process_Def_Key) it_pt->first).get_ident() - << ";" << it_pt->second - << endl; - } - ++it_pt; - } - while(it_pd != proc_def_map.end()) - { - if(((Process_Def_Key) it_pd->first).get_trace() == trace) - { - out << "proc" - << ";" << ((Process_Def_Key) it_pd->first).get_ident() - << ";" << it_pd->second - << endl; - } - ++it_pd; - } - while(it_fgd != fg_def_map.end()) - { - if(((FG_Def_Key) it_fgd->first).get_trace() == trace) - { - out << "fg" - << ";" << ((FG_Def_Key) it_fgd->first).get_ident() - << ";" << it_fgd->second - << endl; - } - ++it_fgd; - } - while(it_fd != func_def_map.end()) - { - if(((Function_Def_Key) it_fd->first).get_trace() == trace) - { - out << "funcdef" - << ";" << ((Function_Def_Key) 
it_fd->first).get_ident() - << ";" << it_fd->second.get_name() - << ";" << it_fd->second.get_funcgroup_id() - << endl; - } - ++it_fd; - } - while(it_f != function_map.end()) - { - if(it_f->first.trace == trace) - { - out << "func" - << ";" << it_f->first.func - << ";" << it_f->first.proc - << ";" << it_f->second.invoc - << ";" << it_f->second.excl_time - << ";" << it_f->second.incl_time - << endl; - } - ++it_f; - } - while(it_cd != counter_def_map.end()) - { - if(((Counter_Def_Key) it_cd->first).get_trace() == trace) - { - out << "counterdef" - << ";" << ((Counter_Def_Key) it_cd->first).get_ident() - << ";" << it_cd->second.get_name() - << ";" << it_cd->second.get_unit() - << endl; - } - ++it_cd; - } - while(it_c != counter_map.end()) - { - if(it_c->first.trace == trace) - { - out << "counter" - << ";" << it_c->first.func - << ";" << it_c->first.proc - << ";" << it_c->first.counter; - if(it_c->second.valid == VALID) - { - out << ";" << it_c->second.excl_value - << ";" << it_c->second.incl_value - << ";VALID"; - } - else - { - out << ";0;0;INVALID"; - } - out << endl; - } - ++it_c; - } - - for( send_iter = p2p_map[trace].begin(); send_iter != p2p_map[trace].end(); ++send_iter ) { - for( recv_iter = send_iter->second.begin(); recv_iter != send_iter->second.end(); ++recv_iter ) { - for( bin1_iter = recv_iter->second.begin(); bin1_iter != recv_iter->second.end(); ++bin1_iter ) { - for( bin2_iter = bin1_iter->second.begin(); bin2_iter != bin1_iter->second.end(); ++bin2_iter ) { - - out << "p2p" - << ";" << send_iter->first - << ";" << recv_iter->first - << ";" << bin1_iter->first - << ";" << bin2_iter->first - << ";" << bin2_iter->second.invoc - << ";" << bin2_iter->second.length - << ";" << bin2_iter->second.time - << endl; - - } - } - } - } - - while(it_cod != collop_def_map.end()) - { - if(((CollOp_Def_Key) it_cod->first).get_trace() == trace) - { - out << "collopdef" - << ";" << ((CollOp_Def_Key) it_cod->first).get_ident() - << ";" << it_cod->second.get_name(); - 
switch(it_cod->second.get_type()) - { - case OTF_COLLECTIVE_TYPE_BARRIER : out << ";Barrier";break; - case OTF_COLLECTIVE_TYPE_ONE2ALL : out << ";ONE2ALL";break; - case OTF_COLLECTIVE_TYPE_ALL2ONE : out << ";ALL2ONE";break; - case OTF_COLLECTIVE_TYPE_ALL2ALL : out << ";ALL2ALL";break; - default : out << ";UNKNOWN"; - } - out << endl; - } - ++it_cod; - } - while(it_co != collop_map.end()) - { - if(it_co->first.trace == trace) - { - out << "collop" - << ";" << it_co->first.proc - << ";" << it_co->first.collop - << ";" << it_co->second.invoc_send - << ";" << it_co->second.invoc_receive - << ";" << it_co->second.length_send - << ";" << it_co->second.length_receive - << ";" << it_co->second.time - << endl; - } - ++it_co; - } - while(it_b1 != bin_1_map.end()) - { - if(((Bin_1_Key) it_b1->first).get_trace() == trace) - { - out << "bin1" - << ";" << ((Bin_1_Key) it_b1->first).get_ident() - << ";" << it_b1->second.get_min_value() - << ";" << it_b1->second.get_max_value() - << endl; - } - ++it_b1; - } - while(it_b2 != bin_2_map.end()) - { - if(((Bin_2_Key) it_b2->first).get_trace() == trace) - { - out << "bin2" - << ";" << ((Bin_2_Key) it_b2->first).get_ident() - << ";" << it_b2->second.get_min_value() - << ";" << it_b2->second.get_max_value() - << endl; - } - ++it_b2; - } - return 0; -} - -bool Summary_Container::find_FG(FG_Def_Key fg_def_key) -{ - FGDefMap::iterator it = fg_def_map.find(fg_def_key); - if(it == fg_def_map.end()) - return false; - else - return true; -} - -bool Summary_Container::find_Function(Function_Def_Key f_def_key) -{ - FuncDefMap::iterator it = func_def_map.find(f_def_key); - if(it == func_def_map.end()) - return false; - else - return true; -} - -bool Summary_Container::find_Counter(Counter_Def_Key c_def_key) -{ - CounterDefMap::iterator it = counter_def_map.find(c_def_key); - if(it == counter_def_map.end()) - return false; - else - return true; -} - -bool Summary_Container::find_CollOp(CollOp_Def_Key coll_def_key) -{ - CollOpDefMap::iterator it = 
collop_def_map.find(coll_def_key); - if(it == collop_def_map.end()) - return false; - else - return true; -} - -int Summary_Container::mergeContainer(Summary_Container& container) { - FunctionMap fmap = container.function_map; - FunctionMap::iterator fit; - - for(fit = fmap.begin(); fit!=fmap.end(); fit++) { - this->function_map[fit->first] += fit->second; - } - - CounterMap cmap = container.counter_map; - CounterMap::iterator cit; - for(cit = cmap.begin(); cit!=cmap.end(); cit++) { - this->counter_map[cit->first] += cit->second; - } - - map > > > >::iterator trace_iter; - map > > >::iterator send_iter; - map > >::iterator recv_iter; - map >::iterator bin1_iter; - map::iterator bin2_iter; - - for( trace_iter = container.p2p_map.begin(); trace_iter != container.p2p_map.end(); ++trace_iter ) { - for( send_iter = trace_iter->second.begin(); send_iter != trace_iter->second.end(); ++send_iter ) { - for( recv_iter = send_iter->second.begin(); recv_iter != send_iter->second.end(); ++recv_iter ) { - for( bin1_iter = recv_iter->second.begin(); bin1_iter != recv_iter->second.end(); ++bin1_iter ) { - for( bin2_iter = bin1_iter->second.begin(); bin2_iter != bin1_iter->second.end(); ++bin2_iter ) { - - this->p2p_map[trace_iter->first][send_iter->first][recv_iter->first][bin1_iter->first][bin2_iter->first] += bin2_iter->second; - - } - } - } - } - } - - CollOpMap colmap = container.collop_map; - CollOpMap::iterator colit; - for(colit = colmap.begin(); colit!=colmap.end(); colit++) { - this->collop_map[colit->first] = colit->second; - } - - ProcTimeMap procmap = container.proctime_map; - ProcTimeMap::iterator procit; - for(procit = procmap.begin(); procit!=procmap.end(); procit++) { - this->proctime_map[procit->first] += procit->second; - } - - return 0; -} - diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/DataStructure.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/DataStructure.h deleted file mode 100644 index 955c165b7c..0000000000 --- 
a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/DataStructure.h +++ /dev/null @@ -1,875 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#ifndef DATASTRUCTURE_H -#define DATASTRUCTURE_H - -#include -#include -#include -#include - -#include "OTF_inttypes.h" -#include "otf.h" - -#include "Definitions.h" - -/* show wether the result is valid or invalid */ -#define INVALID false -#define VALID true - -using namespace std; - -/*************************************** FG *****************************************/ -/********** Def_Key **********/ - -class FG_Def_Key -{ - friend bool operator<(const FG_Def_Key& c1, const FG_Def_Key& c2); - friend bool operator==(const FG_Def_Key& c1, const FG_Def_Key& c2); - - public: - FG_Def_Key(uint32_t tr, uint32_t fg_id); - uint32_t get_trace(); - uint32_t get_ident(); - private: - uint32_t trace; - uint32_t ident; -}; - -inline FG_Def_Key::FG_Def_Key(uint32_t tr, uint32_t fg_id) - :trace(tr), ident(fg_id) -{ -} - -inline uint32_t FG_Def_Key::get_trace() -{ - return trace; -} - -inline uint32_t FG_Def_Key::get_ident() -{ - return ident; -} - -/*************************************** Process *****************************************/ -/********** Def_Key **********/ - -class Process_Def_Key -{ - friend bool operator<(const Process_Def_Key& c1, const Process_Def_Key& c2); - friend bool operator==(const Process_Def_Key& c1, const Process_Def_Key& c2); - - public: - Process_Def_Key(uint32_t tr, uint32_t p_id); - uint32_t get_trace(); - uint32_t get_ident(); - private: - uint32_t trace; - uint32_t ident; -}; - -inline Process_Def_Key::Process_Def_Key(uint32_t tr, uint32_t p_id) - :trace(tr), ident(p_id) -{ -} - -inline uint32_t Process_Def_Key::get_trace() -{ - return trace; -} - -inline uint32_t Process_Def_Key::get_ident() -{ - return ident; -} - -/*************************************** Function 
*****************************************/ -/********** Def_Key **********/ - -class Function_Def_Key -{ - friend bool operator<(const Function_Def_Key& c1, const Function_Def_Key& c2); - friend bool operator==(const Function_Def_Key& c1, const Function_Def_Key& c2); - public: - Function_Def_Key(uint32_t tr, uint32_t f_id); - uint32_t get_trace(); - uint32_t get_ident(); - private: - uint32_t trace; - uint32_t ident; -}; - -inline Function_Def_Key::Function_Def_Key(uint32_t tr, uint32_t f_id) - :trace(tr), ident(f_id) -{ -} - -inline uint32_t Function_Def_Key::get_trace() -{ - return trace; -} - -inline uint32_t Function_Def_Key::get_ident() -{ - return ident; -} - -/********** Def **********/ - -class Function_Def -{ - public: - Function_Def(const char* n, uint32_t fg); - uint32_t get_funcgroup_id(); - const char* get_name(); - private: - const char* name; - uint32_t funcgroup_id; -}; - -inline Function_Def::Function_Def(const char* n, uint32_t fg) - :name(n), funcgroup_id(fg) -{ -} - -inline const char* Function_Def::get_name() -{ - return name; -} - -inline uint32_t Function_Def::get_funcgroup_id() -{ - return funcgroup_id; -} - -/********** Key **********/ - -class Function_Key -{ - friend bool operator<(const Function_Key& f_key1, const Function_Key& f_keyy2); - friend bool operator==(const Function_Key& f_key1,const Function_Key& f_key2); - public: - Function_Key(uint32_t tr, uint32_t f, uint32_t p); - Function_Key(const Function_Key& f_key); - //private: (wenn Tests erfolgreich wieder freigeben) - uint32_t trace; - uint32_t func; - uint32_t proc; -}; - -inline Function_Key::Function_Key(uint32_t tr, uint32_t f, uint32_t p) - :trace(tr), func(f), proc(p) -{ -} -inline Function_Key::Function_Key(const Function_Key& f_key) - :trace(f_key.trace), func(f_key.func), proc(f_key.proc) -{ -} - -/********** Value **********/ - -class Function_Value -{ - friend int print_Funtion(); - public: - Function_Value(); - Function_Value(uint64_t inv, uint64_t excl, uint64_t 
incl); - Function_Value(const Function_Value& f_value); - Function_Value& operator+=(const Function_Value& func_value_add); - Function_Value& operator=(const Function_Value& f_value); - uint64_t get_invoc(); - uint64_t get_excl_time(); - uint64_t get_incl_time(); - //private:(wenn Tests erfolgreich wieder freigeben) - uint64_t invoc; - uint64_t excl_time; - uint64_t incl_time; -}; - -inline Function_Value::Function_Value() -{ - invoc = 0; - excl_time = 0; - incl_time = 0; -} - -inline Function_Value::Function_Value(uint64_t inv, uint64_t excl, uint64_t incl) - :invoc(inv), excl_time(excl), incl_time(incl) -{ -} - -inline Function_Value::Function_Value(const Function_Value& f_value) - :invoc(f_value.invoc), excl_time(f_value.excl_time), incl_time(f_value.incl_time) -{ -} - -inline uint64_t Function_Value::get_invoc() -{ - return invoc; -} - -inline uint64_t Function_Value::get_excl_time() -{ - return excl_time; -} - -inline uint64_t Function_Value::get_incl_time() -{ - return incl_time; -} - -/*************************************** Counter ******************************************/ -/********** Def_Key **********/ - -class Counter_Def_Key -{ - friend bool operator<(const Counter_Def_Key& c1, const Counter_Def_Key& c2); - friend bool operator==(const Counter_Def_Key& c1, const Counter_Def_Key& c2); - public: - Counter_Def_Key(uint32_t tr, uint32_t c_id); - uint32_t get_trace(); - uint32_t get_ident(); - private: - uint32_t trace; - uint32_t ident; -}; - -inline Counter_Def_Key::Counter_Def_Key(uint32_t tr, uint32_t c_id) - :trace(tr), ident(c_id) -{ -} - -inline uint32_t Counter_Def_Key::get_trace() -{ - return trace; -} - -inline uint32_t Counter_Def_Key::get_ident() -{ - return ident; -} - -/********** Def **********/ - -class Counter_Def -{ - public: - Counter_Def(const char* n, const char* u); - const char* get_name(); - const char* get_unit(); - private: - const char* name; - const char* unit; -}; - -inline Counter_Def::Counter_Def(const char* n, const char* 
u) - :name(n), unit(u) -{ -} - -inline const char* Counter_Def::get_name() -{ - return name; -} - -inline const char* Counter_Def::get_unit() -{ - return unit; -} - -/********** Key **********/ - -class Counter_Key -{ - friend bool operator<(const Counter_Key& c_key1, const Counter_Key& c_key2); - friend bool operator==(const Counter_Key& c_key1,const Counter_Key& c_key2); - - public: - Counter_Key(uint32_t tr, uint32_t f, uint32_t p, uint32_t c); - Counter_Key(const Counter_Key& c_key); - //private:(wenn Tests erfolgreich wieder freigeben) - uint32_t trace; - uint32_t func; - uint32_t proc; - uint32_t counter; -}; - -inline Counter_Key::Counter_Key(uint32_t tr, uint32_t f, uint32_t p, uint32_t c) - :trace(tr), func(f), proc(p), counter(c) -{ -} - -inline Counter_Key::Counter_Key(const Counter_Key& c_key) - :trace(c_key.trace), func(c_key.func), proc(c_key.proc), counter(c_key.counter) -{ -} - -/********** Value **********/ - -class Counter_Value -{ - public: - Counter_Value(); - Counter_Value(bool val, uint64_t excl, uint64_t incl); - Counter_Value(const Counter_Value& c_value); - Counter_Value& operator+=(const Counter_Value& c_value); - Counter_Value& operator=(const Counter_Value& c_value); - bool get_valid(); - uint64_t get_excl_value(); - uint64_t get_incl_value(); - //private:(wenn Tests erfolgreich wieder freigeben) - bool valid; - uint64_t excl_value; - uint64_t incl_value; -}; - -inline Counter_Value::Counter_Value() -{ - valid = VALID; - excl_value = 0; - incl_value = 0; -} - -inline Counter_Value::Counter_Value(bool val, uint64_t excl, uint64_t incl) - :valid(val), excl_value(excl), incl_value(incl) -{ -} - -inline Counter_Value::Counter_Value(const Counter_Value& c_value) - :valid(c_value.valid), excl_value(c_value.excl_value), incl_value(c_value.incl_value) -{ -} - -inline bool Counter_Value::get_valid() -{ - return valid; -} - -inline uint64_t Counter_Value::get_excl_value() -{ - return excl_value; -} - -inline uint64_t 
Counter_Value::get_incl_value() -{ - return incl_value; -} - -/****************************************** P2P ********************************************/ -/********** Key **********/ - -class P2P_Key -{ - friend bool operator<(const P2P_Key& p2p_key1, const P2P_Key& p2p_key2); - friend bool operator==(const P2P_Key& p2p_key1,const P2P_Key& p2p_key2); - - public: - P2P_Key(uint32_t tr, uint32_t send, uint32_t rec, uint32_t b_1, uint32_t b_2); - P2P_Key(const P2P_Key& p2p_key); - //private:(wenn Tests erfolgreich wieder freigeben) - uint32_t trace; - uint32_t sender; - uint32_t receiver; - uint32_t bin_1; - uint32_t bin_2; -}; - -inline P2P_Key::P2P_Key(uint32_t tr, uint32_t send, uint32_t rec, uint32_t b_1, uint32_t b_2) - :trace(tr), sender(send), receiver(rec), bin_1(b_1), bin_2(b_2) -{ -} - -inline P2P_Key::P2P_Key(const P2P_Key& p2p_key) - :trace(p2p_key.trace), sender(p2p_key.sender), receiver(p2p_key.receiver), - bin_1(p2p_key.bin_1), bin_2(p2p_key.bin_2) -{ -} - -/********** Value **********/ - -class P2P_Value -{ - public: - P2P_Value(); - P2P_Value(uint64_t inv, uint64_t l, uint64_t t); - P2P_Value(const P2P_Value& p2p_value); - P2P_Value& operator+=(const P2P_Value& p2p_value); - P2P_Value& operator=(const P2P_Value& p2p_value); - uint64_t get_invoc(); - uint64_t get_length(); - uint64_t get_time(); - //private: (wenn Tests erfolgreich wieder freigeben) - uint64_t invoc; - uint64_t length; - uint64_t time; -}; - -inline P2P_Value::P2P_Value() -{ - invoc = 0; - length = 0; - time = 0; -} - -inline P2P_Value::P2P_Value(uint64_t inv, uint64_t l, uint64_t t) - :invoc(inv), length(l), time(t) -{ -} - -inline P2P_Value::P2P_Value(const P2P_Value& p2p_value) - :invoc(p2p_value.invoc), length(p2p_value.length), time(p2p_value.time) -{ -} - -inline uint64_t P2P_Value::get_invoc() -{ - return invoc; -} - -inline uint64_t P2P_Value::get_length() -{ - return length; -} - -inline uint64_t P2P_Value::get_time() -{ - return time; -} - 
-/**************************************** CollOp ******************************************/ -/********** Def_Key **********/ - -class CollOp_Def_Key -{ - friend bool operator<(const CollOp_Def_Key& c1, const CollOp_Def_Key& c2); - friend bool operator==(const CollOp_Def_Key& c1, const CollOp_Def_Key& c2); - public: - CollOp_Def_Key(uint32_t tr, uint32_t coll_id); - uint32_t get_trace(); - uint32_t get_ident(); - private: - uint32_t trace; - uint32_t ident; -}; - -inline CollOp_Def_Key::CollOp_Def_Key(uint32_t tr, uint32_t coll_id) - :trace(tr), ident(coll_id) -{ -} - -inline uint32_t CollOp_Def_Key::get_trace() -{ - return trace; -} - -inline uint32_t CollOp_Def_Key::get_ident() -{ - return ident; -} - -/********** Def **********/ - -class CollOp_Def -{ - public: - CollOp_Def(const char* n, uint32_t t); - const char* get_name(); - uint32_t get_type(); - private: - const char* name; - uint32_t type; -}; - -inline CollOp_Def::CollOp_Def(const char* n, uint32_t t) - :name(n), type(t) -{ -} - -inline const char* CollOp_Def::get_name() -{ - return name; -} - -inline uint32_t CollOp_Def::get_type() -{ - return type; -} - -/********** Key **********/ - -class CollOp_Key -{ - friend bool operator<(const CollOp_Key& coll_key1, const CollOp_Key& coll_key2); - friend bool operator==(const CollOp_Key& co_key1,const CollOp_Key& co_key2); - - public: - CollOp_Key(uint32_t tr, uint32_t p, uint32_t co); - CollOp_Key(const CollOp_Key& coll_key); - //private:(wenn Tests erfolgreich wieder freigeben) - uint32_t trace; - uint32_t proc; - uint32_t collop; -}; - -inline CollOp_Key::CollOp_Key(uint32_t tr, uint32_t p, uint32_t co) - :trace(tr), proc(p), collop(co) -{ -} - -inline CollOp_Key::CollOp_Key(const CollOp_Key& coll_key) - :trace(coll_key.trace), proc(coll_key.proc), collop(coll_key.collop) -{ -} - -/********** Value **********/ - -class CollOp_Value -{ - public: - CollOp_Value(); - CollOp_Value(uint64_t inv_s, uint64_t inv_r, uint64_t l_s, uint64_t l_r, uint64_t t); - 
CollOp_Value(const CollOp_Value& coll_value); - CollOp_Value& operator+=(const CollOp_Value& coll_value); - CollOp_Value& operator=(const CollOp_Value& co_value); - uint64_t get_invoc_send(); - uint64_t get_invoc_receive(); - uint64_t get_length_send(); - uint64_t get_length_receive(); - uint64_t get_time(); - //private:(wenn Tests erfolgreich wieder freigeben) - uint64_t invoc_send; - uint64_t invoc_receive; - uint64_t length_send; - uint64_t length_receive; - uint64_t time; -}; - -inline CollOp_Value::CollOp_Value() -{ - invoc_send = 0; - invoc_receive = 0; - length_send = 0; - length_receive = 0; - time = 0; -} - -inline CollOp_Value::CollOp_Value(uint64_t inv_s, uint64_t inv_r, uint64_t l_s, uint64_t l_r, - uint64_t t) - :invoc_send(inv_s), invoc_receive(inv_r), length_send(l_s), length_receive(l_r), time(t) -{ -} - -inline CollOp_Value::CollOp_Value(const CollOp_Value& coll_value) - :invoc_send(coll_value.invoc_send), invoc_receive(coll_value.invoc_receive), - length_send(coll_value.length_send), length_receive(coll_value.length_receive), - time(coll_value.time) -{ -} - -inline uint64_t CollOp_Value::get_invoc_send() -{ - return invoc_send; -} - -inline uint64_t CollOp_Value::get_invoc_receive() -{ - return invoc_receive; -} - -inline uint64_t CollOp_Value::get_length_send() -{ - return length_send; -} - -inline uint64_t CollOp_Value::get_length_receive() -{ - return length_receive; -} - -inline uint64_t CollOp_Value::get_time() -{ - return time; -} - -/**************************************** Bin_1 ******************************************/ -/********** Key **********/ - -class Bin_1_Key -{ - friend bool operator<(const Bin_1_Key& c1, const Bin_1_Key& c2); - friend bool operator==(const Bin_1_Key& c1, const Bin_1_Key& c2); - - public: - Bin_1_Key(uint32_t tr, uint32_t b); - uint32_t get_trace(); - uint32_t get_ident(); - private: - uint32_t trace; - uint32_t ident; -}; - -inline Bin_1_Key::Bin_1_Key(uint32_t tr, uint32_t b) - :trace(tr), ident(b) -{ -} - 
-inline uint32_t Bin_1_Key::get_trace() -{ - return trace; -} - -inline uint32_t Bin_1_Key::get_ident() -{ - return ident; -} - -/********** Value **********/ -class Bin_1_Value -{ - public: - Bin_1_Value(uint64_t min, uint64_t max); - uint64_t get_min_value(); - uint64_t get_max_value(); - private: - uint64_t min_value; - uint64_t max_value; -}; - -inline Bin_1_Value::Bin_1_Value(uint64_t min, uint64_t max) - :min_value(min), max_value(max) -{ -} - -inline uint64_t Bin_1_Value::get_min_value() -{ - return min_value; -} - -inline uint64_t Bin_1_Value::get_max_value() -{ - return max_value; -} - -/**************************************** Bin_2 ******************************************/ -/********** Key **********/ - -class Bin_2_Key -{ - friend bool operator<(const Bin_2_Key& c1, const Bin_2_Key& c2); - friend bool operator==(const Bin_2_Key& c1, const Bin_2_Key& c2); - - public: - Bin_2_Key(uint32_t tr, uint32_t b); - uint32_t get_trace(); - uint32_t get_ident(); - private: - uint32_t trace; - uint32_t ident; -}; - -inline Bin_2_Key::Bin_2_Key(uint32_t tr, uint32_t b) - :trace(tr), ident(b) -{ -} - -inline uint32_t Bin_2_Key::get_trace() -{ - return trace; -} - -inline uint32_t Bin_2_Key::get_ident() -{ - return ident; -} - -/********** Value **********/ -class Bin_2_Value -{ - public: - Bin_2_Value(uint64_t min, uint64_t max); - uint64_t get_min_value(); - uint64_t get_max_value(); - //private: - uint64_t min_value; - uint64_t max_value; -}; - -inline Bin_2_Value::Bin_2_Value(uint64_t min, uint64_t max) - :min_value(min), max_value(max) -{ -} - -inline uint64_t Bin_2_Value::get_min_value() -{ - return min_value; -} - -inline uint64_t Bin_2_Value::get_max_value() -{ - return max_value; -} - -/********************************** Summary_Container *************************************/ - -class Summary_Container -{ - /* maps with the values */ - typedef map FunctionMap; - typedef map CounterMap; - typedef map p2pTimeMap; - typedef map CollOpMap; - typedef map 
TraceMap; - typedef map ProcTimeMap; - typedef map ProgTimeMap; - /* maps with the definitions */ - typedef map FuncDefMap; - typedef map FGDefMap; - typedef map ProcDefMap; - typedef map CollOpDefMap; - typedef map CounterDefMap; - typedef map TicksDefMap; - typedef map Bin1Map; - typedef map Bin2Map; - - /* trace sender receiver bin1 bin2 Value */ - typedef map > > > > P2PMap; - - public: - int adddef_Function(Function_Def_Key f_def_key, Function_Def f_def); - int adddef_Counter(Counter_Def_Key c_def_key, Counter_Def c_def); - int adddef_FG(FG_Def_Key fg_def_key, const char* name); - int adddef_Proc(Process_Def_Key p_def_key, const char* name); - int adddef_CollOp(CollOp_Def_Key coll_def_key, CollOp_Def coll_def); - int adddef_Ticks(uint32_t trace, uint64_t ticks); - int adddef_Trace(uint32_t trace, const char* name); - int adddef_Bin(uint32_t trace); - int setdef_Bin1(uint32_t trace, uint32_t bin, uint64_t min, uint64_t max); - int setdef_Bin2(uint32_t trace, uint32_t bin, uint64_t min, uint64_t max); - int set_ProgTime(uint32_t trace, uint64_t time); - - int addvalues_Function(uint32_t trace, uint32_t func, uint32_t proc, uint64_t invoc, - uint64_t excl_time, uint64_t incl_time); - - int resetvalues_Function(uint32_t trace, uint32_t func, uint32_t proc, - uint64_t invoc, uint64_t excl_time, - uint64_t incl_time); - - int addvalues_Counter(uint32_t trace, uint32_t func, uint32_t proc, uint32_t counter, - uint64_t excl_value, uint64_t incl_value, bool valid); - - int addvalues_P2P(uint32_t trace, uint32_t sender, uint32_t receiver, uint32_t bin_1, - uint32_t bin_2, uint64_t invoc, uint64_t length, uint64_t time); - - int resetvalues_P2P(uint32_t trace, uint32_t sender, uint32_t receiver, uint32_t bin_1, - uint32_t bin_2, uint64_t invoc, uint64_t length, uint64_t time); - - int addvalues_CollOp(uint32_t trace, uint32_t proc, uint32_t type, uint64_t invoc_send, - uint64_t invoc_receive, uint64_t length_send, - uint64_t length_receive, uint64_t time); - - int 
resetvalues_CollOp(uint32_t trace, uint32_t proc, uint32_t collop, - uint64_t invoc_send, uint64_t invoc_receive, uint64_t length_send, uint64_t length_receive, uint64_t time); - - int addvalues_ProcTime(uint32_t trace, uint32_t proc,uint64_t time); - - int get_Function_Def_Key(uint32_t trace, vector& f_vector); - int get_Counter_Def_Key(uint32_t trace, vector& c_vector); - int get_Process_Def_Key(uint32_t trace, vector& p_vector); - int get_FG_Def_Key(uint32_t trace, vector& fg_vector); - int get_CollOp_Def_Key(uint32_t trace, vector& collop_vector); - int get_Bin1_Def_Key(uint32_t trace, vector& bin1_vector); - int get_Bin2_Def_Key(uint32_t trace, vector& bin2_vector); - int get_Trace(vector& trace_vector); - - Function_Def get_Function_Def(uint32_t trace, uint32_t func); - Counter_Def get_Counter_Def(uint32_t trace, uint32_t counter); - const char* get_Process_Def(uint32_t trace, uint32_t proc); - const char* get_FG_Def(uint32_t trace, uint32_t fg); - const char* get_Trace_name(uint32_t trace); - CollOp_Def get_CollOp_Def(uint32_t trace, uint32_t collop); - uint32_t get_CollOpType_Def(uint32_t trace, uint32_t collop); - - Function_Value get_Function(uint32_t trace, uint32_t func, uint32_t proc); - Counter_Value get_Counter(uint32_t trace, uint32_t func, uint32_t proc, - uint32_t counter); - P2P_Value get_P2P(uint32_t trace, uint32_t sender, uint32_t receiver, uint32_t bin_1, - uint32_t bin_2); - CollOp_Value get_CollOp(uint32_t trace, uint32_t proc, uint32_t collop); - CollOp_Value get_CollOpType(uint32_t trace, uint32_t proc, uint32_t type); - - uint64_t get_ProgTime(uint32_t trace); - uint64_t get_ProcTime(uint32_t trace, uint32_t proc); - uint64_t get_ticks(uint32_t trace); - uint32_t get_bin_1(uint64_t length); - uint32_t get_bin_2(double speed); - int get_color_gray(double min, double max, double value, - float& red, float& green, float& blue); - int get_color_gray(uint64_t min, uint64_t max, uint64_t value, - float& red, float& green, float& blue); - int 
get_color(double min, double max, double value, - float& red, float& green, float& blue); - int get_color(uint64_t min, uint64_t max, uint64_t value, - float& red, float& green, float& blue); - int get_gray(double min, double max, double value, - float& red, float& green, float& blue); - int get_gray(uint64_t min, uint64_t max, uint64_t value, - float& red, float& green, float& blue); - - bool find_FG(FG_Def_Key fg_def_key); - bool find_Function(Function_Def_Key f_def_key); - bool find_Counter(Counter_Def_Key c_def_key); - bool find_CollOp(CollOp_Def_Key coll_def_key); - - int csv_Function(fstream& out, uint32_t trace); - int csv_P2P(fstream& out, uint32_t trace); - int csv_CollOp(fstream& out, uint32_t trace); - int csv_Data(fstream& out, uint32_t trace); - int mergeContainer(Summary_Container& container); - - private: - FunctionMap function_map; - CounterMap counter_map; - P2PMap p2p_map; - p2pTimeMap p2p_time_map; - CollOpMap collop_map; - TraceMap trace_map; - ProcTimeMap proctime_map; - ProgTimeMap progtime_map; - - FuncDefMap func_def_map; - FGDefMap fg_def_map; - ProcDefMap proc_def_map; - CollOpDefMap collop_def_map; - CounterDefMap counter_def_map; - TicksDefMap ticks_def_map; - Bin1Map bin_1_map; - Bin2Map bin_2_map; -}; - -/* -template -bool operator<(const T& c1, const T& c2) -{ - if(c1.trace != c2.trace) - return (c1.trace < c2.trace); - else if(c1.ident != c2.ident) - return (c1.ident < c2.ident); - else - return false; -} - -template -bool operator==(const T& c1, const T& c2) -{ - if(((0 == c1.trace) || (0 == c2.trace) || (c1.trace == c2.trace)) && - ((0 == c1.ident) || (0 == c2.ident) || (c1.ident == c2.ident))) - return true; - else - return false; -} -*/ -#endif /* DATASTRUCTURE_H */ - diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Definitions.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Definitions.h deleted file mode 100644 index 2816ffe608..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Definitions.h +++ 
/dev/null @@ -1,64 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#ifndef DEFINITIONS_H -#define DEFINITIONS_H - -#define _BYTE 1.0 -#define KBYTE 1024.0 -#define MBYTE 1048576.0 -#define GBYTE 1073741824.0 -#define KILO 1000.0 -#define MEGA 1000000.0 -#define GIGA 1000000000.0 - -/* scale units */ - -#define SECOND " sec" -#define K_SECOND " K*sec" -#define M_SECOND " M*sec" -#define G_SECOND " G*sec" - -#define INVOC " \\#" -#define K_INVOC " K*\\#" -#define M_INVOC " M*\\#" -#define G_INVOC " G*\\#" -#define BYTE_SEC " Byte/s" -#define KBYTE_SEC " KByte/s" -#define MBYTE_SEC " MByte/s" -#define GBYTE_SEC " GByte/s" -#define BYTE_TEXT " Byte" -#define KBYTE_TEXT " KByte" -#define MBYTE_TEXT " MByte" -#define GBYTE_TEXT " GByte" - -/* specify which global summary should be printed */ - -#define TEX_OFF -1 -#define TEX_ALL 0 -#define TEX_ALLPLOT 1 -#define TEX_FUNC 2 -#define TEX_P2P 3 -#define TEX_COLLOP 4 - -/* definitions for prodtex */ - -#define TINY_TEX 6 //range of color (6 different colors) -#define SMALL_TEX 11 //range of color (11 different colors) -#define MIDDLE_TEX 16 //range of color (16 different colors) -#define LARGE_TEX 21 //range of color (21 different colors) -#define HUGE_TEX 26 //range of color (26 different colors) - -#define P2P_AV_RAT 1 // Average Rate P2P -#define P2P_AV_DUR 2 // Average Duration P2P -#define P2P_AV_LEN 3 // Average Message Length P2P -#define P2P_SUM_DUR 4 // Sum of Duration P2P -#define P2P_SUM_LEN 5 // Sum of Message Length P2P -#define P2P_ALL 6 // All plots - -#define MAXIMUM(x , y) (((x) > (y)) ? (x) : (y)) -#define MINIMUM(x , y) (((x) < (y)) ? 
(x) : (y)) - -#endif /* DEFINITIONS_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Handler.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Handler.cpp deleted file mode 100644 index dd62d448c3..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Handler.cpp +++ /dev/null @@ -1,457 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#include "OTF_Platform.h" - - -#include "Handler.h" -#include "Summary.h" - - -int save_temp(global_data* gd_ptr); - -/* SummaryHandler */ - -int handleFunctionSummary (void *firsthandlerarg, uint64_t time, uint32_t func, uint32_t proc, uint64_t invocations, uint64_t exclTime, uint64_t inclTime) { - - global_data* gd_ptr = (global_data*)firsthandlerarg; - gd_ptr->sum_container.resetvalues_Function(1,func, proc, invocations, exclTime, inclTime); - - return RETURN_HANDLER_OK; -} - -int handleMessageSummary (void *firsthandlerarg, uint64_t time, uint32_t process, uint32_t peer, uint32_t comm, uint32_t type, uint64_t sentNumber, uint64_t receivedNumber, uint64_t sentBytes, uint64_t receivedBytes) -{ - global_data* gd_ptr = (global_data*)firsthandlerarg; - - if(peer == 0) peer = (uint32_t) -1; - //int bin1 = gd_ptr->sum_container.get_bin_1(sentBytes); - gd_ptr->sum_container.resetvalues_P2P(1, process, peer, 1, 1, sentNumber, sentBytes, time); - gd_ptr->sum_container.resetvalues_P2P(1, peer, process, 1, 1, receivedNumber, receivedBytes, time); - - return RETURN_HANDLER_OK; -} - -int handleCollopSummary (void *firsthandlerarg, uint64_t time, uint32_t process, uint32_t comm, - uint32_t collective, uint64_t sentNumber, uint64_t receivedNumber, - uint64_t sentBytes, uint64_t receivedBytes) { - - /* message length in Summary is calculated differently compared to hanldeCollectiveOperation() */ - - global_data* gd_ptr = (global_data*)firsthandlerarg; - - gd_ptr->sum_container.resetvalues_CollOp(1, process, 
collective, sentNumber, receivedNumber, sentBytes, receivedBytes, time); - - return RETURN_HANDLER_OK; -} - -/****************/ - -int handleDefCreator(void *firsthandlerarg, uint32_t stream, const char *creator) { - - global_data* gd_ptr = (global_data*)firsthandlerarg; - gd_ptr->creator = creator; - - return RETURN_HANDLER_OK; -} - -int handleDefVersion(void *firsthandlerarg, uint32_t stream, uint8_t major, uint8_t minor, uint8_t sub, const char *string) { - - global_data* gd_ptr = (global_data*)firsthandlerarg; - char ver[30]; - - snprintf(ver,30,"%u.%u.%u %s",major,minor,sub,string); - gd_ptr->version = ver; - - return RETURN_HANDLER_OK; -} - -int handleDefTimerResolution(void* firsthandlerarg, uint32_t streamid, uint64_t ticks_per_sec) -{ - global_data* gd_ptr = (global_data*)firsthandlerarg; - gd_ptr->sum_container.adddef_Ticks(1, ticks_per_sec); - gd_ptr->ticks = ticks_per_sec; - return RETURN_HANDLER_OK; -} - -int handleDefFunction(void* firsthandlerarg, uint32_t streamid, - uint32_t func, const char* name, uint32_t group, uint32_t scltoken) -{ - global_data* gd_ptr = (global_data*)firsthandlerarg; - Function_Def_Key f_def_key(1, func); - if(name == NULL) - { - Function_Def f_def("Function", group); - gd_ptr->sum_container.adddef_Function(f_def_key, f_def); - } - else - { - Function_Def f_def(strdup(name), group); - gd_ptr->sum_container.adddef_Function(f_def_key, f_def); - } - return RETURN_HANDLER_OK; -} - -int handleDefFunctionGroup(void* firsthandlerarg, uint32_t streamid, - uint32_t funcg, const char* name) -{ - global_data* gd_ptr = (global_data*)firsthandlerarg; - FG_Def_Key fg_def_key(1, funcg); - if(name == NULL) - { - gd_ptr->sum_container.adddef_FG(fg_def_key, "FuncGroup"); - } - else - { - gd_ptr->sum_container.adddef_FG(fg_def_key, strdup(name)); - } - return RETURN_HANDLER_OK; -} - -int handleDefProcess(void* firsthandlerarg, uint32_t streamid, - uint32_t proc, const char* name, uint32_t parent) -{ - global_data* gd_ptr = 
(global_data*)firsthandlerarg; - Process_Def_Key p_def_key(1, proc); - if(name == NULL) - { - gd_ptr->sum_container.adddef_Proc(p_def_key, "Process"); - } - else - { - - char* dup= strdup( name ); - char* p= dup; - while ( '\0' != *p ) { - - if ( '_' == *p ) *p= ' '; - if ( '\\' == *p ) *p= ' '; - - p++; - } - - gd_ptr->sum_container.adddef_Proc(p_def_key, dup ); - } - Process p; - - /* ignore in lite mode */ - if ( ! lite ) { - - /* make empty recv_map for all processes */ - for(uint32_t i=1; i<= gd_ptr->num_cpu; i++) { - p.clear_recv_map(i); - } - } - - gd_ptr->p_map.insert(pair(proc, p)); - return RETURN_HANDLER_OK; -} - -int handleDefProcessGroup(void *firsthandlerarg, uint32_t stream, uint32_t procGroup, - const char *name, uint32_t numberOfProcs, const uint32_t *procs) -{ - global_data* gd_ptr= (global_data*)firsthandlerarg; - gd_ptr->p_group_map[procGroup] = numberOfProcs; - - return RETURN_HANDLER_OK; -} - -int handleDefCollectiveOperation(void* firsthandlerarg, uint32_t streamid, - uint32_t collop, const char* name, uint32_t type) -{ - global_data* gd_ptr= (global_data*)firsthandlerarg; - CollOp_Def_Key collop_def_key(1, collop); - if(name == NULL) - { - CollOp_Def collop_def("CollOp", type); - gd_ptr->sum_container.adddef_CollOp(collop_def_key, collop_def); - } - else - { - CollOp_Def collop_def(strdup(name), type); - gd_ptr->sum_container.adddef_CollOp(collop_def_key, collop_def); - } - return RETURN_HANDLER_OK; -} - -int handleDefCounter( void* firsthandlerarg, uint32_t streamid, - uint32_t counter, const char* name, uint32_t properties, - uint32_t countergroup, const char* unit ) -{ - if(properties == OTF_COUNTER_TYPE_ACC) - { - global_data* gd_ptr = (global_data*)firsthandlerarg; - Counter_Def_Key c_def_key(1, counter); - if(name == NULL) - { - Counter_Def c_def("Counter", strdup(unit)); - gd_ptr->sum_container.adddef_Counter(c_def_key,c_def); - } - else - { - Counter_Def c_def(strdup(name), strdup(unit)); - 
gd_ptr->sum_container.adddef_Counter(c_def_key,c_def); - } - } - return RETURN_HANDLER_OK; -} - -int handleEnter(void* firsthandlerarg, uint64_t time, uint32_t func, - uint32_t proc, uint32_t scltoken) -{ - global_data* gd_ptr = (global_data*)firsthandlerarg; - - if(time > gd_ptr->max_time && gd_ptr->clear_temp) { - return RETURN_HANDLER_ABORT; - } - else if(time > gd_ptr->max_time) - { - save_temp(gd_ptr); - gd_ptr->clear_temp = true; - return RETURN_HANDLER_ABORT; - } - - uint64_t enter_time = time; - - if(time < gd_ptr->min_time) - enter_time = gd_ptr->min_time; - - ProcessMap::iterator it_p = gd_ptr->p_map.find(proc); - if(it_p == gd_ptr->p_map.end()) - { - cerr << "\nprocess : " << proc - << "\n This process wasn't defined. That could cause a failure of the program" - << endl; - return OTF_RETURN_BREAK; - } - it_p->second.set_exclTime(func, enter_time); - it_p->second.set_proc_start(enter_time); - - if(gd_ptr->prog_start > enter_time) - { - gd_ptr->prog_start = enter_time; - } - - return RETURN_HANDLER_OK; -} - -int handleLeave(void* firsthandlerarg, uint64_t time, uint32_t func, - uint32_t proc, uint32_t scltoken) -{ - global_data* gd_ptr = (global_data*)firsthandlerarg; - pair data_exclt; - - if(time > gd_ptr->max_time && gd_ptr->clear_temp) { - return RETURN_HANDLER_ABORT; - } - else if(time > gd_ptr->max_time) - { - save_temp(gd_ptr); - gd_ptr->clear_temp = true; - return RETURN_HANDLER_ABORT; - } - - ProcessMap::iterator it_p = gd_ptr->p_map.find(proc); - - if(it_p == gd_ptr->p_map.end()) { - cerr << "\nFailure in the otf-file.The process in the leaving event doesn't exist." 
- << endl; - } - else - { - it_p->second.get_exclTime(func, proc, time, gd_ptr); - it_p->second.set_proc_end(time); - if(gd_ptr->prog_end < time) { - gd_ptr->prog_end = time; - } - } - - return RETURN_HANDLER_OK; -} - -int handleCounter(void* firsthandlerarg, uint64_t time, uint32_t proc, - uint32_t counter, uint64_t value) -{ - global_data* gd_ptr = (global_data*)firsthandlerarg; - Counter_Def_Key c_def_key(1, counter); - if(gd_ptr->sum_container.find_Counter(c_def_key)) - { - ProcessMap::iterator it_p = gd_ptr->p_map.find(proc); - if(it_p == gd_ptr->p_map.end()) - cerr << "\nFailure in the otf-file.The process in the counter event doesn't exist." - << endl; - else - it_p->second.set_counter(counter, time, value, gd_ptr); - } - - return RETURN_HANDLER_OK; -} - -int handleSendMsg(void* firsthandlerarg, uint64_t time, uint32_t sender, - uint32_t receiver, uint32_t communicator, uint32_t msgtype, uint32_t msglength, - uint32_t scltoken) -{ - global_data* gd_ptr = (global_data*)firsthandlerarg; - if(time > gd_ptr->max_time && gd_ptr->clear_temp) { - return RETURN_HANDLER_ABORT; - } - else if(time > gd_ptr->max_time) - { - save_temp(gd_ptr); - gd_ptr->clear_temp = true; - return RETURN_HANDLER_ABORT; - } - - ProcessMap::iterator it = gd_ptr->p_map.find(sender); - if(it == gd_ptr->p_map.end()) - cerr << "\nError in SendMsg event. Sender " << sender << " unknown." 
<< endl; - - if(time < gd_ptr->min_time) - it->second.set_mbyte_per_sec(sender, receiver, time, msgtype, INVALID, gd_ptr); - - else - it->second.set_mbyte_per_sec(sender, receiver, time, msgtype, VALID, gd_ptr); - - - return RETURN_HANDLER_OK; - -} - -int handleRecvMsg(void* firsthandlerarg, uint64_t time, uint32_t receiver, - uint32_t sender, uint32_t communicator, uint32_t msgtype, uint32_t msglength, - uint32_t scltoken) -{ - global_data* gd_ptr = (global_data*)firsthandlerarg; - if(time > gd_ptr->max_time && gd_ptr->clear_temp) { - return RETURN_HANDLER_ABORT; - } - else if(time > gd_ptr->max_time) - { - save_temp(gd_ptr); - gd_ptr->clear_temp = true; - return RETURN_HANDLER_ABORT; - } - - ProcessMap::iterator it = gd_ptr->p_map.find(receiver); - if(it == gd_ptr->p_map.end()) - cerr << "\nError in RecvMsg event. Sender " << sender << " unknown." << endl; - - it->second.get_mbyte_per_sec(sender, receiver, time, msglength, msgtype, gd_ptr); - - return RETURN_HANDLER_OK; -} - -int handleCollectiveOperation(void* firsthandlerarg, uint64_t time, - uint32_t proc, uint32_t collop, uint32_t procgroup, - uint32_t rootprocess, uint32_t sent, uint32_t received, - uint64_t duration, uint32_t scltoken) -{ - global_data* gd_ptr = (global_data*)firsthandlerarg; - uint32_t type; - if(time > gd_ptr->max_time && gd_ptr->clear_temp) - return RETURN_HANDLER_ABORT; - else if(time > gd_ptr->max_time) - { - save_temp(gd_ptr); - gd_ptr->clear_temp = true; - return RETURN_HANDLER_ABORT; - } - else if(time < gd_ptr->min_time) - return RETURN_HANDLER_OK; - - ProcessMap::iterator it_p = gd_ptr->p_map.find(proc); - if((type = gd_ptr->sum_container.get_CollOpType_Def(1, collop)) != 0) - { - if(proc == rootprocess) - it_p->second.set_data_collective(proc, collop, type, true, procgroup, sent, received, duration, - gd_ptr); - else - it_p->second.set_data_collective(proc, collop, type, false, procgroup, sent, received, duration, - gd_ptr); - } - else - { - cerr << "\nCollOp : " << collop << " 
started on Process " << proc << " wasn't defined." - << " That's a failure in the otf-file." << endl; - } - - return RETURN_HANDLER_OK; - -} - -int set_time_sum_container(global_data* gd_ptr) -{ - uint64_t time = 0; - uint64_t prog_time = 0; - ProcessMap::iterator it_p = gd_ptr->p_map.begin(); - while(it_p != gd_ptr->p_map.end()) - { - if(it_p->second.get_proc_end() < it_p->second.get_proc_start()) - { - time = 0; - cerr << "Error, proc_start is greater than proc_end." << endl; - } - else - { - time = it_p->second.get_proc_end() - it_p->second.get_proc_start(); - } - gd_ptr->sum_container.addvalues_ProcTime(1, it_p->first, time); - ++it_p; - } - if(gd_ptr->prog_end < gd_ptr->prog_start) - { - cerr << "Error, prog_start is greater than prog_end." << endl; - } - else - { - prog_time = gd_ptr->prog_end - gd_ptr->prog_start; - } - gd_ptr->sum_container.set_ProgTime(1, prog_time); - - return 0; -} - -int mergeProgTime(global_data* gd, global_data* data) { - if(gd->prog_start > data->prog_start) { - gd->prog_start = data->prog_start; - } - if(gd->prog_end < data->prog_end) { - gd->prog_end = data->prog_end; - } - - uint64_t time = gd->prog_end - gd->prog_start; - - gd->sum_container.set_ProgTime(1, time); - - return 0; -} - -/* this function clears the stack for the exclusive time in each object of the class Process */ - -int save_temp(global_data* gd_ptr) -{ - ProcessMap::iterator it_p = gd_ptr->p_map.begin(); - uint32_t f_id; - while(it_p != gd_ptr->p_map.end()) - { - - if(it_p->second.get_stack_status()) - ++it_p; - else - { - while(!it_p->second.get_stack_status()) - { - f_id = it_p->second.get_stack_top_func_id(); - it_p->second.get_exclTime(f_id, it_p->first, gd_ptr->max_time, gd_ptr); - } - ++it_p; - } - } - return 0; -} - - - - diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Handler.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Handler.h deleted file mode 100644 index 8e4491617f..0000000000 --- 
a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Handler.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#ifndef HANDLER_H -#define HANDLER_H - -#include "OTF_inttypes.h" - -#include "Summary.h" - -#define RETURN_HANDLER_OK 0 -#define RETURN_HANDLER_ABORT 1 - -using namespace std; - -/* SummaryHandler */ - -int handleFunctionSummary (void *firsthandlerarg, uint64_t time, uint32_t function, uint32_t process, uint64_t invocations, uint64_t exclTime, uint64_t inclTime); - -int handleMessageSummary (void *firsthandlerarg, uint64_t time, uint32_t process, uint32_t peer, uint32_t comm, uint32_t type, uint64_t sentNumber, uint64_t receivedNumber, uint64_t sentBytes, uint64_t receivedBytes); - -int handleCollopSummary (void *firsthandlerarg, uint64_t time, uint32_t process, uint32_t comm, uint32_t collective, - uint64_t sentNumber, uint64_t receivedNumber, uint64_t sentBytes, uint64_t receivedBytes); - -/*********************/ - -int handleDefCreator(void *firsthandlerarg, uint32_t stream, const char *creator); - -int handleDefVersion(void *firsthandlerarg, uint32_t stream, uint8_t major, uint8_t minor, uint8_t sub, const char *string); - -int handleDefTimerResolution(void* firsthandlerarg, uint32_t streamid, uint64_t ticks_per_sec); - -int handleDefFunction(void* firsthandlerarg, uint32_t streamid, - uint32_t func, const char* name, uint32_t group, uint32_t scltoken); - -int handleDefFunctionGroup(void* firsthandlerarg, uint32_t streamid, - uint32_t funcg, const char* name); - -int handleDefProcess(void* firsthandlerarg, uint32_t streamid, - uint32_t proc, const char* name, uint32_t parent); - -int handleDefProcessGroup(void *firsthandlerarg, uint32_t stream, - uint32_t procGroup, const char *name, uint32_t numberOfProcs, - const uint32_t *procs); - -int handleDefCollectiveOperation(void* firsthandlerarg, uint32_t streamid, - uint32_t collop, 
const char* name, uint32_t type); - -int handleDefCounter(void* firsthandlerarg, uint32_t streamid, - uint32_t counter, const char* name, uint32_t properties, - uint32_t countergroup, const char* unit); - -int handleEnter(void* firsthandlerarg, uint64_t time, uint32_t func, - uint32_t proc, uint32_t scltoken); - -int handleLeave(void* firsthandlerarg, uint64_t time, uint32_t func, - uint32_t proc, uint32_t scltoken); - -int handleCounter(void* firsthandlerarg, uint64_t time, uint32_t proc, - uint32_t counter, uint64_t value); - -int handleRecvMsg(void* firsthandlerarg, uint64_t time, uint32_t receiver, - uint32_t sender, uint32_t communicator, uint32_t msgtype, uint32_t msglength, - uint32_t scltoken); - -int handleSendMsg(void* firsthandlerarg, uint64_t time, uint32_t sender, - uint32_t receiver, uint32_t communicator, uint32_t msgtype, uint32_t msglength, - uint32_t scltoken); - -int handleCollectiveOperation(void* firsthandlerarg, uint64_t time, - uint32_t proc, uint32_t collop, uint32_t procgroup, - uint32_t rootprocess, uint32_t sent, uint32_t received, - uint64_t duration, uint32_t scltoken); - -int set_time_sum_container(global_data* gd_ptr); -int mergeProgTime(global_data* gd, global_data* data); - -#endif /* HANDLER_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Makefile.am index 127c6a3bc1..bd4d1af31b 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Makefile.am +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Makefile.am @@ -1,28 +1,18 @@ -INCLUDES = -I$(top_builddir)/otflib -I$(top_srcdir)/otflib +if AMHAVEMPI +SUBDIRS = . 
mpi +endif if AMBUILDBINARIES -bin_PROGRAMS = \ - otfprofile +bin_PROGRAMS = otfprofile endif -if AMHAVEOMP -otfprofile_CXXFLAGS = $(OPENMP_CXXFLAGS) -otfprofile_LDFLAGS = $(OPENMP_CXXFLAGS) -endif +OTFPROFILESRCDIR = $(srcdir) +include $(srcdir)/Makefile.common -otfprofile_LDADD = $(top_builddir)/otflib/libotf.la $(MATHLIB) -otfprofile_DEPENDENCIES = $(top_builddir)/otflib/libotf.la -otfprofile_SOURCES = \ - CSVParse.h \ - DataStructure.h \ - Definitions.h \ - Handler.h \ - Prodtex.h \ - Summary.h \ - otfprofile.cpp \ - CSVParse.cpp \ - DataStructure.cpp \ - Handler.cpp \ - Prodtex.cpp \ - Summary.cpp +INCLUDES = $(COMMONINCLUDES) + +otfprofile_CXXFLAGS = $(COMMONCXXFLAGS) +otfprofile_LDADD = $(COMMONLDADD) +otfprofile_DEPENDENCIES = $(COMMONDEPENDENCIES) +otfprofile_SOURCES = $(COMMONSOURCES) diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Makefile.common b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Makefile.common new file mode 100644 index 0000000000..e06ac88fab --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Makefile.common @@ -0,0 +1,27 @@ +COMMONINCLUDES = \ + -I$(OTFPROFILESRCDIR) \ + -I$(top_builddir)/otflib -I$(top_srcdir)/otflib \ + -I$(top_builddir)/otfauxlib -I$(top_srcdir)/otfauxlib + +COMMONCXXFLAGS = +COMMONLDADD = $(top_builddir)/otflib/libotf.la $(MATHLIB) +COMMONDEPENDENCIES = $(top_builddir)/otflib/libotf.la +COMMONSOURCES = \ + $(OTFPROFILESRCDIR)/clustering.h \ + $(OTFPROFILESRCDIR)/collect_data.h \ + $(OTFPROFILESRCDIR)/comparison.h \ + $(OTFPROFILESRCDIR)/create_csv.h \ + $(OTFPROFILESRCDIR)/create_latex.h \ + $(OTFPROFILESRCDIR)/datastructs.h \ + $(OTFPROFILESRCDIR)/otfprofile.h \ + $(OTFPROFILESRCDIR)/summarize_data.h \ + $(OTFPROFILESRCDIR)/clustering.cpp \ + $(OTFPROFILESRCDIR)/collect_data.cpp \ + $(OTFPROFILESRCDIR)/comparison_clinkage.cpp \ + $(OTFPROFILESRCDIR)/comparison_kmeans.cpp \ + $(OTFPROFILESRCDIR)/create_csv.cpp \ + $(OTFPROFILESRCDIR)/create_latex.cpp \ + $(OTFPROFILESRCDIR)/otfprofile.cpp 
\ + $(OTFPROFILESRCDIR)/summarize_data.cpp \ + $(OTFPROFILESRCDIR)/summarize_data.h + diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Prodtex.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Prodtex.cpp deleted file mode 100644 index 99a591bdce..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Prodtex.cpp +++ /dev/null @@ -1,2273 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#ifdef _SX -# include -# include /* <- this include is needed on NEC SX platforms for gethostname() */ -#endif - -#ifdef _WIN32 -# include -#endif - -#include "OTF_Platform.h" - -#include "Prodtex.h" - -void bar_data(fstream& out, double value[16][7], bool var, uint32_t proc_count) -{ - double help = 0; - if(!var && (proc_count > 16)) - { - out << "\\def\\bardataI" << endl; - out << "{" << endl; - for(int k = 0; k < 16; k++) - { - if(value[k][2] > 0) - out << (k + 0.25) << " " << (log(value[k][2])/log(2.0) + 1) << endl; - if(value[k][5] > 0) - out << (k + 0.75) << " " << (log(value[k][5])/log(2.0) + 1) << endl; - } - out << "}" << endl; - } - out << "\\def\\bardataII" << endl; - out << "{" << endl; - for(int k = 0; k < 16; k++) - { - if(value[k][0] > 0) - { - help = value[k][0] / value[k][6]; - if(help < 1) - out << (k + 0.3) << " " << help << endl; - else - out << (k + 0.3) << " " << (log(help)/log(2.0) + 1) << endl; - } - if(value[k][3] > 0) - { - help = value[k][3] / value[k][6]; - if(help < 1) - out << (k + 0.8) << " " << help << endl; - else - out << (k + 0.8) << " " << (log(help)/log(2.0) + 1) << endl; - } - } - out << "}" << endl; - if(!var && (proc_count > 16)) - { - out << "\\def\\bardataIII" << endl; - out << "{" << endl; - for(int k = 0; k < 16; k++) - { - if(value[k][1] > 0) - out << (k + 0.4) << " " << (log(value[k][1])/log(2.0) + 1) << endl; - if(value[k][4] > 0) - out << (k + 0.9) << " " << (log(value[k][4])/log(2.0) + 1) << endl; - } - out 
<< "}" << endl; - } -} - -void bar_chart(uint64_t max, fstream& out, string proc_name1[16], string proc_name2[16], - uint32_t proc_count, string chart_name, bool byte , double value[16][7], - bool var, bool sum) -{ - uint64_t max_help = max; - if(var && (proc_count > 16)) - { - double help; - for(int k = 0; k < 16; k++) - { - if((value[k][0] > 0) && (value[k][6] > 1)) - { - - help = value[k][0] / value[k][6]; - value[k][2] = (value[k][1] -( value[k][6] * help * help)) / (value[k][6] - 1.0); - value[k][2] = sqrt(value[k][2]); - max = MAXIMUM(max, max_help + (uint64_t) value[k][2]); - } - else - { - value[k][2] = 0; - } - - if((value[k][3] > 0) && (value[k][6] > 1)) - { - - help = value[k][3] / value[k][6]; - value[k][5] = (value[k][4] - (value[k][6] * help * help)) / (value[k][6] - 1.0); - value[k][5] = sqrt(value[k][5]); - max = MAXIMUM(max, max_help + (uint64_t) value[k][5]); - } - else - { - value[k][5] = 0; - } - } - - } - - out << "{\\Large \\bf " << chart_name << "}" << endl; - out << endl; - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << endl; - out << "\\begin{help}" << endl; - out << "\\psset{xunit=1,yunit=0.5}" << endl; - out << "\\begin{pspicture}(0,0)(16," << ((uint64_t) log((double)max)/log(2.0) + 2) << ")" << endl; - out << " \\psaxes[labels=no,Oy=-1,ysubticks=2,ylogBase=2,Dy=2,ytickwidth=1pt," << endl; - out << " ysubtickwidth=1pt,xticksize=-1 " << ((uint64_t) log((double)max)/log(2.0) + 2) - << ",yticksize=0 16,ysubticksize=1," << endl; - out << " yticklinestyle=dotted,ysubticklinestyle=dotted]{-}(0,0)(0,0)(16.1," - << ((uint64_t) log((double)max)/log(2.0) + 2) << ")" << endl; - uint32_t count = 1; - out << " \\rput[r](-0.2,0){0}" << endl; - uint64_t i; - if(byte) - { - for(i = 1; i <= max<<1; i <<= 1) - { - out << "\\psline{-}(-0.15," << count << ")(0," << count << ")" << endl; - if(i < KBYTE) - { - out << " \\rput[r](-0.2," << count << "){" << i <<"}" << endl; - } - else if(i < MBYTE) - { - out << " 
\\rput[r](-0.2," << count << "){" << (i / KBYTE) << "K}" << endl; - } - else if(i < GBYTE) - { - out << " \\rput[r](-0.2," << count << "){" << (i / MBYTE) << "M}" << endl; - } - else - { - out << " \\rput[r](-0.2," << count << "){" << (i / GBYTE) << "G}" << endl; - } - count += 1; - } - } - else - { - for(i = 1; i <= max<<1; i <<= 1) - { - out << "\\psline{-}(-0.15," << count << ")(0," << count << ")" << endl; - if(i < KILO) - { - out << " \\rput[r](-0.2," << count << "){" << i <<"}" << endl; - } - else if(i < MEGA) - { - out << " \\rput[r](-0.2," << count << "){" << (i / KILO) << "K}" << endl; - } - else if(i < GIGA) - { - out << " \\rput[r](-0.2," << count << "){" << (i / MEGA) << "M}" << endl; - } - else - { - out << " \\rput[r](-0.2," << count << "){" << (i / GIGA) << "G}" << endl; - } - count += 1; - } - } - if(!var && (proc_count > 16)) - { - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.4," << endl; - out << " fillcolor=red,fillstyle=solid]{\\bardataI}" << endl; - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.3," << endl; - out << " fillcolor=green,fillstyle=solid]{\\bardataII}" << endl; - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.10," << endl; - out << " fillcolor=blue,fillstyle=solid]{\\bardataIII}" << endl; - } - else - { - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.46," << endl; - out << " fillcolor=green,fillstyle=solid]{\\bardataII}" << endl; - } - - if(var && (proc_count > 16)) - { - double help, test; - for(int k = 0; k < 16; k++) - { - if(value[k][0] > 0) - { - help = value[k][0] / value[k][6]; - out << "\\psframe[fillstyle=solid,fillcolor=lightgray](" << (k + 0.2) << ","; - if(help < value[k][2]) - out << "0"; - else - { - if((test = help - value[k][2]) < 1) - out << test; - else - out << log(test)/log(2.0) + 1; - } - out << ")(" << (k + 0.4) << ","; - test = help + value[k][2]; - if(test < 1) - out << test; - else - out << log(test)/log(2.0) + 1; - out << ")" << endl; - } - if(value[k][3] > 0) 
- { - help = value[k][3] / value[k][6]; - out << "\\psframe[fillstyle=solid,fillcolor=lightgray](" << (k + 0.7) << ","; - if(help < value[k][5]) - out << "0"; - else - { - if((test = help - value[k][5]) < 1) - out << test; - else - out << log(test)/log(2.0) + 1; - } - out << ")(" << (k + 0.9) << ","; - test = help + value[k][5]; - if(test < 1) - out << test; - else - out << log(test)/log(2.0) + 1; - out << ")" << endl; - } - } - } - - for(int k = 0; k < 16; k++) - { - if(proc_name1[k].size() > 15) - { - proc_name1[k].resize(15); - } - if(proc_count <= 16) - { - out << " \\rput[r]{90}(" << (k + 0.5) << ",-0.8){" << proc_name1[k] << "}" << endl; - out << " \\rput[t](" << (k + 0.2) << ",-0.2){s}" << endl; - out << " \\rput[t](" << (k + 0.8) << ",-0.2){r}" << endl; - } - else - { - if(proc_name2[k].size() > 15) - { - proc_name2[k].resize(15); - } - out << " \\rput[t](" << (k + 0.2) << ",-0.2){s}" << endl; - out << " \\rput[t](" << (k + 0.8) << ",-0.2){r}" << endl; - out << " \\rput[t](" << (k + 1.2) << ",-0.2){s}" << endl; - out << " \\rput[t](" << (k + 1.8) << ",-0.2){r}" << endl; - out << " \\rput[r]{90}(" << (k + 0.2) << ",-0.8){" << proc_name1[k] << "}" << endl; - out << " \\rput[r]{90}(" << (k + 0.5) << ",-0.8){-}" << endl; - out << " \\rput[r]{90}(" << (k + 0.8) << ",-0.8){" << proc_name2[k] << "}" << endl; - k++; - } - } - - out << " \\rput[l](0,-7){s - Send}" << endl; - out << " \\rput[l](0,-7.7){r - Receive}" << endl; - - if(!sum) { - if( (chart_name.find("ONE2ALL") < chart_name.size()) || (chart_name.find("ALL2ALL") < chart_name.size()) ) { - string sub = "the root"; - if(chart_name.find("ALL2ALL") < chart_name.size()) sub = "every"; - out << " \\rput[l](0,-11.0){In this chart one collective operation call means that " << sub << " process broadcasts only one message to the}" << endl; - out << " \\rput[l](0,-11.7){group and not allways one message to every group member. 
All processes of the group, the root process}" << endl; - out << " \\rput[l](0,-12.4){included, receive one message in total.}" << endl; - } - } else { - if( (chart_name.find("ONE2ALL") < chart_name.size()) || (chart_name.find("ALL2ALL") < chart_name.size()) ) { - string sub = "the root"; - if(chart_name.find("ALL2ALL") < chart_name.size()) sub = "every"; - out << " \\rput[l](0,-11.0){In this chart one collective operation call means that " << sub << " process broadcasts one message to every}" << endl; - out << " \\rput[l](0,-11.7){group member and not only one to the group. All processes of the group, the root process included,}" << endl; - out << " \\rput[l](0,-12.4){receive one message in total.}" << endl; - } - } - - if(!var && (proc_count > 16)) - { - out << " \\fnode[framesize=0.2 0.2,fillstyle=solid,fillcolor=red,linecolor=black](3,-7){Y}" << endl; - out << " \\rput[l](3.2,-7.0){Maximum}" << endl; - out << " \\fnode[framesize=0.2 0.2,fillstyle=solid,fillcolor=green,linecolor=black](3,-7.7){Y}" << endl; - out << " \\rput[l](3.2,-7.7){Average}" << endl; - out << " \\fnode[framesize=0.2 0.2,fillstyle=solid,fillcolor=blue,linecolor=black](3,-8.4){Y}" << endl; - out << " \\rput[l](3.2,-8.4){Minimum}" << endl; - } - out << "\\end{pspicture}" << endl; - out << "\\end{help}" << endl; - out << "\\newpage" << endl; -} - -/**************************************** tex_header ****************************************/ - -void tex_header(fstream& out, global_data* gd_ptr) -{ - char hostname[255]; - gethostname(hostname, sizeof(hostname)); - out << "\\documentclass[a4paper,10pt]{article}" << endl; - out << "\\usepackage{amssymb}" << endl; - out << "\\usepackage{longtable}" << endl; - out << "\\usepackage{pstricks,pst-plot,pstricks-add}" << endl; - out << "\\textwidth=16.0cm \\textheight=27.0cm \\topmargin=-1.8cm" << endl; - out << "\\oddsidemargin=0.1cm \\evensidemargin=0.1cm \\footskip=45pt" << endl; - out << endl; - out << "\\newcommand{\\PstDrawNode}[7]{%" << endl; - out 
<< " \\definecolor{MyColor}{rgb}{#1,#2,#3}%" << endl; - out << " \\fnode[framesize=#4 #5,fillstyle=solid,fillcolor=MyColor,linecolor=MyColor](#6,#7){Y}" << endl; - out << "}" << endl; - out << endl; - out << "\\newcommand{\\Print}[4]{%" << endl; - out << " \\definecolor{MyColor}{rgb}{#1,#2,#3}%" << endl; - out << " \\textcolor{MyColor}{#4}" << endl; - out << "}" << endl; - out << endl; - out << "\\newenvironment{help}{}{}" << endl; - out << endl; - out << "\\begin{document}" << endl; - out << endl; - out << "\\begin{flushleft}" << endl; - out << endl; - out << "file: \\verb|" << gd_ptr->filename << "| \\\\" << endl; - out << endl; - out << "\\bigskip" << endl; - out << "\\fbox{\\parbox{6.0 cm}{created by : " << gd_ptr->creator << "\\\\" << endl; - out << " OTF-Version: " << gd_ptr->version << "}}" << endl; - //out << " Author: Denis Huenich \\\\" << endl; - //out << " ZIH, TU Dresden}}" << endl; - out << "\\end{flushleft}" << endl; - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << endl; - out << "\\begin{center}" << endl; - out << endl; -} - -/***************************************** tex_foot *****************************************/ - -void tex_foot(fstream& out) -{ - out << endl; - out << "\\end{center}" << endl; - out << endl; out << "\\end{document}" << endl; -} - -/*********************************** Function and Counter ***********************************/ - -int tex_func(fstream& out, global_data* gd_ptr, vector counter_names, bool sum) -{ - typedef multimap > TempMap; - typedef map CountMap1; - - int min; - int max = 0; - - string tmp; - uint32_t stellen = 0; - - int i; - int help; - string func_name; - string count_name; - bool c_name; - - TempMap temp_map; - CountMap1 count_map1; - double ticks; - double excl_time, incl_time; - double rate; - float red,green,blue; // color values for rgb - - uint64_t min_invoc = (uint64_t) -1; - uint64_t max_invoc = 0; - // 0 = excl_time, 1 = incl_time - double min_time[2] = {(double)((uint64_t) -1), 
(double)((uint64_t) -1)}; - double max_time[2] = {0.0, 0.0}; - - vector func_vector; - vector count_vector; - vector count_name_vector; - Function_Value f_value; - Counter_Value c_value; - - gd_ptr->sum_container.get_Function_Def_Key(1, func_vector); - gd_ptr->sum_container.get_Counter_Def_Key(1, count_vector); - ticks = (double) gd_ptr->sum_container.get_ticks(1); - if(ticks < 1.0) - { - ticks = 1.0; - cerr << "Error in tex_func. No ticks given for this trace." << endl; - } - - /* collects information for Function and Counter */ - - vector::iterator it_vector = func_vector.begin(); - vector::iterator it_c_vector; - vector::iterator it_c_name; - CountMap1::iterator it_c_map1; - TempMap::iterator it_map; - TempMap::iterator it_map_page; - - if(!counter_names.empty()) - { - it_c_vector = count_vector.begin(); - while(it_c_vector != count_vector.end()) - { - string s(strdup(gd_ptr->sum_container.get_Counter_Def(1, *it_c_vector).get_name())); - transform(s.begin(), s.end(), s.begin(), ::toupper); - vector::iterator help_v; - help_v = counter_names.begin(); - while(help_v != counter_names.end()) - { - if(s == *help_v) - { - count_name_vector.push_back(*it_c_vector); - break; - } - ++help_v; - } - ++it_c_vector; - } - } - while(it_vector != func_vector.end()) - { - func_temp f_temp; - f_value = gd_ptr->sum_container.get_Function(1, *it_vector, 0); - if(f_value.get_invoc() > 0) - { - excl_time = (double) f_value.get_excl_time() / ticks; - incl_time = (double) f_value.get_incl_time() / ticks; - f_temp.func_id = *it_vector; - f_temp.invoc = f_value.get_invoc(); - f_temp.incl_time = incl_time; - - it_c_vector = count_vector.begin(); - while(it_c_vector != count_vector.end()) - { - c_name = false; - c_value = gd_ptr->sum_container.get_Counter(1, *it_vector, 0, *it_c_vector); - if(c_value.get_valid() == VALID) - { - if(count_name_vector.empty()) - { - if((excl_time == 0.0) || (c_value.get_excl_value() == 0)) - rate = 0.0; - else - rate = ((double) c_value.get_excl_value()) / 
excl_time / MEGA; - } - else - { - it_c_name = count_name_vector.begin(); - while(it_c_name != count_name_vector.end()) - { - if(*it_c_name == *it_c_vector) - { - c_name = true; - break; - } - ++it_c_name; - } - if(c_name) - { - if(c_value.get_excl_value() == 0) - rate = 0.0; - else - rate = ((double) c_value.get_excl_value()) / MEGA; - } - else - { - if((excl_time == 0.0) || (c_value.get_excl_value() == 0)) - rate = 0.0; - else - rate = ((double) c_value.get_excl_value()) / excl_time / MEGA; - } - } - f_temp.count_map2[*it_c_vector] = rate; - - it_c_map1 = count_map1.find(*it_c_vector); - if(it_c_map1 == count_map1.end()) - { - count_temp c_temp; - c_temp.min = rate; - c_temp.max = rate; - count_map1.insert(pair(*it_c_vector,c_temp)); - } - else - { - if(it_c_map1->second.min == 0.0) - it_c_map1->second.min = rate; - else if(rate > 0.0) - it_c_map1->second.min = MINIMUM(rate,it_c_map1->second.min); - it_c_map1->second.max = MAXIMUM(rate,it_c_map1->second.max); - } - } - ++it_c_vector; - } - temp_map.insert(pair(excl_time, f_temp)); - - min_invoc = MINIMUM(f_value.get_invoc(), min_invoc); - max_invoc = MAXIMUM(f_value.get_invoc(), max_invoc); - min_time[0] = MINIMUM(excl_time, min_time[0]); - max_time[0] = MAXIMUM(excl_time, max_time[0]); - min_time[1] = MINIMUM(incl_time, min_time[1]); - max_time[1] = MAXIMUM(incl_time, max_time[1]); - } - ++it_vector; - } - - /********************************** Top 50 of Function **********************************/ - /* How many functions in total*/ - - for(it_map = temp_map.begin(); it_map!=temp_map.end(); ++it_map) { - max++; - tmp.assign(gd_ptr->sum_container.get_Function_Def(1, it_map->second.func_id).get_name()); - if(tmp.size() > stellen) { - if(tmp.size() <= 20) stellen = (uint32_t) tmp.size(); else stellen = 20; - } - } - - min = gd_ptr->TOP_FUNC; - if(max < gd_ptr->TOP_FUNC) { - min = max; - } - - it_map = temp_map.begin(); - - out << "{\\Large \\bf Top " << min << " of " << max << " Functions}" << endl; - out << endl; - 
out << "\\bigskip" << endl; - out << endl; - out << "\\begin{longtable}{|l||r|r|r|}" << endl; - out << endl; - out << " \\hline" << endl; - out << " \\bf Function & \\bf invocations[\\#] & \\bf excl. time[sec] $\\nabla$ & " - << " \\bf incl. time[sec] \\\\" << endl; - out << " \\hline\\hline" << endl; - for(i = 1; i <= min; i++) - { - func_name.assign(gd_ptr->sum_container.get_Function_Def(1, it_map->second.func_id).get_name()); - //if(func_name.size() > 20) - func_name.resize(stellen,32); - - out << " \\verb|" << func_name << "| & "; - out.precision(2); - gd_ptr->sum_container.get_color(min_invoc, max_invoc, it_map->second.invoc, - red, green, blue); - out << "\\Print{" << red << "}{" << green << "}{" << blue << "}{" - << it_map->second.invoc << "} & "; - gd_ptr->sum_container.get_color(min_time[0], max_time[0], it_map->first, - red, green, blue); - out << "\\Print{" << red << "}{" << green << "}{" << blue << "}{"; - out.precision(6); - out << it_map->first << "} & "; - out.precision(2); - gd_ptr->sum_container.get_color(min_time[1], max_time[1], it_map->second.incl_time, - red, green, blue); - out << "\\Print{" << red << "}{" << green << "}{" << blue << "}{"; - out.precision(6); - out << it_map->second.incl_time << "} \\\\" << endl; - - if((i % 3) == 0) - out << " \\hline" << endl; - - ++it_map; - - if(it_map == temp_map.end()) - break; - } - out << " \\hline" << endl; - out << "\\end{longtable}" << endl; - out << endl; - out << "\\newpage" << endl; - out << endl; - - /********************************** Top 50 of Counter **********************************/ - - CountMap1::iterator it_c_tmp_map; - - if(!sum) { - - it_c_map1 = count_map1.begin(); - CountMap2::iterator it_c_map2; - int page = 1; - - while(it_c_map1 != count_map1.end()) - { - page = 1; - it_map_page = temp_map.begin(); - it_map = it_map_page; - - while(page <= gd_ptr->TOP_FUNC && it_map != temp_map.end()) - { - out << "{\\Large \\bf Top " << gd_ptr->TOP_FUNC << " of Counter [in Mega]}" << endl; - out 
<< endl; - out << "\\bigskip" << endl; - out << endl; - out << "\\begin{tabular}{|l||r|}" << endl; - out << endl; - out << " \\hline" << endl; - out << " \\bf Function & \\bf excl. time[sec] \\\\" << endl; - out << " \\hline\\hline" << endl; - - it_map = it_map_page; - for(i = page; i <= gd_ptr->TOP_FUNC; i++) - { - func_name.assign(gd_ptr->sum_container.get_Function_Def(1, it_map->second.func_id).get_name()); - //if(func_name.size() > 20) - func_name.resize(stellen,32); - out << " \\verb|" << func_name << "| & " << endl; - gd_ptr->sum_container.get_color(min_time[0], max_time[0], it_map->first, - red, green, blue); - out << "\\Print{" << red << "}{" << green << "}{" << blue << "}{"; - out.precision(6); - out << it_map->first << "}\\\\" << endl; - if((i % 3) == 0) - out << " \\hline" << endl; - ++it_map; - if((it_map == temp_map.end()) || ((i % 54) == 0)) - break; - } - out << " \\hline" << endl; - out << "\\end{tabular}" << endl; - - it_map = it_map_page; - help = 1; - i = page; - while((help <= 2) && (it_c_map1 != count_map1.end())) - { - if((i % 54) == 1) - { - count_name.assign(gd_ptr->sum_container.get_Counter_Def(1, it_c_map1->first).get_name()); - if(count_name.size() > 20) - count_name.resize(20); - out << "\\begin{tabular}{r|}" << endl; - out << endl; - out << " \\hline" << endl; - - c_name = false; - if(count_name_vector.empty()) - { - out << " \\bf\\verb|" << count_name << " #/sec| \\\\" << endl; - } - else - { - it_c_name = count_name_vector.begin(); - while(it_c_name != count_name_vector.end()) - { - if(*it_c_name == it_c_map1->first) - { - c_name = true; - break; - } - ++it_c_name; - } - if(c_name) - { - out << " \\bf\\verb|" << count_name << " #| \\\\" << endl; - } - else - { - out << " \\bf\\verb|" << count_name << " #/sec| \\\\" << endl; - } - } - out << " \\hline\\hline" << endl; - } - - it_c_map2 = it_map->second.count_map2.find(it_c_map1->first); - if((it_c_map2 == it_map->second.count_map2.end()) || (it_c_map2->second == 0.0)) - { - out << " no 
value\\\\" << endl; - } - else - { - out.precision(2); - gd_ptr->sum_container.get_color(it_c_map1->second.min, it_c_map1->second.max, - it_c_map2->second, red, green, blue); - out << " \\Print{" << red << "}{" << green << "}{" << blue << "}{"; - out.precision(6); - out << it_c_map2->second << "}\\\\" << endl; - } - - if((i % 3) == 0) - out << " \\hline" << endl; - - ++it_map; - ++i; - - if((it_map == temp_map.end()) || (i > gd_ptr->TOP_FUNC) || ((i % 54) == 1)) - { - ++help; - - out << " \\hline" << endl; - out << "\\end{tabular}" << endl; - it_c_tmp_map = count_map1.end(); - --it_c_tmp_map; - if((help > 2) || (it_c_map1 == it_c_tmp_map)) - { - it_map_page = it_map; - page = i; - out << "\\newpage" << endl; - out << endl; - if((i % 54) == 1) { - if(it_c_map1 != it_c_tmp_map) { - --it_c_map1; - } - help = 1; - break; - } - else { - ++it_c_map1;} - } - else - { - it_map = it_map_page; - i = page; - ++it_c_map1; - } - } - } - } - out << "\\newpage" << endl; - out << endl; - } - } - - return 0; -} - -/******************************************* P2P *******************************************/ - - /************************************ tex_p2p_values ************************************/ - -int tex_p2p_values(vector proc_vector, fstream& out, global_data* gd_ptr, - double ticks, uint32_t range, int type) -{ - P2P_Value p2p_value; - float red,green,blue; // color values for rgb - double min = (double)((uint64_t) - 1); - double max = 0.0; - double min_var = (double)((uint64_t) - 1); - double max_var = 0.0; - double value = 0.0; - double value_field[16][16][5];// 0 = value, 1 = min_value_local, 2 = max_value_local, 3 = count, 4 = value² - string proc_name1[16]; - string proc_name2[16]; - - for(int j = 0; j < 16; j++) - { - proc_name1[j].assign(" "); - proc_name2[j].assign(" "); - for(int k = 0; k < 16; k++) - { - for(int l = 0; l < 5; l++) - { - if(l == 1) - value_field[j][k][l] = (double)((uint64_t) - 1); - else - value_field[j][k][l] = 0.0; - } - } - } - vector::iterator 
it_vector = proc_vector.begin(); - vector::iterator it_vector2; - int count1 = 16; // 15 is the first process group in value field and 0 is the last - int count2 = 16; // 15 is the first process group in value field and 0 is the last - double j = 0.0; - double k = 0.0; - int help1 = (int) proc_vector.size(); - int help2; - - while(it_vector != proc_vector.end()) - { - for(j = 0.0; j < ((double) help1 / (double) count1); j++) - { - if(j == 0.0) - proc_name1[16 - count1].assign(gd_ptr->sum_container.get_Process_Def(1, *it_vector)); - it_vector2 = proc_vector.begin(); - count2 = 16; - help2 = (int) proc_vector.size(); - - while(it_vector2 != proc_vector.end()) - { - for(k = 0.0; k < ((double) help2 / (double) count2); k++) - { - - p2p_value = gd_ptr->sum_container.get_P2P(1, *it_vector, *it_vector2, 0, 0); - if((p2p_value.get_time() > 0) && (p2p_value.get_length() > 0)) - { - switch(type) - { - case P2P_AV_RAT : value = (double) p2p_value.get_length() / - ((double) p2p_value.get_time() / ticks);break; - case P2P_AV_DUR : value = ((double) p2p_value.get_time() / - (double) p2p_value.get_invoc()) / ticks;break; - case P2P_AV_LEN : value = (double) p2p_value.get_length() / - (double) p2p_value.get_invoc();break; - case P2P_SUM_DUR : value = (double) p2p_value.get_time() / ticks;break; - case P2P_SUM_LEN : value = (double) p2p_value.get_length();break; - default : cerr << "Error in tex_p2p_values().Wrong type." 
- << endl; - return 1; - } - } - else - value = 0.0; - value_field[16 - count1][16 - count2][3] += 1.0; - value_field[16 - count1][16 - count2][0] += value; - if(value_field[16 - count1][16 - count2][0] > 0.0) - { - value_field[16 - count1][16 - count2][1] = MINIMUM(value_field[16 - count1][16 - count2][1], value); - value_field[16 - count1][16 - count2][2] = MAXIMUM(value_field[16 - count1][16 - count2][2], value); - - if(gd_ptr->var) - { - value_field[16 - count1][16 - count2][4] += value * value; - } - min = MINIMUM(min, value); - max = MAXIMUM(max, value); - } - - ++it_vector2; - } - --it_vector2; - proc_name2[16 - count2].assign(gd_ptr->sum_container.get_Process_Def(1, *it_vector2)); - ++it_vector2; - help2 -= (int) k; - if(count2 < 1) - cerr << "Error in tex_p2p(). Wrong count2 value." << endl; - --count2; - } - - ++it_vector; - } - help1 -= (int) j; - if(count1 < 1) - cerr << "Error in tex_p2p(). Wrong count1 value." << endl; - --count1; - } - - switch(type) - { - case P2P_AV_RAT : out << "{\\Large \\bf P2P - Message Rate (average)}" << endl;break; - case P2P_AV_DUR : out << "{\\Large \\bf P2P - Message Duration (average)}" << endl;break; - case P2P_AV_LEN : out << "{\\Large \\bf P2P - Message Length (average)}" << endl;break; - case P2P_SUM_DUR : out << "{\\Large \\bf P2P - Message Duration (sum)}" << endl;break; - case P2P_SUM_LEN : out << "{\\Large \\bf P2P - Message Length (sum)}" << endl;break; - default : cerr << "Error in tex_p2p_values.Wrong type." 
<< endl; return 1; - } - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - if(max <= 0.0) - { - out << "no values" << endl; - out << "\\newpage" << endl; - out << endl; - - return 0; - } - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << endl; - out << "\\begin{help}" << endl; - out << "\\psset{xunit=0.75,yunit=0.75}" << endl; - out << "\\begin{pspicture}(0,-3)(16,13)" << endl; - out << " \\psgrid[subgriddiv=0,%" << endl; - out << " griddots=5,%" << endl; - out << " gridlabels=0](0,-3)(16,13)" << endl; - - if(gd_ptr->var && (proc_vector.size() > 16)) - { - min = (double)((uint64_t) - 1); - max = 0; - for(int j = 0; j < 16; j++) - { - for(int k = 0; k < 16; k++) - { - if(value_field[j][k][0] > 0.0) - { - value_field[j][k][0] = value_field[j][k][0] / value_field[j][k][3]; - min = MINIMUM(min,value_field[j][k][0]); - max = MAXIMUM(max,value_field[j][k][0]); - - if(value_field[j][k][3] == 1.0) - { - value_field[j][k][4] = 0; - } - else - { - value_field[j][k][4] = (value_field[j][k][4] - (value_field[j][k][3] * - value_field[j][k][0] * value_field[j][k][0])) / - (value_field[j][k][3] - 1.0); - value_field[j][k][4] = sqrt(value_field[j][k][4]); - } - min_var = MINIMUM(min_var, value_field[j][k][4]); - max_var = MAXIMUM(max_var, value_field[j][k][4]); - } - } - } - } - for(int l = 0; l < 16; l++) - { - for(int m = 0; m < 16; m++) - { - if(value_field[l][m][0] > 0.0) - { - if(gd_ptr->var && (proc_vector.size() > 16)) - { - if(!gd_ptr->vis) - { - gd_ptr->sum_container.get_gray(min_var, max_var, value_field[l][m][4], red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.73}{0.73}{" << (m + 0.5) << "}{" - << (12 - l + 0.5) << "}" << endl; - gd_ptr->sum_container.get_color(min, max, value_field[l][m][0], red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.5}{0.5}{" << (m + 0.5) << "}{" - << (12 - l + 0.5) << "}" << endl; - 
} - else - { - gd_ptr->sum_container.get_color(min, max, value_field[l][m][0], red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.73}{0.73}{" << (m + 0.5) << "}{" - << (12 - l + 0.5) << "}" << endl; - gd_ptr->sum_container.get_gray(min_var, max_var, value_field[l][m][4], red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.25}{0.5}{" << (m + 0.75) << "}{" - << (12 - l + 0.5) << "}" << endl; - } - } - else if(!gd_ptr->vis) - { - gd_ptr->sum_container.get_color(min, max, value_field[l][m][2], red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.73}{0.73}{" << (m + 0.5) << "}{" - << (12 - l + 0.5) << "}" << endl; - if(proc_vector.size() > 16) - { - gd_ptr->sum_container.get_color(min, max, value_field[l][m][1], red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.58}{0.58}{" << (m + 0.5) << "}{" - << (12 - l + 0.5) << "}" << endl; - gd_ptr->sum_container.get_color(min, max, (value_field[l][m][0] / value_field[l][m][3]), red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.43}{0.43}{" << (m + 0.5) << "}{" - << (12 - l + 0.5) << "}" << endl; - } - } - else - { - gd_ptr->sum_container.get_color(min, max, (value_field[l][m][0] / value_field[l][m][3]), red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.73}{0.73}{" << (m + 0.5) << "}{" - << (12 - l + 0.5) << "}" << endl; - if(proc_vector.size() > 16) - { - gd_ptr->sum_container.get_color(min, max, value_field[l][m][1], red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.25}{0.25}{" << (m + 0.75) << "}{" - << (12 - l + 0.25) << "}" << endl; - gd_ptr->sum_container.get_color(min, max, value_field[l][m][2], red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" 
<< blue << "}" - << "{0.25}{0.25}{" << (m + 0.75) << "}{" - << (12 - l + 0.75) << "}" << endl; - } - } - } - } - } - for(int l = 0; l < 16; l++) - { - if(proc_name1[l].size() > 15) - proc_name1[l].resize(15); - if(proc_vector.size() <= 16) - { - out << " \\rput[r](-0.2," << (12 - l + 0.5) << "){" << proc_name1[l] << "}" << endl; - out << " \\rput[l]{90}(" << (l + 0.5) << ",13.2){" << proc_name1[l] << "}" << endl; - } - else - { - if(proc_name2[l].size() > 15) - proc_name2[l].resize(15); - out << " \\rput[r](-0.2," << (12 - l + 0.8) << "){" << proc_name1[l] << "}" << endl; - out << " \\rput[r](-0.2," << (12 - l + 0.5) << "){-}" << endl; - out << " \\rput[r](-0.2," << (12 - l + 0.2) << "){" << proc_name2[l] << "}" << endl; - out << " \\rput[l]{90}(" << (l + 0.2) << ",13.2){" << proc_name1[l] << "}" << endl; - out << " \\rput[l]{90}(" << (l + 0.5) << ",13.2){-}" << endl; - out << " \\rput[l]{90}(" << (l + 0.8) << ",13.2){" << proc_name2[l] << "}" << endl; - l++; - } - } - out << endl; - /******************** color scale for P2P - value rate plot ********************/ - - double factor = (max - min) / (double) range; - double temp = min; - const char* scale_unit; - double unit; - j = 0.0; - out.precision(3); - if((type == P2P_AV_DUR) || (type == P2P_SUM_DUR)) - { - double scale_index = (max - min) / 2.0; - if(scale_index < 0.1) - { - unit = 1; - scale_unit = SECOND; - out.precision(6); - } - else if(scale_index < KILO) - { - unit = 1; - scale_unit = SECOND; - } - else if(scale_index < MEGA) - { - unit = KILO; - scale_unit = K_SECOND; - } - else if(scale_index < GIGA) - { - unit = MEGA; - scale_unit = M_SECOND; - } - else - { - unit = GIGA; - scale_unit = G_SECOND; - } - } - else - { - double scale_index = (max - min) / 2.0; - if(scale_index < KBYTE) - { - unit = _BYTE; - if(type == P2P_AV_RAT) - scale_unit = BYTE_SEC; - else - scale_unit = BYTE_TEXT; - } - else if(scale_index < MBYTE) - { - unit = KBYTE; - if(type == P2P_AV_RAT) - scale_unit = KBYTE_SEC; - else - 
scale_unit = KBYTE_TEXT; - } - else if(scale_index < GBYTE) - { - unit = MBYTE; - if(type == P2P_AV_RAT) - scale_unit = MBYTE_SEC; - else - scale_unit = MBYTE_TEXT; - } - else - { - unit = GBYTE; - if(type == P2P_AV_RAT) - scale_unit = GBYTE_SEC; - else - scale_unit = GBYTE_TEXT; - } - } - - for(uint32_t i = 1; i <= range; i++) - { - gd_ptr->sum_container.get_color(min, max, temp, red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.5}{0.5}{" << j << "}{" - << (-5) << "}" << endl; - out << " \\rput[r]{90}(" << j << "," << (-5.5) << "){" - << (temp / unit) << scale_unit << "}" << endl; - temp += factor; - j += 0.75; - if(min == max) - break; - } - - if(gd_ptr->var && (proc_vector.size() > 16)) - { - factor = (max_var - min_var) / (double) range; - temp = min_var; - j = 0.0; - - for(uint32_t i = 1; i <= range; i++) - { - gd_ptr->sum_container.get_gray(min_var, max_var, temp, red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.5}{0.5}{" << j << "}{" - << (-12) << "}" << endl; - out << " \\rput[r]{90}(" << j << "," << (-12.5) << "){" - << (temp / unit) << scale_unit << "}" << endl; - temp += factor; - j += 0.75; - if(min_var == max_var) - break; - } - } - if((proc_vector.size() > 16) && !gd_ptr->vis && !gd_ptr->var) - { - out << endl; - out << "\\PstDrawNode{1.000000}{0}{0}{1.46}{1.46}{0}{-11}" << endl; - out << "\\PstDrawNode{0}{0}{1.000000}{1.16}{1.16}{0}{-11}" << endl; - out << "\\PstDrawNode{0}{1}{0}{0.86}{0.86}{0}{-11}" << endl; - out << "\\psline{<-}(0.9,-10.4)(4,-10.4)" << endl; - out << "\\psline{<-}(0,-11.0)(4,-11)" << endl; - out << "\\psline{<-}(0.7,-11.6)(4,-11.6)" << endl; - out << "\\rput[l](4.5,-10.4){Maximum}" << endl; - out << "\\rput[l](4.5,-11){Average}" << endl; - out << "\\rput[l](4.5,-11.6){Minimum}" << endl; - } - if((proc_vector.size() > 16) && gd_ptr->vis && !gd_ptr->var) - { - out << endl; - out << "\\PstDrawNode{0}{1}{0}{1.46}{1.46}{0}{-11}" << 
endl; - out << "\\PstDrawNode{1.000000}{0}{0}{0.5}{0.5}{0.5}{-10.5}" << endl; - out << "\\PstDrawNode{0}{0}{1.000000}{0.5}{0.5}{0.5}{-11.5}" << endl; - out << "\\psline{<-}(0.7,-10.4)(4,-10.4)" << endl; - out << "\\psline{<-}(0,-11.0)(4,-11)" << endl; - out << "\\psline{<-}(0.7,-11.6)(4,-11.6)" << endl; - out << "\\rput[l](4.5,-10.4){Maximum}" << endl; - out << "\\rput[l](4.5,-11){Average}" << endl; - out << "\\rput[l](4.5,-11.6){Minimum}" << endl; - } - out << "\\end{pspicture}" << endl; - out << "\\end{help}" << endl; - out << endl; - out << "\\newpage" << endl; - out << endl; - - return 0; -} - - /**************************************** tex_p2p ****************************************/ - -int tex_p2p(fstream& out, global_data* gd_ptr, int tex, bool sum) -{ - string chart_name; - double ticks; - double value; - uint32_t i = 1; - uint32_t range = HUGE_TEX; //range of color-scale - float red,green,blue; // color values for rgb - - vector proc_vector; - vector bin1_vector; - vector bin2_vector; - P2P_Value p2p_value; - - /* - gd_ptr->var == false - 0 = value_sent, 1 = min_value_sent, 2 = max_value_sent, 3 = value_receive, - 4 = min_value_receive, 5 = max_value_receive, 6 = number of processes - gd_ptr->var == true and > 16 processes - 0 = value_sent, 1 = value_sent², 2 = value_sent_standard degression , 3 = value_receive, - 4 = value_receive², 5 = value_receive_standard degression, 6 = number of processes - */ - double invoc[16][7]; - double length[16][7]; - string proc_name1[16]; - string proc_name2[16]; - - for(int j = 0; j < 16; j++) - { - proc_name1[j].assign(" "); - proc_name2[j].assign(" "); - for(int k = 0; k < 7; k++) - { - if(((k == 1) || (k == 4)) && !gd_ptr->var) - { - invoc[j][k] = (double)((uint64_t) - 1); - length[j][k] = (double)((uint64_t) - 1); - } - else - { - invoc[j][k] = 0.0; - length[j][k] = 0.0; - } - } - } - - int count = 16; // 15 is the first process group in value field and 0 is the last - double j = 0.0; - int help; - - double 
invocation; - double min; - double max = 0.0; - double max2 = 0.0; - - gd_ptr->sum_container.get_Process_Def_Key(1, proc_vector); - ticks = (double) gd_ptr->sum_container.get_ticks(1); - if(ticks < 1.0) - { - ticks = 1.0; - cerr << "Error in tex_p2p. No ticks given for this trace." << endl; - } - - - /* assign processes to 16 bins */ - - vector::iterator it_vector = proc_vector.begin(); - help = (int) proc_vector.size(); - while(it_vector != proc_vector.end()) - { - - /* for all processes in current bin */ - for(j = 0.0; j < ((double) help / (double) count); j++) - { - - /* first process name in bin */ - if( 0.0 == j ) - { - proc_name1[16 - count].assign(gd_ptr->sum_container.get_Process_Def(1, *it_vector)); - } - - /* summ values to bin */ - - //send - p2p_value = gd_ptr->sum_container.get_P2P(1, *it_vector, 0, 0, 0); - value = (double) p2p_value.get_invoc(); - invoc[16 - count][0] += value; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - invoc[16 - count][1] += value * value; - } - else - { - invoc[16 - count][1] = MINIMUM(invoc[16 - count][1], value); - invoc[16 - count][2] = MAXIMUM(invoc[16 - count][2], value); - } - max = MAXIMUM(max, value); - value = (double) p2p_value.get_length(); - length[16 - count][0] += value; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - length[16 - count][1] += value * value; - } - else - { - length[16 - count][1] = MINIMUM(length[16 - count][1], value); - length[16 - count][2] = MAXIMUM(length[16 - count][2], value); - } - max2 = MAXIMUM(max2, value); - - //receive - - p2p_value = gd_ptr->sum_container.get_P2P(1, 0, *it_vector, 0, 0); - value = (double) p2p_value.get_invoc(); - invoc[16 - count][3] += value; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - invoc[16 - count][4] += value * value; - } - else - { - invoc[16 - count][4] = MINIMUM(invoc[16 - count][4],value); - invoc[16 - count][5] = MAXIMUM(invoc[16 - count][5],value); - } - max = MAXIMUM(max, value); - value = (double) p2p_value.get_length(); - 
length[16 - count][3] += value; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - length[16 - count][4] += value * value; - } - else - { - length[16 - count][4] = MINIMUM(length[16 - count][4], value); - length[16 - count][5] = MAXIMUM(length[16 - count][5], value); - } - - max2 = MAXIMUM(max2, value); - - invoc[16 - count][6] += 1.0; - length[16 - count][6] += 1.0; - ++it_vector; - } - - /* last process name in bin */ - - --it_vector; - proc_name2[16 - count].assign(gd_ptr->sum_container.get_Process_Def(1, *it_vector)); - ++it_vector; - - help -= (int) j; - if(count < 1) - cerr << "Error in tex_p2p(). Wrong count1 value." << endl; - --count; - } - - out.precision(2); - - if(max > 0) - { - chart_name = "Number of P2P Invocations (sum)"; - bar_data(out, invoc, gd_ptr->var, (uint32_t) proc_vector.size()); - bar_chart((uint64_t) max, out, proc_name1, proc_name2, (uint32_t) proc_vector.size(), chart_name, false, invoc, gd_ptr->var, sum); - } - - if(max2 > 0) - { - chart_name = "P2P Message Length (sum)[in Byte]"; - bar_data(out, length, gd_ptr->var, (uint32_t) proc_vector.size()); - bar_chart((uint64_t) max2, out, proc_name1, proc_name2, (uint32_t) proc_vector.size(), chart_name, true, length,gd_ptr->var, sum); - } - - - /* - ACHTUNG: 4-fach-Schleife in tex_p2p_values(...) --> kann bei hoher Anzahl an CPUs sehr langsam werden !!! 
- */ - /***************************** P2P - message value plots *****************************/ - - if(sum == false) { - if(tex == TEX_ALLPLOT) - { - for(i = P2P_AV_RAT; i < P2P_ALL; i++) - { - tex_p2p_values(proc_vector, out, gd_ptr, ticks, range, i); - } - } - else - tex_p2p_values(proc_vector, out, gd_ptr, ticks, range, P2P_AV_RAT); - - } - /*************************** P2P - message rate histogram ***************************/ - - if(sum == false) { - out.precision(6); - gd_ptr->sum_container.get_Bin1_Def_Key(1, bin1_vector); - gd_ptr->sum_container.get_Bin2_Def_Key(1, bin2_vector); - vector::iterator it_bin1_vector; - vector::iterator it_bin2_vector; - - map > InvocMap; - map >::iterator bin1_iter; - map::iterator bin2_iter; - - min = 0.0; - max = 0.0; - it_bin1_vector = bin1_vector.begin(); - - while(it_bin1_vector != bin1_vector.end()) - { - it_bin2_vector = bin2_vector.begin(); - while(it_bin2_vector != bin2_vector.end()) - { - p2p_value = gd_ptr->sum_container.get_P2P(1, 0, 0, *it_bin1_vector, *it_bin2_vector); - invocation = (double) p2p_value.get_invoc(); - InvocMap[*it_bin1_vector][*it_bin2_vector] = invocation; - if(invocation > 0.0) - { - if(min == 0.0) - min = invocation; - else - min = MINIMUM(invocation,min); - max = MAXIMUM(invocation,max); - } - ++it_bin2_vector; - } - ++it_bin1_vector; - } - - out.precision(2); - out << "{\\Large \\bf P2P - message rate histogram}" << endl; - if(max <= 0.0) - { - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << "no values" << endl; - out << "\\newpage" << endl; - out << endl; - - return 0; - } - else - { - out << "\\bigskip" << endl; - out << endl; - out << "\\begin{help}" << endl; - out << "\\psset{xunit=0.5,yunit=0.25}" << endl; - out << "\\begin{pspicture}(25,33)" << endl; - out << " \\psgrid[subgriddiv=0,%" << endl; - out << " griddots=5,%" << endl; - out << " gridlabels=0,%" << endl; - out << " yunit=2](24,16)" << endl; - - it_bin1_vector = bin1_vector.begin(); - while(it_bin1_vector != 
bin1_vector.end()) - { - it_bin2_vector = bin2_vector.begin(); - while(it_bin2_vector != bin2_vector.end()) - { - /*p2p_value = gd_ptr->sum_container.get_P2P_new(1, 0, 0, *it_bin1_vector, *it_bin2_vector); - invocation = (double) p2p_value.get_invoc();*/ - invocation = InvocMap[*it_bin1_vector][*it_bin2_vector]; - if(invocation > 0.0) - { - gd_ptr->sum_container.get_color_gray(min, max, invocation, red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.48}{0.48}{" << (*it_bin1_vector - 0.5) << "}{" - << ((*it_bin2_vector * 2) - 1) << "}" << endl; - } - ++it_bin2_vector; - } - ++it_bin1_vector; - } - out << " \\psaxes[labels = no,axesstyle = axes,subticks=2,Ox=0,Oy=0,xylogBase=2,Dx=2,Dy=4]" - << "{->}(26,33)" - << endl; - for(i = 2; i <= 32; i += 2) - { - out << "\\psline{-}(-0.15," << (i - 1) << ")(0," << (i - 1) << ")" << endl; - if(i < 10) - { - out << " \\rput[r](-0.2," << (i - 1) << "){" << pow(2.0, (double) i) <<"}" << endl; - } - else if(i < 20) - { - out << " \\rput[r](-0.2," << (i - 1) << "){" << pow(2.0, (double) (i - 10)) << "K}"; - out << endl; - } - else if(i < 30) - { - out << " \\rput[r](-0.2," << (i - 1) << "){" << pow(2.0, (double) (i - 20)) << "M}"; - out << endl; - } - else - { - if(i == 32) - { - out << " \\rput[r](-0.2," << (i - 1) << "){$>$" << pow(2.0, (double) (i - 32)); - out << "G}" << endl; - } - else - { - out << " \\rput[r](-0.2," << (i - 1) << "){" << pow(2.0, (double) (i - 30)); - out << "G}" << endl; - } - } - } - for(i = 0; i <= 23; i++) - { - out << "\\psline{-}(" << ((double) i - 0.5) <<",-0.15)(" << ((double) i - 0.5); - out << ",0)" << endl; - if(i < 10) - { - out << " \\rput[r]{90}(" << ((double) i + 0.5) << ",-0.22,){"; - out << pow(2.0, (double) i) <<"}" << endl; - } - else if(i < 20) - { - out << " \\rput[r]{90}(" << ((double) i + 0.5) << ",-0.22,){"; - out << pow(2.0, (double) (i - 10)) << "K}" << endl; - } - else - { - if(i == 23) - { - out << " \\rput[r]{90}(" << ((double) i + 
0.5) << ",-0.22,){$>$"; - out << pow(2.0, (double) (i - 21)) << "M}" << endl; - } - else - { - out << " \\rput[r]{90}(" << ((double) i + 0.5) << ",-0.22,){"; - out << pow(2.0, (double) (i - 20)) << "M}" << endl; - } - } - } - out << " \\rput{90}(-5,15){rate [byte/sec]}" << endl; - out << " \\rput{0}(13,-8){message length [byte]}" << endl; - - /******************* color scale for P2P - message rate histogram *******************/ - double factor = (max - min) / (double)range; - double temp = min; - for(i = 1; i <= range; i++) - { - gd_ptr->sum_container.get_color_gray(min, max, temp, red, green, blue); - out << " \\PstDrawNode{" << red << "}{" << green << "}{" << blue << "}" - << "{0.48}{0.48}{" << ((int) i - 1) << "}{" - << (-13) << "}" << endl; - out << " \\rput[r]{90}(" << ((int) i - 1) << "," << (-14.5) << "){" - << (uint64_t) temp << " \\#}" << endl; - temp += factor; - } - out << "\\end{pspicture}" << endl; - out << "\\end{help}" << endl; - out << endl; - out << "\\newpage" << endl; - out << endl; - out.precision(6); - } - - /**************************** P2P - message length histogram ****************************/ - max = 0.0; - it_bin1_vector = bin1_vector.begin(); - out << "\\def\\barData{"; - while(it_bin1_vector != bin1_vector.end()) - { - p2p_value = gd_ptr->sum_container.get_P2P(1, 0, 0, *it_bin1_vector, 0); - if(p2p_value.get_invoc() > 0) - { - out << endl; - invocation = log((double)p2p_value.get_invoc())/log(2.0); - max = MAXIMUM(invocation,max); - out << (*it_bin1_vector - 1) << " " << invocation; - } - ++it_bin1_vector; - } - - out << "}" << endl; - out << endl; - out << "{\\Large \\bf P2P - message length histogram}" << endl; - if(max <= 0.0) - { - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << "no values" << endl; - out << "\\newpage" << endl; - out << endl; - - return 0; - } - else - { - out << "\\bigskip" << endl; - out << endl; - out << "\\begin{help}" << endl; - out << "\\psset{xunit=0.5,yunit=0.6}" << endl; - out << 
"\\begin{pspicture}(25," << ((uint64_t) max) + 2 << ")" << endl; - out << " \\psgrid[subgriddiv=0,%" << endl; - out << " griddots=5,%" << endl; - out << " gridlabels=0,%" << endl; - out << " yunit=1](24," << ((uint32_t) max) + 1 << ")" << endl; - out << " \\psaxes[labels = no,axesstyle = axes,Ox=0,Oy=0,xylogBase=2,Dx=2,Dy=2]{->}(25," - << ((uint32_t) max) + 2 << ")" << endl; - out << " \\listplot[shadow=false,linecolor=blue,plotstyle=bar,barwidth=0.2cm," << endl; - out << " fillcolor=red,fillstyle=solid]{\\barData}" << endl; - out.precision(2); - for(i = 0; i <= ((uint32_t) max + 1); i += 2) - { - out << "\\psline{-}(-0.15," << i << ")(0," << i << ")" << endl; - if(i < 10) - { - out << " \\rput[r](-0.4," << i << "){" << pow(2.0, (double) i) <<"}" << endl; - } - else if(i < 20) - { - out << " \\rput[r](-0.4," << i << "){" << pow(2.0, (double) i) / KILO<< "K}"; - out << endl; - } - else if(i < 30) - { - out << " \\rput[r](-0.4," << i << "){" << pow(2.0, (double) i) / MEGA<< "M}"; - out << endl; - } - else - { - out << " \\rput[r](-0.4," << i << "){" << pow(2.0, (double) i) / GIGA; - out << "G}" << endl; - } - } - for(i = 0; i <= 24; i += 2) - { - out << "\\psline{-}(" << i <<",-0.15)(" << i; - out << ",0)" << endl; - if(i < 10) - { - out << " \\rput[r]{90}(" << i << ",-0.22,){"; - out << pow(2.0, (double) i) <<"}" << endl; - } - else if(i < 20) - { - out << " \\rput[r]{90}(" << i << ",-0.22,){"; - out << pow(2.0, (double) (i - 10)) << "K}" << endl; - } - else - { - if(i == 23) - { - out << " \\rput[r]{90}(" << i << ",-0.22,){$>$"; - out << pow(2.0, (double) (i - 20)) << "M}" << endl; - } - else - { - out << " \\rput[r]{90}(" << i << ",-0.22,){"; - out << pow(2.0, (double) (i - 20)) << "M}" << endl; - } - } - } - out.precision(6); - out << " \\rput{90}(-5," << ((uint32_t) ((max + 1.0) / 2)) << "){invocation[\\#]}" - << endl; - out << " \\rput{0}(12,-4){message length[byte]}" << endl; - out << "\\end{pspicture}" << endl; - out << "\\end{help}" << endl; - out << endl; - 
} - - } //my - - return 0; -} - -/*********************************** Collective Operation ***********************************/ - -int tex_collop(fstream& out, global_data* gd_ptr, bool sum) -{ - string chart_name; - double ticks; - double value; - int coll_type[4] = {OTF_COLLECTIVE_TYPE_ONE2ALL, OTF_COLLECTIVE_TYPE_ALL2ONE, - OTF_COLLECTIVE_TYPE_ALL2ALL, OTF_COLLECTIVE_TYPE_BARRIER}; - - vector proc_vector; - CollOp_Value collop_value; - gd_ptr->sum_container.get_Process_Def_Key(1, proc_vector); - ticks = (double) gd_ptr->sum_container.get_ticks(1); - if(ticks < 1.0) - { - ticks = 1.0; - cerr << "Error in tex_collop. No ticks given for this trace." << endl; - } - - /* first dim. : 16 bins for processes - second dim. : 0 = one2all, 1 = all2one, 2 = all2all -> not used for barrier - third dim. : gd_ptr == false - 0 = value_sent, 1 = min_value_sent, 2 = max_value_sent, - 3 = value_receive, 4 = min_value_receive, - 5 = max_value_receive, 6 = number of processes - - gd_ptr == true - 0 = value_sent, 1 = value_sent², 2 = value_sent_standard degression, - 3 = value_receive, 4 = value_receive², - 5 = value_receive_standard degression, 6 = number of processes - - */ - double invoc[16][3][7]; - double length[16][3][7]; - double barrier[16][7]; - double help_invoc[16][7]; - double help_length[16][7]; - - string proc_name1[16]; - string proc_name2[16]; - - for(int j = 0; j < 16; j++) - { - proc_name1[j].assign(" "); - proc_name2[j].assign(" "); - - for(int l = 0; l < 3; l++) - { - for(int k = 0; k < 7; k++) - { - if(((k == 1) || (k == 4)) && !gd_ptr->var) - { - invoc[j][l][k] = (double)((uint64_t) - 1); - length[j][l][k] = (double)((uint64_t) - 1); - if(l == 0) - barrier[j][k] = (double)((uint64_t) - 1); - } - else - { - invoc[j][l][k] = 0.0; - length[j][l][k] = 0.0; - if(l == 0) - barrier[j][k] = 0.0; - } - } - } - } - - - int count = 16; // 15 is the first process group in value field and 0 is the last - double j = 0.0; - int help; - double time; - - /* 0 = one2all, 1 = 
all2one, 2 = all2all, 3 = barrier */ - double max1[4] = {0.0, 0.0, 0.0, 0.0}; - double max2[4] = {0.0, 0.0, 0.0, 0.0}; - - ticks = (double) gd_ptr->sum_container.get_ticks(1); - if(ticks < 1.0) - { - ticks = 1.0; - cerr << "Error in tex_p2p. No ticks given for this trace." << endl; - } - - - /* assign processes to 16 bins */ - - vector::iterator it_vector = proc_vector.begin(); - help = (int) proc_vector.size(); - while(it_vector != proc_vector.end()) - { - - /* for all processes in current bin */ - for(j = 0.0; j < ((double) help / (double) count); j++) - { - - /* first process name in bin */ - if( 0.0 == j ) - { - proc_name1[16 - count].assign(gd_ptr->sum_container.get_Process_Def(1, *it_vector)); - } - - /* summ values to bin */ - - for(int l = 0; l <= 2; l++) - { - //send - - collop_value = gd_ptr->sum_container.get_CollOpType(1, *it_vector, coll_type[l]); - value = (double) collop_value.get_invoc_send(); - invoc[16 - count][l][0] += value; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - invoc[16 - count][l][1] += value * value; - } - else - { - invoc[16 - count][l][1] = MINIMUM(invoc[16 - count][l][1],value); - invoc[16 - count][l][2] = MAXIMUM(invoc[16 - count][l][2],value); - } - max1[l] = MAXIMUM(max1[l], value); - value = (double) collop_value.get_length_send(); - length[16 - count][l][0] += value; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - length[16 - count][l][1] += value * value; - } - else - { - length[16 - count][l][1] = MINIMUM(length[16 - count][l][1],value); - length[16 - count][l][2] = MAXIMUM(length[16 - count][l][2],value); - } - max2[l] = MAXIMUM(max2[l], value); - - //receive - - value = (double) collop_value.get_invoc_receive(); - invoc[16 - count][l][3] += value; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - invoc[16 - count][l][4] += value * value; - } - else - { - invoc[16 - count][l][4] = MINIMUM(invoc[16 - count][l][4],value); - invoc[16 - count][l][5] = MAXIMUM(invoc[16 - count][l][5],value); - } - max1[l] = 
MAXIMUM(max1[l], value); - value = (double) collop_value.get_length_receive(); - length[16 - count][l][3] += value; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - length[16 - count][l][4] += value * value; - } - else - { - length[16 - count][l][4] = MINIMUM(length[16 - count][l][4],value); - length[16 - count][l][5] = MAXIMUM(length[16 - count][l][5],value); - } - max2[l] = MAXIMUM(max2[l], value); - invoc[16 - count][l][6] += 1.0; - length[16 - count][l][6] += 1.0; - } - collop_value = gd_ptr->sum_container.get_CollOpType(1, *it_vector, coll_type[3]); - value = (double) collop_value.get_invoc_send(); - barrier[16 - count][0] += value; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - barrier[16 - count][1] += value * value; - } - else - { - barrier[16 - count][1] = MINIMUM(barrier[16 - count][1],value); - barrier[16 - count][2] = MAXIMUM(barrier[16 - count][2],value); - } - time = (double) collop_value.get_time() / ticks; - barrier[16 - count][3] += time; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - barrier[16 - count][4] += time * time; - } - else - { - barrier[16 - count][4] = MINIMUM(barrier[16 - count][4], time); - barrier[16 - count][5] = MAXIMUM(barrier[16 - count][5], time); - } - barrier[16 - count][6] += 1.0; - max1[3] = MAXIMUM(max1[3], value); - max2[3] = MAXIMUM(max2[3], time); - - ++it_vector; - } - - /* last process name in bin */ - - --it_vector; - proc_name2[16 - count].assign(gd_ptr->sum_container.get_Process_Def(1, *it_vector)); - ++it_vector; - - help -= (int) j; - if(count < 1) - cerr << "Error in tex_collop(). Wrong count1 value." << endl; - --count; - } - - out.precision(2); - for(int l = 0; l <= 2; l++) - { - if(max1[l] == 0.0) - { - switch(l) - { - case 0 : chart_name = "ONE2ALL(sum)";break; - case 1 : chart_name = "ALL2ONE (sum)";break; - case 2 : chart_name = "ALL2ALL (sum)";break; - default: cerr << "Error in tex_collop.Wrong value in switch statement. 
" << endl; - } - continue; - } - for(int k = 0; k < 16; k++) - { - for(int m = 0; m < 7; m++) - { - help_invoc[k][m] = invoc[k][l][m]; - help_length[k][m] = length[k][l][m]; - } - } - switch(l) - { - case 0 : chart_name = "ONE2ALL Invocation (sum)";break; - case 1 : chart_name = "ALL2ONE Invocation (sum)";break; - case 2 : chart_name = "ALL2ALL Invocation (sum)";break; - default: cerr << "Error in tex_collop.Wrong value in switch statement. " << endl; - } - bar_data(out, help_invoc, gd_ptr->var, (uint32_t) proc_vector.size()); - bar_chart((uint64_t) max1[l], out, proc_name1, proc_name2, (uint32_t) proc_vector.size(), chart_name, false, help_invoc,gd_ptr->var, sum); - - switch(l) - { - case 0 : chart_name = "ONE2ALL Message Length (sum)[in Byte]";break; - case 1 : chart_name = "ALL2ONE Message Length (sum)[in Byte]";break; - case 2 : chart_name = "ALL2ALL Message Length (sum)[in Byte]";break; - default: cerr << "Error in tex_collop.Wrong value in switch statement. " << endl; - } - bar_data(out, help_length, gd_ptr->var, (uint32_t) proc_vector.size()); - bar_chart((uint64_t) max2[l], out, proc_name1, proc_name2, (uint32_t) proc_vector.size(), chart_name, true, help_length, gd_ptr->var, sum); - } - - /******************************* Barrier *******************************/ - - if(gd_ptr->var && proc_vector.size()) - { - double max_help[2] = {max1[3],max2[3]}; - if(gd_ptr->var && (proc_vector.size() > 16)) - { - double help_lok; - for(int k = 0; k < 16; k++) - { - if((barrier[k][0] > 0) && (barrier[k][6] > 1)) - { - - help_lok = barrier[k][0] / barrier[k][6]; - barrier[k][2] = (barrier[k][1] -( barrier[k][6] * help_lok * help_lok)) / (barrier[k][6] - 1.0); - barrier[k][2] = sqrt(barrier[k][2]); - max1[3] = MAXIMUM(max1[3], max_help[0] + barrier[k][2]); - } - else - { - barrier[k][2] = 0; - } - if((barrier[k][3] > 0) && (barrier[k][6] > 1)) - { - help_lok = barrier[k][3] / barrier[k][6]; - barrier[k][5] = (barrier[k][4] - (barrier[k][6] * help_lok * help_lok)) / - 
(barrier[k][6] - 1.0); - barrier[k][5] = sqrt(barrier[k][5]); - max2[3] = MAXIMUM(max2[3], max_help[1] + barrier[k][5]); - } - else - { - barrier[k][5] = 0; - } - } - } - } - /************************ Barrier Invocation ************************/ - double help_barrier; - double test_barrier; - float pos; - - if(!gd_ptr->var && (proc_vector.size() > 16)) - pos = 0.6f; - else - pos = 0.5f; - if(max1[3] == 0.0) - { - return 0; - } - if(!gd_ptr->var && (proc_vector.size() > 16)) - { - out << "\\def\\bardataI" << endl; - out << "{" << endl; - for(int k = 0; k < 16; k++) - { - if(barrier[k][2] > 0) - out << (k + 0.5) << " " << (log(barrier[k][2])/log(2.0) + 1) << endl; - } - out << "}" << endl; - } - out << "\\def\\bardataII" << endl; - out << "{" << endl; - - for(int k = 0; k < 16; k++) - { - if(barrier[k][0] > 0) - { - help_barrier = barrier[k][0] / barrier[k][6]; - if(barrier[k][0] < 1) - out << (k + pos) << " " << help_barrier << endl; - else - out << (k + pos) << " " << (log(help_barrier)/log(2.0) + 1) << endl; - } - } - out << "}" << endl; - if(!gd_ptr->var && (proc_vector.size() > 16)) - { - out << "\\def\\bardataIII" << endl; - out << "{" << endl; - for(int k = 0; k < 16; k++) - { - if(barrier[k][1] > 0) - out << (k + 0.8) << " " << (log(barrier[k][1])/log(2.0) + 1) << endl; - } - out << "}" << endl; - } - - out << "{\\Large \\bf Barrier Invocation [sum]}" << endl; - out << endl; - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << endl; - out << "\\begin{help}" << endl; - out << "\\psset{xunit=1,yunit=0.5}" << endl; - out << "\\begin{pspicture}(0,0)(16," << ((uint64_t) log(max1[3])/log(2.0) + 2) << ")" << endl; - out << " \\psaxes[labels=no,Oy=-1,ysubticks=2,ylogBase=2,Dy=2,ytickwidth=1pt," << endl; - out << " ysubtickwidth=1pt,xticksize=-1 " << ((uint64_t) log(max1[3])/log(2.0) + 2) - << ",yticksize=0 16,ysubticksize=1," << endl; - out << " yticklinestyle=dotted,ysubticklinestyle=dotted]{-}(0,0)(0,0)(16.1," - << 
((uint64_t) log(max1[3])/log(2.0) + 2) << ")" << endl; - count = 1; - out << " \\rput[r](-0.2,0){0}" << endl; - uint64_t i; - for(i = 1; i <= ((uint64_t) max1[3])<<1; i <<= 1) - { - out << "\\psline{-}(-0.15," << count << ")(0," << count << ")" << endl; - if(i < KBYTE) - { - out << " \\rput[r](-0.2," << count << "){" << i <<"}" << endl; - } - else if(i < MBYTE) - { - out << " \\rput[r](-0.2," << count << "){" << (i / KILO) << "K}" << endl; - } - else if(i < GBYTE) - { - out << " \\rput[r](-0.2," << count << "){" << (i / MEGA) << "M}" << endl; - } - else - { - out << " \\rput[r](-0.2," << count << "){" << (i / GIGA) << "G}" << endl; - } - count += 1; - } - if(!gd_ptr->var && (proc_vector.size() > 16)) - { - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.8," << endl; - out << " fillcolor=red,fillstyle=solid]{\\bardataI}" << endl; - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.6," << endl; - out << " fillcolor=green,fillstyle=solid]{\\bardataII}" << endl; - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.2," << endl; - out << " fillcolor=blue,fillstyle=solid]{\\bardataIII}" << endl; - } - else - { - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.8," << endl; - out << " fillcolor=green,fillstyle=solid]{\\bardataII}" << endl; - } - - if(gd_ptr->var && (proc_vector.size() > 16)) - { - for(int k = 0; k < 16; k++) - { - if(barrier[k][0] > 0) - { - help_barrier = barrier[k][0] / barrier[k][6]; - out << "\\psframe[fillstyle=solid,fillcolor=lightgray](" << (k + 0.4) << ","; - if(help_barrier < barrier[k][2]) - out << "0"; - else - { - if((test_barrier = help_barrier - barrier[k][2]) < 1) - out << test_barrier; - else - out << log(test_barrier)/log(2.0) + 1; - } - out << ")(" << (k + 0.6) << ","; - test_barrier = help_barrier + barrier[k][2]; - if(test_barrier < 1) - out << test_barrier; - else - out << log(test_barrier)/log(2.0) + 1; - out << ")" << endl; - } - } - } - - for(int k = 0; k < 16; k++) - { - if(proc_name1[k].size() > 
15) - { - proc_name1[k].resize(15); - } - if(proc_vector.size() <= 16) - { - out << " \\rput[r]{90}(" << (k + 0.5) << ",-0.2){" << proc_name1[k] << "}" << endl; - } - else - { - if(proc_name2[k].size() > 15) - { - proc_name2[k].resize(15); - } - out << " \\rput[r]{90}(" << (k + 0.2) << ",-0.2){" << proc_name1[k] << "}" << endl; - out << " \\rput[r]{90}(" << (k + 0.5) << ",-0.2){-}" << endl; - out << " \\rput[r]{90}(" << (k + 0.8) << ",-0.2){" << proc_name2[k] << "}" << endl; - k++; - } - } - if(!gd_ptr->var && (proc_vector.size() > 16)) - { - out << " \\fnode[framesize=0.2 0.2,fillstyle=solid,fillcolor=red,linecolor=black](0,-7){Y}" << endl; - out << " \\rput[l](0.2,-7.0){Maximum}" << endl; - out << " \\fnode[framesize=0.2 0.2,fillstyle=solid,fillcolor=green,linecolor=black](0,-7.7){Y}" << endl; - out << " \\rput[l](0.2,-7.7){Average}" << endl; - out << " \\fnode[framesize=0.2 0.2,fillstyle=solid,fillcolor=blue,linecolor=black](0,-8.4){Y}" << endl; - out << " \\rput[l](0.2,-8.4){Minimum}" << endl; - } - out << "\\end{pspicture}" << endl; - out << "\\end{help}" << endl; - out << "\\newpage" << endl; - - /************************ Barrier Duration ************************/ - - if(sum) return 0; - if(!gd_ptr->var && (proc_vector.size() > 16)) - { - out << "\\def\\bardataI" << endl; - out << "{" << endl; - for(int k = 0; k < 16; k++) - { - if(barrier[k][5] > 0) - out << (k + 0.5) << " " << (log10(barrier[k][5] * MEGA) + 1) << endl; - } - out << "}" << endl; - } - out << "\\def\\bardataII" << endl; - out << "{" << endl; - for(int k = 0; k < 16; k++) - { - if(barrier[k][3] > 0) - { - help_barrier = barrier[k][3] / barrier[k][6] * MEGA; - if(help_barrier < 1) - out << (k + pos) << " " << help_barrier << endl; - else - out << (k + pos) << " " << (log10(help_barrier) + 1) << endl; - } - } - out << "}" << endl; - if(!gd_ptr->var && (proc_vector.size() > 16)) - { - out << "\\def\\bardataIII" << endl; - out << "{" << endl; - for(int k = 0; k < 16; k++) - { - if(barrier[k][4] > 
0) - out << (k + 0.8) << " " << (log10(barrier[k][4] * MEGA) + 1) << endl; - } - out << "}" << endl; - } - out << "{\\Large \\bf Barrier Duration (sum)[in seconds]}" << endl; - out << endl; - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << "\\bigskip" << endl; - out << endl; - out << "\\begin{help}" << endl; - out << "\\psset{xunit=1,yunit=0.5}" << endl; - out << "\\begin{pspicture}(0,0)(16," << ((uint64_t) log10(max2[3] * MEGA) + 3) << ")" << endl; - out << " \\psaxes[labels=no,Oy=-1,ysubticks=2,ylogBase=10,Dy=2,ytickwidth=1pt," << endl; - out << " ysubtickwidth=1pt,xticksize=-1 " - << ((uint64_t) log10(max2[3] * MEGA) + 3) - << ",yticksize=0 16,ysubticksize=1," << endl; - out << " yticklinestyle=dotted,ysubticklinestyle=dotted]{-}(0,0)(0,0)(16.1," - << ((uint64_t) log10(max2[3] * MEGA) + 3) << ")" << endl; - count = 1; - out << " \\rput[r](-0.2,0){0}" << endl; - double m; - for(m = 0.000001; m <= (max2[3]*100); m *= 10) - { - out << "\\psline{-}(-0.15," << count << ")(0," << count << ")" << endl; - if(m < 1) - { - switch(count) - { - case 1 : out.precision(6);break; - case 2 : out.precision(5);break; - case 3 : out.precision(4);break; - case 4 : out.precision(3);break; - case 5 : out.precision(2);break; - case 6 : out.precision(1);break; - } - out << " \\rput[r](-0.2," << count << "){" << m <<"}" << endl; - } - else if(m < KILO) - { - out << " \\rput[r](-0.2," << count << "){" << (uint64_t) m <<"}" << endl; - } - else if(m < MEGA) - { - out << " \\rput[r](-0.2," << count << "){" << (uint64_t) (m / KILO) << "K}" << endl; - } - else if(m < GIGA) - { - out << " \\rput[r](-0.2," << count << "){" << (uint64_t) (m / MEGA) << "M}" << endl; - } - else - { - out << " \\rput[r](-0.2," << count << "){" << (uint64_t) (m / GIGA) << "G}" << endl; - } - count += 1; - } - if(!gd_ptr->var && (proc_vector.size() > 16)) - { - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.8," << endl; - out << " fillcolor=red,fillstyle=solid]{\\bardataI}" << endl; - 
out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.6," << endl; - out << " fillcolor=green,fillstyle=solid]{\\bardataII}" << endl; - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.2," << endl; - out << " fillcolor=blue,fillstyle=solid]{\\bardataIII}" << endl; - } - else - { - out << " \\listplot[shadow=false,plotstyle=bar,barwidth=0.8," << endl; - out << " fillcolor=green,fillstyle=solid]{\\bardataII}" << endl; - } - - if(gd_ptr->var && (proc_vector.size() > 16)) - { - for(int k = 0; k < 16; k++) - { - if(barrier[k][3] > 0) - { - help_barrier = barrier[k][3] / barrier[k][6]; - out << "\\psframe[fillstyle=solid,fillcolor=lightgray](" << (k + 0.4) << ","; - if(help_barrier < barrier[k][5]) - out << "0"; - else - { - if((test_barrier = (help_barrier - barrier[k][5]) * MEGA) < 1) - out << test_barrier; - else - out << log10(test_barrier) + 1; - } - out << ")(" << (k + 0.6) << ","; - test_barrier = (help_barrier + barrier[k][5]) * MEGA; - if(test_barrier < 1) - out << test_barrier; - else - out << log10(test_barrier) + 1; - out << ")" << endl; - } - } - } - - for(int k = 0; k < 16; k++) - { - if(proc_name1[k].size() > 15) - { - proc_name1[k].resize(15); - } - if(proc_vector.size() <= 16) - { - out << " \\rput[r]{90}(" << (k + 0.5) << ",-0.2){" << proc_name1[k] << "}" << endl; - } - else - { - if(proc_name2[k].size() > 15) - { - proc_name2[k].resize(15); - } - out << " \\rput[r]{90}(" << (k + 0.2) << ",-0.2){" << proc_name1[k] << "}" << endl; - out << " \\rput[r]{90}(" << (k + 0.5) << ",-0.2){-}" << endl; - out << " \\rput[r]{90}(" << (k + 0.8) << ",-0.2){" << proc_name2[k] << "}" << endl; - k++; - } - } - if(!gd_ptr->var && (proc_vector.size() > 16)) - { - out << " \\fnode[framesize=0.2 0.2,fillstyle=solid,fillcolor=red,linecolor=black](0,-7){Y}" << endl; - out << " \\rput[l](0.2,-7.0){Maximum}" << endl; - out << " \\fnode[framesize=0.2 0.2,fillstyle=solid,fillcolor=green,linecolor=black](0,-7.7){Y}" << endl; - out << " \\rput[l](0.2,-7.7){Average}" << 
endl; - out << " \\fnode[framesize=0.2 0.2,fillstyle=solid,fillcolor=blue,linecolor=black](0,-8.4){Y}" << endl; - out << " \\rput[l](0.2,-8.4){Minimum}" << endl; - } - out << "\\end{pspicture}" << endl; - out << "\\end{help}" << endl; - - return 0; -} - -/***************************************** prod_tex *****************************************/ - -int prod_tex(int tex, global_data* gd_ptr, vector counter_names, bool sum) -{ - fstream out((gd_ptr->filename_path + "_result.tex").c_str(), ios::out | ios::trunc); - out.setf(ios::fixed, ios::floatfield); - out.precision(6); - - tex_header(out, gd_ptr); - - if((tex == TEX_ALL) || (tex == TEX_FUNC) || (tex == TEX_ALLPLOT)) - tex_func(out, gd_ptr, counter_names, sum); - - out << "\\newpage" << endl; - - if((tex == TEX_ALL) || (tex == TEX_P2P) || (tex == TEX_ALLPLOT)) - tex_p2p(out, gd_ptr, tex, sum); - - out << "\\newpage" << endl; - - if((tex == TEX_ALL) || (tex == TEX_COLLOP) || (tex == TEX_ALLPLOT)) - tex_collop(out, gd_ptr, sum); - - tex_foot(out); - out.close(); - - return 0; -} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Prodtex.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Prodtex.h deleted file mode 100644 index 3639da2765..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Prodtex.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
- Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#ifndef PRODTEX_H -#define PRODTEX_H - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - - -#include -#include -#include -#if defined(HAVE_UNISTD_H) && HAVE_UNISTD_H -# include -#endif - -#include "OTF_inttypes.h" - -#include "Handler.h" - - -using namespace std; - -typedef map CountMap2; - -struct func_temp -{ - uint32_t func_id; - uint64_t invoc; - double incl_time; - CountMap2 count_map2; -}; - -struct count_temp -{ - double min; - double max; -}; - -int prod_tex(int tex, global_data* gd_ptr, vector counter_names, bool sum); - -#endif /* PRODTEX_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Summary.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Summary.cpp deleted file mode 100644 index aaf1be9fa9..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Summary.cpp +++ /dev/null @@ -1,275 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#include "Summary.h" - - -using namespace std; - -/*********************** methodes of the class Process ***********************************/ - -/* fills the stack with functions, which have an enter event on this Process */ - -void Process::set_exclTime(uint32_t func, uint64_t time) -{ - exclt_str e_str; - e_str.func_id = func; - e_str.time = time; - e_str.diff_time = 0; - exclt_stack.push(e_str); -} - -/* returns the time, which has to differ from the leave time */ - -int Process::get_exclTime(uint32_t func, uint32_t process, uint64_t time, global_data* gd_ptr) -{ - if(time < gd_ptr->min_time) - { - exclt_stack.pop(); - return 0; - } - exclt_str e_str; - uint64_t incl_time; - uint64_t counter_incl_value, counter_excl_value; - - if ( exclt_stack.empty() == true) { - /* time for error output */ - /* more leave than enter records */ - cerr << "call stack on process " << process << " invalid: stray Leave " - << func << " at 
time stamp " << time << " == " << hex << time << dec << endl; - return 1; - } - - e_str = exclt_stack.top(); - if((e_str.func_id != func) && (func != 0)) - cerr << "\nFailure in the otf-file.The leaving function doesn't exist." << endl; - - ProcessCounterMap::iterator it_c; - Function_Def_Key f_def_key(1, e_str.func_id); - if(!gd_ptr->sum_container.find_Function(f_def_key)) - cerr << "\nError.Wrong function id saved on stack!" << endl; - - exclt_stack.pop(); - - incl_time = time - e_str.time; - if(!exclt_stack.empty()) - { - exclt_stack.top().diff_time += incl_time; - it_c = exclt_stack.top().p_counter_map.begin(); - ProcessCounterMap::iterator it_c_local; - while(it_c != exclt_stack.top().p_counter_map.end()) - { - it_c_local = e_str.p_counter_map.find(it_c->first); - if((it_c->second.valid == INVALID) || (it_c_local->second.valid == INVALID) - || (it_c_local == e_str.p_counter_map.end())) - { - it_c->second.valid = INVALID; - } - else - { - it_c->second.diff_value += it_c_local->second.second_value - - it_c_local->second.start_value; - } - ++it_c; - } - } - gd_ptr->sum_container.addvalues_Function(1,e_str.func_id, process, 1, - (incl_time - e_str.diff_time), incl_time); - - it_c = e_str.p_counter_map.begin(); - while(it_c != e_str.p_counter_map.end()) - { - if(it_c->second.second_value == 0 ) - gd_ptr->sum_container.addvalues_Counter(1, e_str.func_id, process, it_c->first, - 0, 0, INVALID); - else - { - counter_incl_value = it_c->second.second_value - it_c->second.start_value; - counter_excl_value = counter_incl_value - it_c->second.diff_value; - gd_ptr->sum_container.addvalues_Counter(1, e_str.func_id, process, - it_c->first,counter_excl_value, - counter_incl_value, VALID); - } - ++it_c; - } - return 0; -} - -int Process::set_counter(uint32_t counter, uint64_t time, uint64_t value, global_data* gd_ptr) -{ - if(exclt_stack.empty()) - return 0; - - ProcessCounterMap::iterator it_count = exclt_stack.top().p_counter_map.find(counter); - if(it_count == 
exclt_stack.top().p_counter_map.end()) - { - count_str c_str; - if(exclt_stack.top().time != time) - { - c_str.start_value = 0; - c_str.second_value = 0; - c_str.diff_value = 0; - c_str.start_time = 0; - c_str.second_time = 0; - c_str.valid = INVALID; - } - else - { - c_str.start_value = value; - c_str.second_value = 0; - c_str.diff_value = 0; - c_str.start_time = time; - c_str.second_time = 0; - c_str.valid = VALID; - } - exclt_stack.top().p_counter_map.insert(pair(counter,c_str)); - } - else - { - if(it_count->second.valid == VALID) - { - it_count->second.second_value = value; - it_count->second.second_time = time; - } - } - return 0; -} - -/* counts the events of the different collective operations */ - -void Process::set_data_collective(uint32_t process, uint32_t collop, uint32_t type, bool root, - uint32_t procGroup, uint32_t sent, uint32_t received, - uint64_t duration, global_data* gd_ptr) -{ - - /* completely ignore in lite mode */ - if ( lite ) return; - - - uint32_t root_sent = 0; - uint32_t root_received = 0; - - if(type == OTF_COLLECTIVE_TYPE_BARRIER) - { - gd_ptr->sum_container.addvalues_CollOp(1, process, collop, 1, 0, 0, 0, duration); - } - else if(type == OTF_COLLECTIVE_TYPE_ALL2ALL) - { - root_sent = sent / gd_ptr->p_group_map[procGroup]; - root_received = received / gd_ptr->p_group_map[procGroup]; - gd_ptr->sum_container.addvalues_CollOp(1, process, collop, 1, 1, root_sent, root_received, duration); - } - else if(type == OTF_COLLECTIVE_TYPE_ONE2ALL) - { - if(root) { - root_sent = sent / gd_ptr->p_group_map[procGroup]; - gd_ptr->sum_container.addvalues_CollOp(1, process, collop, 1, 1, root_sent, received, duration); - } - else { - gd_ptr->sum_container.addvalues_CollOp(1, process, collop, 0, 1, sent, received, duration); - } - } - else if(type == OTF_COLLECTIVE_TYPE_ALL2ONE) - { - if(root) { - gd_ptr->sum_container.addvalues_CollOp(1, process, collop, 1, 1, sent, received, duration); - } else { - gd_ptr->sum_container.addvalues_CollOp(1, process, 
collop, 1, 0, sent, received, duration); - } - } - else - cerr << "\nError in otf-file, unknowen type in collective event." << endl; -} - - -/* collects the necessary information to calculate Mbyte per second by a send event*/ - -void Process::set_mbyte_per_sec( uint32_t sender, uint32_t receiver, uint64_t time, uint32_t tag, - bool valid_loc, global_data* gd_ptr ) -{ - - /* completely ignore in lite mode */ - if ( lite ) return; - - /* what is this good for again ???? */ - - receive_str r_str; - - r_str.valid = valid_loc; - r_str.receiver = receiver; - r_str.start_time = time; - r_str.comm_tag = tag; - timelist.push_back(r_str); - - list::iterator send_iter; -} - - -/* collects the necessary information to calculate Mbyte per second by a receive event*/ - -void Process::get_mbyte_per_sec(uint32_t sender, uint32_t receiver, uint64_t time, - uint64_t length, uint32_t tag, global_data* gd_ptr) -{ - - /* completely ignore in lite mode */ - if ( lite ) return; - - send_str s_str; - s_str.sender = sender; - s_str.comm_tag = tag; - s_str.end_time = time; - s_str.length = length; - - recv_map[sender].push_back(s_str); -} - - -void Process::calc_mbyte_per_sec(uint32_t sender, map& proc_map, uint32_t* cpu2thread ,global_data** data_ptr) { - list::iterator send_iter; - list::iterator tmp_iter; - list::iterator recv_iter; - - - /* completely ignore in lite mode */ - if ( lite ) return; - - - uint64_t dur; - uint32_t bin_1, bin_2; - - Process *proc; - for(send_iter = timelist.begin(); send_iter != timelist.end(); ++send_iter) { - proc = &data_ptr[cpu2thread[ proc_map[send_iter->receiver]] ]->p_map[send_iter->receiver]; - for(recv_iter = proc->recv_map[sender].begin(); recv_iter != proc->recv_map[sender].end(); ++recv_iter) - { - if( (recv_iter->sender == sender) && (recv_iter->comm_tag == send_iter->comm_tag) ){ - if(send_iter->valid) { - if(recv_iter->end_time <= send_iter->start_time) { - proc->recv_map[sender].erase(recv_iter); - --send_iter; - cerr << "Error, no time entry 
/ no tag found by calculating MByte per second.\n" << endl; - break; - } - dur = recv_iter->end_time - send_iter->start_time; - bin_1 = data_ptr[0]->sum_container.get_bin_1(recv_iter->length); - bin_2 = data_ptr[0]->sum_container.get_bin_2((double) recv_iter->length / ((double) dur / - (double) data_ptr[0]->ticks)); - - data_ptr[cpu2thread[proc_map[sender]]]->sum_container.addvalues_P2P(1, sender, send_iter->receiver, - bin_1, bin_2, 1, recv_iter->length, dur); - proc->recv_map[sender].erase(recv_iter); - } else { - proc->recv_map[sender].erase(recv_iter); - } - tmp_iter = send_iter; - --send_iter; - timelist.erase(tmp_iter); - break; - } - } - } - -} - diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Summary.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Summary.h deleted file mode 100644 index 76113b99d0..0000000000 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/Summary.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. - Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier -*/ - -#ifndef SUMMARY_H -#define SUMMARY_H - -#include -#include -#include -#include -#include -#include - - -#include "OTF_inttypes.h" -#include "otf.h" - -#include "Definitions.h" -#include "DataStructure.h" - -using namespace std; - - -/* global variable to switch operation mode, see '-lite' command line switch */ -extern bool lite; - - -struct global_data; - - -/******************************** class Process **************************************/ - -/* This class summarizes informations about a Process. 
- Informations are: -Point2Point: send and receive - -Collective operations: barrier, broadcast, - all2one and all2all - -Process identifier and Process name - -A stack which contains all entered functions - without an leaving event.This is useful to - determine the exclusive time of a function.*/ - -class Process -{ - public: - Process(); - void set_proc_start(uint64_t time); - void set_proc_end(uint64_t time); - void set_exclTime(uint32_t func, uint64_t time); - void set_mbyte_per_sec(uint32_t sender, uint32_t receiver, uint64_t time, uint32_t tag, bool valid_loc, - global_data* gd_ptr); - int set_counter(uint32_t counter, uint64_t time, uint64_t value, global_data* gd_ptr); - void set_data_collective(uint32_t process, uint32_t collop, uint32_t type, bool root, - uint32_t procGroup, uint32_t sent, uint32_t received, - uint64_t duration, global_data* gd_ptr); - uint64_t get_proc_start(); - uint64_t get_proc_end(); - int get_exclTime(uint32_t func, uint32_t process, uint64_t time, global_data* gd_ptr); - void get_mbyte_per_sec(uint32_t sender, uint32_t receiver, uint64_t time, - uint64_t length, uint32_t tag, global_data* gd_ptr); - bool get_stack_status(); - uint32_t get_stack_top_func_id(); - void calc_mbyte_per_sec(uint32_t sender, map& proc_map, uint32_t* cpu2thread, - global_data** data_ptr); - void clear_recv_map(int i); - - private: - - bool proc_set; - uint64_t proc_start; - uint64_t proc_end; - - // attributes to get the exclusive time and count - struct count_str - { - uint64_t start_value; - uint64_t second_value; - uint64_t diff_value; - uint64_t start_time; - uint64_t second_time; - bool valid; - }; - typedef map ProcessCounterMap; - struct exclt_str - { - uint32_t func_id; - uint64_t time; - uint64_t diff_time; - ProcessCounterMap p_counter_map; - }; - stack exclt_stack; - - // attribute to get MByte per second - struct receive_str - { - uint32_t receiver; - uint32_t comm_tag; - uint64_t start_time; - bool valid; //necessary by time interval - }; - 
list timelist; - - struct send_str - { - uint32_t sender; - uint32_t comm_tag; - uint64_t end_time; - uint32_t length; - }; - - /* not used in lite mode */ - map > recv_map; -}; - - -/* *** more types *** */ - -typedef map ProcessMap; -typedef map ProcessGroupMap; // - -struct global_data -{ - string filename; - string filename_path; - string creator; - string version; - bool clear_temp; - bool prog; - int TOP_FUNC; - bool vis; //form of visualization - bool var; // show variance instaed of average - uint64_t prog_start; - uint64_t prog_end; - uint64_t min_time; - uint64_t max_time; - uint64_t ticks; - uint32_t num_cpu; - ProcessMap p_map; - ProcessGroupMap p_group_map; - Summary_Container sum_container; -}; - - - -/* *** inline methods *** */ - - -inline Process::Process() -{ - proc_start = 0; - proc_end = 0; - proc_set = false; -} - -inline void Process::set_proc_start(uint64_t ps) -{ - if(!proc_set) - { - proc_start = ps; - proc_set = true; - } -} - -inline void Process::set_proc_end(uint64_t pe) -{ - proc_end = pe; -} - -inline bool Process::get_stack_status() -{ - return exclt_stack.empty(); -} - -inline uint32_t Process::get_stack_top_func_id() -{ - return exclt_stack.top().func_id; -} - -inline uint64_t Process::get_proc_start() -{ - return proc_start; -} - -inline uint64_t Process::get_proc_end() -{ - return proc_end; -} - -inline void Process::clear_recv_map(int i) -{ - - recv_map[i].clear(); -} - -#endif /* SUMMARY_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/clustering.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/clustering.cpp new file mode 100644 index 0000000000..b34af45d26 --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/clustering.cpp @@ -0,0 +1,187 @@ +/* + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
+ Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "OTF_Platform.h" + +#include "clustering.h" +#include "comparison.h" +#include "otfprofile.h" + + +using namespace std; + + +bool ProcessClustering( AllData& alldata ) { + + bool error= false; + + do { + + ostringstream map_data; + + /* start runtime measurement for process comparison */ + StartMeasurement( alldata, 1, true, "process comparison" ); + + /* compare processes */ + + if ( CLUSTER_ALG_CLINKAGE == alldata.params.clustering.alg ) { + + VerbosePrint( alldata, 1, true, + "comparing processes using CLINKAGE\n" ); + + error= !ProcessComparisonCLINKAGE( alldata, map_data ); + + } else { /* CLUSTER_ALG_KMEANS == alldata.params.clustering.alg */ + + VerbosePrint( alldata, 1, true, + "comparing processes using KMEANS\n" ); + + error= !ProcessComparisonKMEANS( alldata, map_data ); + } + +#ifdef OTFPROFILE_MPI + /* synchronize error indicator with workers */ + if ( SyncError( alldata, error ) ) { + + break; + } +#endif /* OTFPROFILE_MPI */ + + /* stop runtime measurement for process comparison */ + StopMeasurement( alldata, true, "process comparison" ); + + /* check for process comparison result */ + + char have_map_data= (char)( 0 < map_data.str().length() ); + +#ifdef OTFPROFILE_MPI + MPI_Bcast( &have_map_data, 1, MPI_CHAR, 0, MPI_COMM_WORLD ); +#endif /* OTFPROFILE_MPI */ + + if ( !have_map_data ) { + + if ( 0 == alldata.myRank ) { + + cerr << "WARNING: Process comparison did not give any results." + << endl; + } + + break; + } + + /* the master creates the process mapping file and (if desired) applies + it to otfshrink */ + + if ( 0 == alldata.myRank ) { + + /* open process mapping file */ + + ofstream map_file( + alldata.params.clustering.map_file_name.c_str() ); + if ( !map_file ) { + + cerr << "ERROR: Unable to open file '" + << alldata.params.clustering.map_file_name + << "' for writing." 
<< endl; + + error= true; + + } else { + + /* write mapping data to file */ + map_file << map_data.str(); + + /* close process mapping file */ + map_file.close(); + + VerbosePrint( alldata, 2, true, " created file: %s\n", + alldata.params.clustering.map_file_name.c_str() ); + + if ( alldata.params.clustering.shrink ) { + + /* call otfshrink with created process mapping file */ + + /* start runtime measurement for shrinking input trace */ + StartMeasurement( alldata, 1, false, "shrink input trace" ); + + VerbosePrint( alldata, 1, true, "shrinking input trace\n" ); + + /* composing command */ + + char cmd[1024]; + + snprintf( cmd, sizeof( cmd ) - 1, + "otfshrink -i %s -o %s -f %s", + alldata.params.input_file_prefix.c_str(), + alldata.params.clustering.shrink_output_prefix.c_str(), + alldata.params.clustering.map_file_name.c_str() ); + + /* run command */ + + VerbosePrint( alldata, 2, true, " running command: %s\n", + cmd ); + + int rc= system( cmd ); + + /* evaluate exit status */ + + int es= ( -1 != rc ) ? WEXITSTATUS( rc ) : 0; + + /* command could not be executed; print warning message */ + if ( -1 == rc || 127 == es ) { + + ostringstream warn_msg; + + warn_msg << "Warning: Could not execute command '" + << cmd << "'"; + + if ( -1 == rc ) { + + warn_msg << " (" << strerror( errno ) << ")"; + } + + warn_msg << "." << endl + << "Try to run this command manually in terminal " + << "to shrink the input trace."; + + cerr << warn_msg.str() << endl; + + /* command executed, but failed; abort */ + } else if ( 0 != es ) { + + cerr << "ERROR: Could not shrink input trace '" + << alldata.params.input_file_prefix << "' to '" + << alldata.params.clustering.shrink_output_prefix + << "'. otfshrink returned with exit code " << es + << "." 
<< endl; + error= true; + + /* command executed successfully */ + } else { /* es == 0 */ + + /* stop runtime measurement for shrinking input trace */ + StopMeasurement( alldata, false, "shrink input trace" ); + } + } + } + } + +#ifdef OTFPROFILE_MPI + /* broadcast error indicator to workers */ + SyncError( alldata, error, 0 ); +#endif /* OTFPROFILE_MPI */ + + } while( false ); + + return !error; +} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/clustering.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/clustering.h new file mode 100644 index 0000000000..fde289348c --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/clustering.h @@ -0,0 +1,17 @@ +/* + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. + Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz +*/ + +#ifndef CLUSTERING_H +#define CLUSTERING_H + + +#include "datastructs.h" + + +/* do additional process clustering */ +bool ProcessClustering( AllData& alldata ); + + +#endif /* CLUSTERING_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_data.cpp similarity index 76% rename from ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.cpp rename to ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_data.cpp index be43747fad..62e0c858a8 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.cpp +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_data.cpp @@ -3,8 +3,6 @@ Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz */ -using namespace std; - #include #include #include @@ -16,7 +14,10 @@ using namespace std; #include "otfaux.h" #include "collect_data.h" -#include "otfprofile-mpi.h" +#include "otfprofile.h" + + +using namespace std; static void prepare_progress( AllData& alldata, uint64_t max_bytes ) { @@ -25,6 +26,8 @@ static void prepare_progress( AllData& alldata, uint64_t max_bytes ) { 
progress.cur_bytes= 0; progress.max_bytes= max_bytes; + +#ifdef OTFPROFILE_MPI progress.ranks_left= alldata.numRanks -1; if ( 1 < alldata.numRanks ) { @@ -79,6 +82,7 @@ static void prepare_progress( AllData& alldata, uint64_t max_bytes ) { MPI_Barrier( MPI_COMM_WORLD ); } +#endif /* OTFPROFILE_MPI */ if ( 0 == alldata.myRank ) { @@ -99,6 +103,7 @@ static void update_progress( AllData& alldata, uint64_t delta_bytes, uint64_t sum_cur_bytes= progress.cur_bytes; +#ifdef OTFPROFILE_MPI if ( 1 < alldata.numRanks ) { if ( 0 == alldata.myRank ) { @@ -181,6 +186,7 @@ static void update_progress( AllData& alldata, uint64_t delta_bytes, } } +#endif /* OTFPROFILE_MPI */ if ( 0 == alldata.myRank ) { @@ -203,6 +209,7 @@ static void update_progress( AllData& alldata, uint64_t delta_bytes, static void finish_progress( AllData& alldata ) { +#ifdef OTFPROFILE_MPI Progress& progress= alldata.progress; if ( 1 < alldata.numRanks ) { @@ -217,6 +224,17 @@ static void finish_progress( AllData& alldata ) { update_progress( alldata, 0, true ); } + /* ensure that all requests are inactive before freeing memory */ + MPI_Waitall( alldata.numRanks - 1, progress.recv_requests, + progress.recv_statuses ); + + /* free memory */ + delete [] progress.recv_buffers; + delete [] progress.recv_requests; + delete [] progress.recv_statuses; + delete [] progress.recv_indices; + delete [] progress.rank_cur_bytes; + } else { /* 0 != alldata.myRank */ MPI_Status status; @@ -236,6 +254,7 @@ static void finish_progress( AllData& alldata ) { } } +#endif /* OTFPROFILE_MPI */ if ( 0 == alldata.myRank ) { @@ -243,21 +262,6 @@ static void finish_progress( AllData& alldata ) { printf( "%7.2f %% done\n", 100.0 ); } - - if( 1 < alldata.numRanks && 0 == alldata.myRank ) { - - /* ensure that all requests are inactive before freeing memory */ - MPI_Waitall( alldata.numRanks - 1, progress.recv_requests, - progress.recv_statuses ); - - /* free memory */ - delete [] progress.recv_buffers; - delete [] progress.recv_requests; 
- delete [] progress.recv_statuses; - delete [] progress.recv_indices; - delete [] progress.rank_cur_bytes; - - } } @@ -480,7 +484,7 @@ static int handle_leave( void* fha, uint64_t time, uint32_t function, uint64_t counter_incl= lastvalue - firstvalue; uint64_t counter_excl= counter_incl - it->second.childDelta; - alldata->counterMapPerFunctionRank[ Triple( counter, func, process ) ] + alldata->counterMapPerFunctionRank[ Triple( process, func, counter ) ] .add( 1, counter_excl, counter_incl ); if ( parent_it != stack.rend() ) { @@ -500,7 +504,7 @@ static int handle_leave( void* fha, uint64_t time, uint32_t function, cerr << " func " << func << " @ process " << process << ": " << "excl " << excl << " ticks, incl " << incl << " ticks" << endl; */ - alldata->functionMapPerRank[ Pair( func, process ) ].add( 1, excl, incl ); + alldata->functionMapPerRank[ Pair( process, func ) ].add( 1, excl, incl ); return OTF_RETURN_OK; } @@ -568,13 +572,19 @@ static int handle_send( void* fha, uint64_t time, uint32_t sender, if ( 0 == OTF_KeyValueList_getUint64( kvlist, alldata->recvTimeKey, &recv_time ) ) { - duration= (double) ( recv_time - time ); + /* ignore "backward-running" messages */ + if( recv_time > time ) { + + duration= (double) ( recv_time - time ); + + } } } alldata->messageMapPerRankPair[ Pair(sender, receiver) ] .add_send( 1, length, duration ); + alldata->messageMapPerRank[ sender ].add_send( 1, length, duration ); /* get message speed */ @@ -608,6 +618,7 @@ static int handle_recv( void* fha, uint64_t time, uint32_t receiver, alldata->messageMapPerRankPair[ Pair(receiver, sender) ] .add_recv( 1, length, duration ); + alldata->messageMapPerRank[ receiver ].add_recv( 1, length, duration ); return OTF_RETURN_OK; } @@ -655,22 +666,22 @@ static int handle_end_collop( void* fha, uint64_t time, uint32_t process, if ( OTF_COLLECTIVE_TYPE_BARRIER == op_class ) { - alldata->collectiveMapPerRank[ Pair( op_class, process ) ] + alldata->collectiveMapPerRank[ Pair( process, 
op_class ) ] .add_send( 1, 0, duration ); - alldata->collectiveMapPerRank[ Pair( op_class, process ) ] + alldata->collectiveMapPerRank[ Pair( process, op_class ) ] .add_recv( 1, 0, duration ); } else { if ( 0 < pending.bytes_send ) { - alldata->collectiveMapPerRank[ Pair( op_class, process ) ] + alldata->collectiveMapPerRank[ Pair( process, op_class ) ] .add_send( 1, pending.bytes_send, duration ); } if ( 0 < pending.bytes_recv ) { - alldata->collectiveMapPerRank[ Pair( op_class, process ) ] + alldata->collectiveMapPerRank[ Pair( process, op_class ) ] .add_recv( 1, pending.bytes_recv, duration ); } @@ -706,7 +717,7 @@ static int handle_function_summary( void* fha, uint64_t time, uint32_t func, tmp.incl_time.sum = inclTime; tmp.incl_time.min = tmp.incl_time.max = 0; - alldata->functionMapPerRank[ Pair( func, process ) ]= tmp; + alldata->functionMapPerRank[ Pair( process, func ) ]= tmp; return OTF_RETURN_OK; } @@ -750,7 +761,7 @@ static int handle_message_summary( void* fha, uint64_t time, uint32_t process, } alldata->messageMapPerRankPair[ Pair(process, peer) ]= tmp; - + alldata->messageMapPerRank[ process ]= tmp; } return OTF_RETURN_OK; @@ -803,7 +814,7 @@ static int handle_collop_summary( void* fha, uint64_t time, uint32_t process, } - alldata->collectiveMapPerRank[ Pair( op_class, process ) ]= tmp; + alldata->collectiveMapPerRank[ Pair( process, op_class ) ]= tmp; } @@ -886,9 +897,10 @@ static bool read_definitions( AllData& alldata, OTF_Reader* reader ) { } +#ifdef OTFPROFILE_MPI static void share_definitions( AllData& alldata ) { - MPI_Barrier( MPI_COMM_WORLD ); + assert( 1 < alldata.numRanks ); char* buffer; int buffer_size= 0; @@ -898,12 +910,59 @@ static void share_definitions( AllData& alldata ) { if ( 0 == alldata.myRank ) { - MPI_Pack_size( 1 + alldata.collectiveOperationsToClasses.size() * 2 + + int size; + + MPI_Pack_size( 1 + alldata.functionIdNameMap.size() * 2 + + 1 + alldata.counterIdNameMap.size() * 2 + + 1 + 
alldata.collectiveOperationsToClasses.size() * 2 + 1 + alldata.countersOfInterest.size() + 1 /* timerResolution */ + 1 /* recvTimeKey */, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &buffer_size ); + /* functionIdNameMap seconds */ + for ( map< uint64_t, string >::const_iterator it = + alldata.functionIdNameMap.begin(); + it != alldata.functionIdNameMap.end(); it++ ) { + + MPI_Pack_size( it->second.length() +1, MPI_CHAR, + MPI_COMM_WORLD, &size ); + buffer_size += size; + + } + + /* counterIdNameMap seconds */ + for ( map< uint64_t, string >::const_iterator it = + alldata.counterIdNameMap.begin(); + it != alldata.counterIdNameMap.end(); it++ ) { + + MPI_Pack_size( it->second.length() +1, MPI_CHAR, + MPI_COMM_WORLD, &size ); + buffer_size += size; + + } + + /* get size of additional definitions needed for CSV creation */ + if ( alldata.params.create_csv ) { + + /* processIdNameMap.size() + firsts */ + MPI_Pack_size( 1 + alldata.processIdNameMap.size() * 2, + MPI_LONG_LONG_INT, MPI_COMM_WORLD, &size ); + buffer_size += size; + + /* processIdNameMap seconds */ + for ( map< uint64_t, string >::const_iterator it = + alldata.processIdNameMap.begin(); + it != alldata.processIdNameMap.end(); it++ ) { + + MPI_Pack_size( it->second.length() +1, MPI_CHAR, + MPI_COMM_WORLD, &size ); + buffer_size += size; + + } + + } + } /* broadcast buffer size */ @@ -917,6 +976,64 @@ static void share_definitions( AllData& alldata ) { if ( 0 == alldata.myRank ) { + /* functionIdNameMap.size() */ + uint64_t funcid_name_map_size= alldata.functionIdNameMap.size(); + MPI_Pack( &funcid_name_map_size, 1, MPI_LONG_LONG_INT, buffer, + buffer_size, &buffer_pos, MPI_COMM_WORLD ); + + /* functionIdNameMap */ + for ( map< uint64_t, string >::const_iterator it = + alldata.functionIdNameMap.begin(); + it != alldata.functionIdNameMap.end(); it++ ) { + + /* functionIdNameMap.first */ + uint64_t first= it->first; + MPI_Pack( &first, 1, MPI_LONG_LONG_INT, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + + /* 
functionIdNameMap.second.length() */ + uint64_t second_length= it->second.length() +1; + MPI_Pack( &second_length, 1, MPI_LONG_LONG_INT, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + + /* functionIdNameMap.second */ + char* second= strdup( it->second.c_str() ); + assert( second ); + MPI_Pack( second, second_length, MPI_CHAR, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + free( second ); + + } + + /* counterIdNameMap.size() */ + uint64_t cntrid_name_map_size= alldata.counterIdNameMap.size(); + MPI_Pack( &cntrid_name_map_size, 1, MPI_LONG_LONG_INT, buffer, + buffer_size, &buffer_pos, MPI_COMM_WORLD ); + + /* counterIdNameMap */ + for ( map< uint64_t, string >::const_iterator it = + alldata.counterIdNameMap.begin(); + it != alldata.counterIdNameMap.end(); it++ ) { + + /* counterIdNameMap.first */ + uint64_t first= it->first; + MPI_Pack( &first, 1, MPI_LONG_LONG_INT, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + + /* counterIdNameMap.second.length() */ + uint64_t second_length= it->second.length() +1; + MPI_Pack( &second_length, 1, MPI_LONG_LONG_INT, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + + /* counterIdNameMap.second */ + char* second= strdup( it->second.c_str() ); + assert( second ); + MPI_Pack( second, second_length, MPI_CHAR, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + free( second ); + + } + /* collectiveOperationsToClasses.size() */ uint64_t collop_classes_map_size= alldata.collectiveOperationsToClasses.size(); @@ -963,6 +1080,40 @@ static void share_definitions( AllData& alldata ) { MPI_Pack( &(alldata.timerResolution), 1, MPI_LONG_LONG_INT, buffer, buffer_size, &buffer_pos, MPI_COMM_WORLD ); + /* pack additional definitions needed for CSV creation */ + if ( alldata.params.create_csv ) { + + /* processIdNameMap.size() */ + uint64_t procid_name_map_size= alldata.processIdNameMap.size(); + MPI_Pack( &procid_name_map_size, 1, MPI_LONG_LONG_INT, buffer, + buffer_size, &buffer_pos, MPI_COMM_WORLD ); + + /* 
processIdNameMap */ + for ( map< uint64_t, string >::const_iterator it = + alldata.processIdNameMap.begin(); + it != alldata.processIdNameMap.end(); it++ ) { + + /* processIdNameMap.first */ + uint64_t first= it->first; + MPI_Pack( &first, 1, MPI_LONG_LONG_INT, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + + /* processIdNameMap.second.length() */ + uint64_t second_length= it->second.length() +1; + MPI_Pack( &second_length, 1, MPI_LONG_LONG_INT, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + + /* processIdNameMap.second */ + char* second= strdup( it->second.c_str() ); + assert( second ); + MPI_Pack( second, second_length, MPI_CHAR, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + free( second ); + + } + + } + } /* broadcast definitions buffer */ @@ -972,6 +1123,68 @@ static void share_definitions( AllData& alldata ) { if ( 0 != alldata.myRank ) { + /* functionIdNameMap.size() */ + uint64_t funcid_name_map_size; + MPI_Unpack( buffer, buffer_size, &buffer_pos, + &funcid_name_map_size, 1, MPI_LONG_LONG_INT, + MPI_COMM_WORLD ); + + /* functionIdNameMap */ + for ( uint64_t i= 0; i < funcid_name_map_size; i++ ) { + + /* functionIdNameMap.first */ + uint64_t first; + MPI_Unpack( buffer, buffer_size, &buffer_pos, &first, 1, + MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + /* functionIdNameMap.second.length() */ + uint64_t second_length; + + MPI_Unpack( buffer, buffer_size, &buffer_pos, &second_length, 1, + MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + /* functionIdNameMap.second */ + char* second= new char[ second_length ]; + assert( second ); + MPI_Unpack( buffer, buffer_size, &buffer_pos, second, + second_length, MPI_CHAR, MPI_COMM_WORLD ); + + alldata.functionIdNameMap[ first ]= second; + + delete [] second; + } + + /* counterIdNameMap.size() */ + uint64_t cntrid_name_map_size; + MPI_Unpack( buffer, buffer_size, &buffer_pos, + &cntrid_name_map_size, 1, MPI_LONG_LONG_INT, + MPI_COMM_WORLD ); + + /* counterIdNameMap */ + for ( uint64_t i= 0; i < 
cntrid_name_map_size; i++ ) { + + /* counterIdNameMap.first */ + uint64_t first; + MPI_Unpack( buffer, buffer_size, &buffer_pos, &first, 1, + MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + /* counterIdNameMap.second.length() */ + uint64_t second_length; + + MPI_Unpack( buffer, buffer_size, &buffer_pos, &second_length, 1, + MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + /* counterIdNameMap.second */ + char* second= new char[ second_length ]; + assert( second ); + MPI_Unpack( buffer, buffer_size, &buffer_pos, second, + second_length, MPI_CHAR, MPI_COMM_WORLD ); + + alldata.counterIdNameMap[ first ]= second; + + delete [] second; + } + /* collectiveOperationsToClasses.size() */ uint64_t collop_classes_map_size; MPI_Unpack( buffer, buffer_size, &buffer_pos, @@ -1018,11 +1231,46 @@ static void share_definitions( AllData& alldata ) { MPI_Unpack( buffer, buffer_size, &buffer_pos, &(alldata.timerResolution), 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + /* unpack additional definitions needed for CSV creation */ + if ( alldata.params.create_csv ) { + /* processIdNameMap.size() */ + uint64_t procid_name_map_size; + MPI_Unpack( buffer, buffer_size, &buffer_pos, + &procid_name_map_size, 1, MPI_LONG_LONG_INT, + MPI_COMM_WORLD ); + + /* processIdNameMap */ + for ( uint64_t i= 0; i < procid_name_map_size; i++ ) { + + /* processIdNameMap.first */ + uint64_t first; + MPI_Unpack( buffer, buffer_size, &buffer_pos, &first, 1, + MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + /* processIdNameMap.second.length() */ + uint64_t second_length; + + MPI_Unpack( buffer, buffer_size, &buffer_pos, &second_length, 1, + MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + /* processIdNameMap.second */ + char* second= new char[ second_length ]; + assert( second ); + MPI_Unpack( buffer, buffer_size, &buffer_pos, second, + second_length, MPI_CHAR, MPI_COMM_WORLD ); + + alldata.processIdNameMap[ first ]= second; + + delete [] second; + } + } } delete[] buffer; } +#endif /* OTFPROFILE_MPI */ static bool read_events( AllData& 
alldata, OTF_Reader* reader ) { @@ -1236,6 +1484,9 @@ bool CollectData( AllData& alldata ) { bool error= false; + /* start runtime measurement for collecting data */ + StartMeasurement( alldata, 1, true, "collect data" ); + /* open OTF file manager and reader */ OTF_FileManager* manager= @@ -1258,6 +1509,7 @@ bool CollectData( AllData& alldata ) { } +#ifdef OTFPROFILE_MPI /* broadcast error indicator to workers */ if ( SyncError( alldata, error, 0 ) ) { @@ -1272,6 +1524,7 @@ bool CollectData( AllData& alldata ) { share_definitions( alldata ); } +#endif /* OTFPROFILE_MPI */ /* either read data from events or statistics */ @@ -1296,8 +1549,10 @@ bool CollectData( AllData& alldata ) { } +#ifdef OTFPROFILE_MPI /* synchronize error indicator with workers */ SyncError( alldata, error ); +#endif /* OTFPROFILE_MPI */ } while( false ); @@ -1306,5 +1561,12 @@ bool CollectData( AllData& alldata ) { OTF_Reader_close( reader ); OTF_FileManager_close( manager ); + if ( !error ) { + + /* stop runtime measurement for collecting data */ + StopMeasurement( alldata, true, "collect data" ); + + } + return !error; } diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_data.h similarity index 100% rename from ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.h rename to ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/collect_data.h diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/comparison.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/comparison.h new file mode 100644 index 0000000000..7262eaf66a --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/comparison.h @@ -0,0 +1,25 @@ +/* + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
+ Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz +*/ + +#ifndef COMPARISON_KMEANS_H +#define COMPARISON_KMEANS_H + + +#include + +#include "datastructs.h" + + +/* process comparison; analyze process properties to determine similar or + different behavior patterns */ + +/* do process comparison using the CLINKAGE algorithm */ +bool ProcessComparisonCLINKAGE( AllData& alldata, ostringstream& mapdata ); + +/* do process comparison using the K-means algorithm */ +bool ProcessComparisonKMEANS( AllData& alldata, ostringstream& mapdata ); + + +#endif /* COMPARISON_KMEANS_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/comparison_clinkage.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/comparison_clinkage.cpp new file mode 100644 index 0000000000..13160256ef --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/comparison_clinkage.cpp @@ -0,0 +1,702 @@ +/* + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. + Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz, Andre Groetzsch +*/ + +using namespace std; + +#include +#include +#include +#include +#include + +#include "comparison.h" + + +/* type for pairs in the distance map of CLINKAGE + * ('Pair' struct in datastructs.h doesnt contain a less operator that's needed for the map) */ +typedef pair StdPairOfRanks; + +/* candidate pair of IDs 'a' and 'b' together with their distance 'd'; operator< orders by 'd' only, operator== compares 'a' and 'b' only */ +struct DPair { + + uint64_t a; + uint64_t b; + double d; // distance of a and b + + bool operator<(const DPair& p) const; + bool operator==(const DPair& r) const; + DPair(uint64_t a, uint64_t b, double d) : a(a),b(b),d(d) {} +}; + +bool DPair::operator<(const DPair& p) const +{return d < p.d;} + +bool DPair::operator==(const DPair& p) const +{return a==p.a && b==p.b;} + + +template +static bool vector_equal( const T* a, const T* b, uint32_t len ); + +/* return true if the first 'len' elements of 'a' and 'b' are pairwise equal, false otherwise */ +template +static bool vector_equal( const T* a, const T* b, uint32_t len ) { + + uint32_t index= 0; + while ( ( len > index ) && ( *a == *b ) ) { + + ++a; + ++b; + ++index; + } + + 
return ( len == index ); +} + + +/* normalized distance measure for clustering: 1 - sum(min(a[i], b[i])) / sum(max(a[i], b[i])) over the first 'n' elements; NOTE(review): the division is not guarded against a zero sum of maxima -- confirm callers never pass two all-zero vectors */ +static double ndist( const double* a, const double* b, uint32_t n){ + + double smin=0, smax=0; + for ( uint32_t i= 0; i < n; ++i ){ + + smin += min(*a, *b); + smax += max(*a, *b); + a++; + b++; + } + return 1 - smin / smax; // 1 - sum(pmin(a, b)) / sum(pmax(a, b)); +} + + +/* normalized distance measure for clustering * +static double ndist_calls( const double* a, const double* b, uint32_t n){ + + double dmax = 0; + + for ( uint32_t i= 0; i < n; ++i, ++a, ++b ){ + + if (max(*a, *b) > 0) dmax = max (dmax, min(*a, *b) / max(*a, *b)); + } + return 1 - dmax; +} +*/ + +/* erase from 'set' the entry whose 'a' and 'b' match 'key', searching only the range returned by equal_range(key); returns true if an entry was erased, false if none was found */ +static bool erasekey(multiset& set, const DPair& key ) +{ + multiset::iterator iter; + + //cout << "try to erase pair (" << key.a << ", " << key.b << ")" << endl; + + pair::iterator, multiset::iterator> ret; + ret = set.equal_range(key); + + for (iter=ret.first; iter!=ret.second && !(*iter == key); ++iter); + + + if (iter!=ret.second) // so the dpair is found + { + //cout << "erasing pair (" << iter->a << ", " << iter->b << ")" << endl; + set.erase(iter); + return true; + } + + //cout << " dpair not found!" << endl; + return false; +} + +/* compute the distance between the vectors at 'it1' and 'it2', store it in 'dist' and queue the pair in 'dpairs' if the distance is below 'qt' */ +static void NewPair(map& dist, multiset& dpairs, uint32_t& len, double& qt, + map< uint64_t, double* >::const_iterator& it1, map< uint64_t, double* >::const_iterator& it2){ + + /* it1->first is expected to be less than it2->first! 
*/ + + /* compute distance */ + double d = ndist(it1->second, it2->second, len); + + /* save distance */ + dist[make_pair(it1->first, it2->first)] = d; + + /* insert pair to dpairs only if quality threshold is satisfied */ + if (d < qt) dpairs.insert(DPair(it1->first, it2->first, d)); +} + + +/* cluster the IDs in 'members' (or all keys of 'vectors' if 'members' is empty) by their vectors of length 'len': repeatedly take the queued pair with the smallest distance, merge 'b' into 'a' and raise the stored distance from 'a' to every other representative to the larger of its distances to 'a' and 'b'; only pairs with a distance below 'qt' are queued; one set of IDs per resulting cluster is appended to 'ret_vector' */ +static void CLINKAGE( const set< uint64_t >& members, const map< uint64_t, double* >& vectors, uint32_t len, + vector< set< uint64_t > >& ret_vector, double qt ) { + + //time_t startTime = time(NULL); + + map dist; // distance map + + multiset dpairs; // priority queue of dpairs + + set< uint64_t > unclustered; + + if (members.empty()) { // consider all ranks + + /* compute distance map, dpairs and set of all processes (all vectors) */ + for (map< uint64_t, double* >::const_iterator it1 = vectors.begin(); it1 != vectors.end(); ++it1 ){ + + unclustered.insert( it1->first ); + + for (map< uint64_t, double* >::const_iterator it2 = vectors.begin(); it2 != it1; ++it2 ){ + + NewPair(dist, dpairs, len, qt, it2, it1); // (it2->first < it1->first) + } + } + } + else { // consider members only + + /* compute distance map, dpairs and set of all processes */ + for ( set< uint64_t >::const_iterator it1 = members.begin(); it1 != members.end(); ++it1 ){ + + unclustered.insert( *it1 ); + + /* 'vectors' must not be declared as constant for using the element access operator [] + * alternative: use 'find! 
*/ + + map< uint64_t, double* >::const_iterator itv1 = vectors.find(*it1); + + for ( set< uint64_t >::const_iterator it2 = members.begin(); it2 != it1; ++it2 ){ + + map< uint64_t, double* >::const_iterator itv2 = vectors.find(*it2); + + NewPair(dist, dpairs, len, qt, itv2, itv1); // (it2->first < it1->first) + } + } + } + + /* ALGORITHM */ + + map< uint64_t, set > clustered; // map of clusters +// map< uint64_t, uint64_t> > clustered; // map of clustering + + while (!dpairs.empty()) { + + /* plot current distance matrix * + cout << "\t \t"; + for (set< uint64_t >::const_iterator it_a = unclustered.begin(); it_a != unclustered.end(); ++it_a ) cout << *it_a << "\t"; + cout << endl; + for (set< uint64_t >::const_iterator it_a = unclustered.begin(); it_a != unclustered.end(); ++it_a ){ + + cout << "process " << *it_a << "\t "; + for (set< uint64_t >::const_iterator it_b = unclustered.begin(); it_b != unclustered.end(); ++it_b ){ + + if (*it_a > *it_b) printf("%.3f \t", dist[make_pair(*it_b, *it_a)]); + + else cout << "*\t"; + } + cout << endl; + } + cout << endl; */ + + multiset::iterator it = dpairs.begin(); + DPair topDPair = *it; + dpairs.erase(it); // erase at known position -> fast + + uint64_t a = topDPair.a, b = topDPair.b; + + //cout << "closest pair: (" << a << ", " << b << ") with distance: " << topDPair.d << endl; + + /* delete b from 'unclustered' */ + unclustered.erase(b); + + /* copy b and all elements in clustered[b] to clustered[a] */ + clustered[a].insert(b); + clustered[a].insert(clustered[b].begin(), clustered[b].end()); + /* clear b */ + clustered[b].clear(); + + //clustered[b] = a; // save the cluster assignment + + /* update distances */ + for (set< uint64_t >::const_iterator c = unclustered.begin(); c != unclustered.end(); ++c) { + + if (*c == a) continue; + + uint64_t min_ac = min(a, *c); + uint64_t max_ac = max(a, *c); + + double ac = dist[make_pair(min_ac, max_ac)]; + double bc = dist[make_pair(min(b, *c), max(b, *c))]; + + if ( ac < bc ) { + 
+ if (ac < qt) erasekey(dpairs, DPair(min_ac, max_ac, ac)); + + dist[make_pair(min_ac, max_ac)] = bc; + + if (bc < qt) dpairs.insert(DPair(min_ac, max_ac, bc)); + } + } + + //cout << "--> " << b << " clustered in " << a << endl; + + /* erase dpairs a with a.a==b or a.b==b */ + for (set< uint64_t >::const_iterator it_a = unclustered.begin(); it_a != unclustered.end(); ++it_a){ + + if (*it_a == a || *it_a == b) continue; + + double d = dist[make_pair(min(b, *it_a), max(b, *it_a))]; + + if ( d < qt) erasekey(dpairs, DPair(min(b, *it_a), max(b, *it_a), d)); + + // else: pair doesn't exist! + } + + } /* now 'unclustered' contains the representatives of the clusters as result */ + + //time_t endTime = time(NULL); + + //cout << "clustering of " ; + + //if (members.empty()) cout << vectors.size(); + //else cout << members.size(); + + //cout << " processes with " << len << " functions " << "done in " << endTime-startTime + // << " seconds." << endl; + //cout << "determined clusters: " << unclustered.size() << " (quality threshold: " << qt << ")." 
<< endl; + + /* clustering out */ + for ( set< uint64_t >::const_iterator it = unclustered.begin(); it != unclustered.end(); ++it ){ + + /* add unclustered *it to its cluster */ + clustered[*it].insert(*it); + + /* function out */ + ret_vector.push_back(clustered[*it]); + + /* on screen * + cout << "cluster " << *it << ": "; + for (set< uint64_t >::const_iterator it_b = clustered[*it].begin(); + it_b != clustered[*it].end(); ++it_b) { + cout << *it_b << " "; + } + cout << endl;*/ + } + + +} + + +/* CLINKAGE signature without 'members' (no hard groups): passes an empty member set so that all keys of 'vectors' are clustered */ + +static void CLINKAGE( const map< uint64_t, double* >& vectors, uint32_t len, + vector< set< uint64_t > >& ret_vector, double qt){ + + set< uint64_t > defaultset; + + CLINKAGE(defaultset, vectors, len, ret_vector, qt ); +} + +/* process comparison with CLINKAGE: runs on rank 0 only (other ranks return true immediately); builds one vector per process from the function statistics (or synthetic data if 'synth_data' is set), clusters them and writes one line of member IDs per cluster to 'mapdata' */ +bool ProcessComparisonCLINKAGE( AllData& alldata, ostringstream& mapdata ) { + + /* only the master performs the process comparison */ + if ( 0 != alldata.myRank ) { + + return true; + } + + /* set quality threshold */ + double qt = alldata.params.clustering.quality_threshold; + + + /* get vectors with number-of-calls for all functions in same order */ + + /* map process ID to vector of function call counts, + the values in the vector are in same order -- it is sorted after + 'functionMapGlobal' which is guaranteed to contain all occurring function IDs */ + uint32_t vector_length= alldata.functionMapGlobal.size(); + map< uint64_t, uint64_t* > callcountPerRank; + map< uint64_t, double* > calltimesPerRank; + + if (!alldata.params.clustering.synth_data) { + + map< uint64_t, string >::const_iterator it= alldata.processIdNameMap.begin(); + map< uint64_t, string >::const_iterator itend= alldata.processIdNameMap.end(); + for ( ; it != itend; ++it ) { + + /* get the next vector to fill in the following loop */ + uint64_t* vc= callcountPerRank[ it->first ]= (uint64_t*) malloc( vector_length * sizeof(uint64_t) ); + assert( vc ); + double* vt= calltimesPerRank[ it->first ]= (double*) malloc( 
vector_length * sizeof(double) ); + assert( vt ); + + map< Pair, FunctionData, ltPair >::const_iterator ft; + map< Pair, FunctionData, ltPair >::const_iterator ftend= alldata.functionMapPerRank.end(); + + map< uint64_t, FunctionData >::const_iterator jt= alldata.functionMapGlobal.begin(); + map< uint64_t, FunctionData >::const_iterator jtend= alldata.functionMapGlobal.end(); + uint32_t index= 0; + for ( ; jt != jtend; ++jt ) { + + ft= alldata.functionMapPerRank.find( Pair( it->first, jt->first ) ); // iterators swapped! + uint64_t c= ( ft != ftend ) ? ft->second.count.sum : 0 ; + + double t= ( ft != ftend ) ? ft->second.excl_time.sum : 0 ; // in cpu ticks not seconds! + // double t= ( ft != ftend ) ? ft->second.excl_time.sum / alldata.timerResolution : 0 ; + + vc[index]= c ; + vt[index]= t ; + index++; + } + } + } + + /*{ + cout << " count vectors of size " << vector_length << " : " << endl; + map< uint64_t, uint64_t* >::const_iterator kt= callcountPerRank.begin(); + map< uint64_t, uint64_t* >::const_iterator ktend= callcountPerRank.end(); + for ( ;kt != ktend; ++kt ) { + + cout << " " << kt->first << " count : " ; + + uint32_t i= 0; + uint64_t* p= kt->second; + for ( ; i < vector_length; ++i, ++p ) { + + cout << *p << " "; + } + cout << endl; + } + }*/ + + + + /*{ + cout << " times vectors of size " << vector_length << " : " << endl; + map< uint64_t, double* >::const_iterator kt= calltimesPerRank.begin(); + map< uint64_t, double* >::const_iterator ktend= calltimesPerRank.end(); + for ( ;kt != ktend; ++kt ) { + cout << " " << kt->first << " times : " ; + uint32_t i= 0; + double* p= kt->second; + + for ( ; i < vector_length; ++i, ++p ) { + + cout << *p << " "; + } + cout << endl; + + + } + }*/ + + else { // -> alldata.params.synthData == true + + /* generate some synthetic data for testing the performance of the CLINK-algorithm */ + + vector_length = alldata.params.clustering.synth_funcs_num; // number of functions + + for ( uint64_t i=1; i <= 
alldata.params.clustering.synth_ranks_num; i++ ) { // number of processes + + double* vt= calltimesPerRank[ i ]= (double*) malloc( vector_length * sizeof(double) ); + assert( vt ); + + for ( uint64_t j=0; j > result_vector; + + CLINKAGE( calltimesPerRank, vector_length, result_vector, qt ); + + /* print cluster assignments */ + vector< set< uint64_t > >::const_iterator it= result_vector.begin(); + vector< set< uint64_t > >::const_iterator itend= result_vector.end(); + for ( ; it != itend ; ++it ) { + + set< uint64_t >::const_iterator jt= it->begin(); + set< uint64_t >::const_iterator jtend= it->end(); + for ( ; jt != jtend ; ++jt ) { + + mapdata << *jt << " "; + } + mapdata << endl; + } + + + if (!alldata.params.clustering.synth_data) { + + for ( map< uint64_t, uint64_t* >::const_iterator it = callcountPerRank.begin(); + it != callcountPerRank.end(); it++) free(it->second); + } // didn't allocate memory to callcountPerRank in the synthetic data case + + for ( map< uint64_t, double* >::const_iterator it = calltimesPerRank.begin(); + it != calltimesPerRank.end(); it++) free(it->second); + + return true; + } + + /* else: clustering with hard groups */ + + /* extract groups of processes that are identical with respect to hard properties. + Therefore, make candidate set, compare all processes with remaining candidates, + when processes match, create target sets and remove processes from candidates. 
+ Target sets are labeled by their first */ + + map< uint64_t, set< uint64_t > > hard_groups; + + { + + /* candidate set with all entries */ + + set< uint64_t > candidates; + map< uint64_t, uint64_t* >::const_iterator jt= callcountPerRank.begin(); + map< uint64_t, uint64_t* >::const_iterator jtend= callcountPerRank.end(); + for ( ; jt != jtend; ++jt ) { + + candidates.insert( jt->first ); + } + + set< uint64_t >::const_iterator ctend= candidates.end(); + map< uint64_t, uint64_t* >::const_iterator itend= callcountPerRank.end(); + map< uint64_t, uint64_t* >::const_iterator ita= callcountPerRank.begin(); + map< uint64_t, uint64_t* >::const_iterator itb; + + for ( ; ita != itend; ++ita ) { + + /* if ita->first is not in candidate set, then skip */ + if ( ctend == candidates.find( ita->first ) ) continue; + + itb= ita; + ++itb; + if ( itb == itend ) break; + for ( ; itb != itend; ++itb ) { + + /* if itb->first is not in candidate set, then skip */ + if ( ctend == candidates.find( itb->first ) ) continue; + + /* compare vectors at ita and itb */ + + bool e= vector_equal< uint64_t >( ita->second, itb->second, vector_length ); + + if ( e ) { + + /*cout << " " << ita->first << " == " << itb->first << endl;*/ + candidates.erase( ita->first ); + candidates.erase( itb->first ); + + hard_groups[ ita->first ].insert( ita->first ); + hard_groups[ ita->first ].insert( itb->first ); + + } else { + + /*cout << " " << ita->first << " != " << itb->first << endl;*/ + } + + } + } + + } + + //cout << "number of processes: " << alldata.processIdNameMap.size() << endl; + //cout << "number of hard groups: " << hard_groups.size() << endl; + + /* + for (map< uint64_t, set< uint64_t > >::const_iterator iter=hard_groups.begin(); + iter!=hard_groups.end(); iter++){ + for (set< uint64_t >::const_iterator iter2= iter->second.begin(); + iter2!=iter->second.end(); iter2++) cout << *iter2 << " "; + cout << endl; + } + */ + + /*cout << " compare soft properties " << endl;*/ + + { + +#ifdef 
GNUPLOT_OUTPUT + + /* gnuplot output: scripts and data to examine clustering quality */ + /* generate two output files: gnuplot script and gnuplot input file */ + ofstream gnuplot_script, gnuplot_inputs, gnuplot_cluster; + gnuplot_script.open( "gnuplot_script.sh" ); + gnuplot_inputs.open( "gnuplot_inputs.txt" ); + gnuplot_cluster.open( "gnuplot_cluster.txt" ); + + gnuplot_script << "#!/usr/bin/gnuplot -persist\n\n"; + gnuplot_script << "set terminal pdf\n" ; + gnuplot_script << "set output \"gnuplot_clusters_quality.pdf\"\n"; + gnuplot_script << "# set terminal postscript eps enhanced color solid 18\n"; + gnuplot_script << "# set output \"gnuplot_clusters_quality.eps\"\n"; + gnuplot_script << "set key top Right\n"; + gnuplot_script << "set title \"cluster quailty: avg cluster radius per dimension\"\n"; + gnuplot_script << "set xlabel \"cluster bins\"\n"; + gnuplot_script << "set ylabel \"radius [ticks]\"\n\n"; + gnuplot_script << "plot [:][:] \\\n"; +#endif /* GNUPLOT_OUTPUT */ + + + map< uint64_t, set< uint64_t > >::const_iterator it= hard_groups.begin(); + map< uint64_t, set< uint64_t > >::const_iterator itend= hard_groups.end(); + for ( ; it != itend; ++it ) { + + + /*{ + uint32_t id= it->first; + cout << "Cluster " << id << " ( " << it->second.size() << " members ) : { " ; + + set< uint64_t >::const_iterator jt= it->second.begin(); + set< uint64_t >::const_iterator jtend= it->second.end(); + for ( ; jt != jtend ; ++jt ) { + + cout << *jt << " "; + } + cout << "}" << endl; + } */ + + + /* do not handle sets with < 6 members * + if ( 6 > it->second.size() ) { + + * + cout << " skip small collection { "; + set< uint64_t >::const_iterator jt= it->second.begin(); + set< uint64_t >::const_iterator jtend= it->second.end(); + for ( ; jt != jtend ; ++jt ) { + + cout << *jt << " "; + } + cout << "} " << endl;; + * + + continue; + } */ + +#ifdef GNUPLOT_OUTPUT + gnuplot_script << "'gnuplot_inputs.txt' using ($4):( ( " << id << "== $2 ) ? 
$13/$6 : 1/0 ) t \"" << id << "\" w lp, \\" << endl; + + gnuplot_cluster << endl << " id " << id << " : "; + set< uint64_t >::const_iterator kt= it->second.begin(); + set< uint64_t >::const_iterator ktend= it->second.end(); + for ( ; kt != ktend ; ++kt ) { + + gnuplot_cluster << *kt << " "; + } + gnuplot_cluster << endl; +#endif /* GNUPLOT_OUTPUT */ + + vector< set< uint64_t > > result_vector; + + /* kmeansquality quality; + + * use min( n/3, 16 ) clusters * + uint32_t n= it->second.size(); + n= ( n/3 < 16 ) ? n/3 : 16; + + bool ret= do_kmeans( n, 10, vector_length, + it->second, calltimesPerRank, + result_vector, &quality ); */ + + + CLINKAGE( it->second, calltimesPerRank, vector_length, result_vector, qt ); + + + // if ( ! ret ) continue; + + /* print cluster assignments and quality */ + { + + vector< set< uint64_t > >::const_iterator it= result_vector.begin(); + vector< set< uint64_t > >::const_iterator itend= result_vector.end(); + for ( ; it != itend ; ++it ) { + + /*cout << " { ";*/ +#ifdef GNUPLOT_OUTPUT + gnuplot_cluster << " { "; +#endif /* GNUPLOT_OUTPUT */ + set< uint64_t >::const_iterator jt= it->begin(); + set< uint64_t >::const_iterator jtend= it->end(); + for ( ; jt != jtend ; ++jt ) { + + /*cout << *jt << " ";*/ +#ifdef GNUPLOT_OUTPUT + gnuplot_cluster << *jt << " "; +#endif /* GNUPLOT_OUTPUT */ + + mapdata << *jt << " "; + + } + /*cout << "}";*/ +#ifdef GNUPLOT_OUTPUT + gnuplot_cluster << "}"; +#endif /* GNUPLOT_OUTPUT */ + + mapdata << endl; + } + /*cout << " } " << endl << endl;*/ +#ifdef GNUPLOT_OUTPUT + gnuplot_cluster << " } " << endl; +#endif /* GNUPLOT_OUTPUT */ + + mapdata << endl; + +/* + cout << " number_of_clusters " << + quality.number_of_clusters << endl; + cout << " number_of_iterations " << + quality.iterations_used << " <= " << + quality.iterations_max << endl; + + cout << " distance_vector_to_center " << + sqrt( quality.min_dist2_vec_to_cent ) << " / " << + sqrt( quality.avg_dist2_vec_to_cent ) << " / " << + sqrt( 
quality.max_dist2_vec_to_cent ) << " (min/avg/max)" << endl; + + cout << " distance_center_to_center " << + sqrt( quality.min_dist2_cent_to_cent ) << " / " << + sqrt( quality.avg_dist2_cent_to_cent ) << " / " << + sqrt( quality.max_dist2_cent_to_cent ) << " (min/avg/max)" << endl; +*/ +#ifdef GNUPLOT_OUTPUT + gnuplot_inputs << " id " << id << + " num_clusters " << quality.number_of_clusters << + " vector_length " << vector_length << + " iterations " << quality.iterations_used << " max_iterations " << quality.iterations_max << + " distance_vector_to_center_min_avg_max " << + sqrt( quality.min_dist2_vec_to_cent ) << " " << + sqrt( quality.avg_dist2_vec_to_cent ) << " " << + sqrt( quality.max_dist2_vec_to_cent ) << " " << + " distance_center_to_center_min_avg_max " << + sqrt( quality.min_dist2_cent_to_cent ) << " " << + sqrt( quality.avg_dist2_cent_to_cent ) << " " << + sqrt( quality.max_dist2_cent_to_cent ) << endl; +#endif /* GNUPLOT_OUTPUT */ + + } + + } + + if (!alldata.params.clustering.synth_data) { + + for ( map< uint64_t, uint64_t* >::const_iterator it = callcountPerRank.begin(); + it != callcountPerRank.end(); it++) free(it->second); + } // didn't allocate memory to callcountPerRank in the synthetic data case + + for ( map< uint64_t, double* >::const_iterator it = calltimesPerRank.begin(); + it != calltimesPerRank.end(); it++) free(it->second); + + +#ifdef GNUPLOT_OUTPUT + + gnuplot_script << "0 t \"\"" << endl; + gnuplot_script.close(); + + gnuplot_inputs.close(); + gnuplot_cluster.close(); + +#endif /* GNUPLOT_OUTPUT */ + + } + + return true; +} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/comparison_kmeans.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/comparison_kmeans.cpp new file mode 100644 index 0000000000..0468b1948c --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/comparison_kmeans.cpp @@ -0,0 +1,741 @@ +/* + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
+ Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "comparison.h" + + +using namespace std; + +/* quality measures of a K-means clustering: min/max/avg of the squared vector-to-centroid and centroid-to-centroid distances, the iterations used and allowed, and the number of clusters */ +struct kmeansquality { + + double min_dist2_vec_to_cent; + double max_dist2_vec_to_cent; + double avg_dist2_vec_to_cent; + + double min_dist2_cent_to_cent; + double max_dist2_cent_to_cent; + double avg_dist2_cent_to_cent; + + uint32_t iterations_used; + uint32_t iterations_max; + + uint32_t number_of_clusters; +}; + + +template +static bool vector_equal( const T* a, const T* b, uint32_t len ); + +/* return true if the first 'len' elements of 'a' and 'b' are pairwise equal, false otherwise */ +template +static bool vector_equal( const T* a, const T* b, uint32_t len ) { + + uint32_t index= 0; + while ( ( len > index ) && ( *a == *b ) ) { + + ++a; + ++b; + ++index; + } + + return ( len == index ); +} + +/* set the first 'n' elements of 'v' to 0.0 */ +static void vector_zero( double* v, uint32_t n ) { + + while ( 0 < n ) { + + *v= 0.0; + ++v; + --n; + } +} + +/* add the first 'n' elements of 'add' element-wise onto 'accu' */ +static void vector_add( double* accu, const double* add, uint32_t n ) { + + while ( 0 < n ) { + + *accu += *add; + ++accu; + ++add; + --n; + } +} + +/* divide each of the first 'n' elements of 'v' by 'div'; 'div' is not checked against zero */ +static void vector_div( double* v, double div, uint32_t n ) { + + while ( 0 < n ) { + + *v= *v / div; + ++v; + --n; + } +} + + +/* +static void vector_print( const double* v, uint32_t n ) { + + uint8_t m= 10; + while ( ( 0 < n ) && ( 0 < m ) ) { + + cout << *v << " "; + ++v; + --n; + --m; + } + cout << endl; +} +*/ + + +/* computes the squared Euclidean distance between the vectors 'a' and 'b' of length 'n' */ +static double vector_dist_2( const double* a, const double* b, uint32_t n ) { + + double ret= 0.0; + + while ( 0 < n ) { + + double tmp= ( *a - *b ); + ret += tmp * tmp; + + ++a; + ++b; + --n; + } + + return ret; +} + + +/* do K-means clustering for the vectors in 'vectors' of length 'len' considering +only the members 'vectors' with a key contained in 's'. exactly 'n' clusters will +be created and at most 'max' iterations will be done. 
The result is stored in 'ret' +-- the 's.size()' input IDs are placed into the 'n' sets in the vector. If +'ret_quality' is not NULL, then the contained quality measures are filled for +the produced clustering */ +static bool do_kmeans( uint32_t n, uint32_t max, uint32_t len, + const set< uint64_t >& members, const map< uint64_t, double* >& vectors, + vector< set< uint64_t > >& ret_vector, kmeansquality* ret_quality ) { + + /* check if enough vectors present */ + if ( n >= members.size() ) { + + cerr << "ERROR: Too many clusters (" << n << ") for " << members.size() + << " vectors " << endl; + return false; + } + + /* initial cluster assignments -- one per cluster, remaining ones to first cluster */ + + /* allocate memory for cluster centroid vectors */ + double** centroids= (double**) malloc( n * sizeof(double*) + n * len * sizeof(double) ); + assert( centroids ); + + /* for every cluster centroid vector count how many original vectors contribute, + this is the number to divde by after all original vectors were added. */ + uint32_t* centroids_n= (uint32_t*) malloc( n * sizeof(uint32_t) ); + assert( centroids_n ); + + /* initialize 'centroids' pointer array and 'centroids_n' */ + double* p= (double*) ( centroids + n ); + for ( uint32_t i= 0; i < n; ++i ) { + + centroids[i]= p; + p += len; + + centroids_n[i]= 0; + } + + + /* for processing, keep for every member (uint64_t) the cluster index (uint32_t). 
+ Only in the very end, translate this representation to + 'vector< set< uint64_t > >& ret_vector' */ + map< uint64_t, uint32_t > assignment; + { + set< uint64_t >::const_iterator it= members.begin(); + set< uint64_t >::const_iterator itend= members.end(); + uint32_t i= 0; + for ( ; it != itend; ++it ) { + + assignment[ *it ]= i % n; + centroids_n[ i % n ] += 1; + + /* + cout << " assign " << *it << " --> " << ( i % n ) << + " out of " << centroids_n[ i % n ] << " / " << n << endl; + */ + + ++i; + } + } + + bool change= true; + uint32_t m= 0; + while ( ( m < max ) && change ) { + + change= false; + + /* + cout << endl << "round " << m << "/" << max << endl << endl; + */ + + /* zeroing centroid vectors */ + for ( uint32_t i= 0; i < n; ++i ) { + + vector_zero( centroids[i], len ); + } + + /* print assignment */ + /* { + map< uint64_t, uint32_t >::const_iterator kt= assignment.begin(); + map< uint64_t, uint32_t >::const_iterator ktend= assignment.end(); + for ( ; kt != ktend; ++kt ) { + + cout << " assigned " << kt->first << " --> " << kt->second << + " out of " << centroids_n[ kt->second ] << endl; + } + } */ + + /* compute cluster centroids -- iterate over 'assignment', for every + assigned cluster, add associated vector to centroid, keep size of clusters */ + map< uint64_t, uint32_t >::const_iterator it= assignment.begin(); + map< uint64_t, uint32_t >::const_iterator itend= assignment.end(); + for ( ; it != itend; ++it ) { + + map< uint64_t, double* >::const_iterator ft= vectors.find( it->first ); + assert( vectors.end() != ft ); + + /* + cout << "before " << it->second << "\t " ; vector_print( centroids[ it->second ], len ); + cout << "add to " << it->second << "\t " ; vector_print( ft->second, len ); + */ + vector_add( centroids[ it->second ], ft->second, len ); + /* + cout << "after " << it->second << "\t " ; vector_print( centroids[ it->second ], len ); + cout << endl; + */ + } + + /* divide sum of vectors by the number of contributing vectors to get real 
centroids */ + for ( uint32_t i= 0; i < n; ++i ) { + + /* + cout << "centroid " << i << " sum :" << endl; + vector_print( centroids[i], len ); + */ + + /* + cout << "div vector " << i << " by " << centroids_n[ i ] << endl; + */ + vector_div( centroids[i], centroids_n[i], len ); + + /* + cout << "centroid " << i << ":" << endl; + vector_print( centroids[i], len ); + */ + } + + + /* for all vectors */ + map< uint64_t, uint32_t >::iterator jt= assignment.begin(); + map< uint64_t, uint32_t >::iterator jtend= assignment.end(); + for ( ; jt != jtend; ++jt ) { + + /* + cout << " check " << jt->first ; + */ + + map< uint64_t, double* >::const_iterator ft= vectors.find( jt->first ); + assert( vectors.end() != ft ); + const double* v= ft->second; + + /* previous assignment */ + uint32_t a= jt->second; + /* + cout << " currently assigned to " << a << endl; + */ + + + /* previous number of members in this cluster */ + uint32_t b= centroids_n[ a ]; + assert( 0 < b ); /* otherwise this cluster is empty which is illegal */ + + /* if only one member left then we cannot re-assign because empty clusters + are not allowed*/ + if ( 1 == b ) continue; + + uint32_t aa= (uint32_t) -1; + double d= numeric_limits::max( ); + + /* for all centroids */ + for ( uint32_t k= 0; k < n ; ++k ) { + + double dd= vector_dist_2( v, centroids[k], len ); + + /* + cout << " cent " << k << " d^2= " << dd ; + */ + if ( dd < d ) { + aa= k; + d= dd; + /* + cout << " * "; + */ + } + /* + cout << endl; + */ + } + + assert( aa != (uint32_t) -1 ); /* there must be a new value */ + + if ( aa != a ) { + + /* actual re-assignment */ + /* + cout << endl << " ### reassign " << jt->first << " from " << a << " to " << aa << " ### " << endl << endl; + */ + centroids_n[ a ]--; + centroids_n[ aa ]++; + jt->second= aa; + change= true; + } + } + + ++m; + } + + + /* final assignment of members to clusters */ + ret_vector.clear(); + ret_vector.resize( n, set() ); + { + map< uint64_t, uint32_t >::const_iterator it= 
assignment.begin(); + map< uint64_t, uint32_t >::const_iterator itend= assignment.end(); + + for ( ; it != itend ; ++it ) { + + ret_vector[ it->second ].insert( it->first ); + } + } + + /* print cluster assignments */ + + /* + cout << " #" << n << " clusters: { "; + vector< set< uint64_t > >::const_iterator it= ret_vector.begin(); + vector< set< uint64_t > >::const_iterator itend= ret_vector.end(); + for ( ; it != itend ; ++it ) { + + cout << "{ "; + set< uint64_t >::const_iterator jt= it->begin(); + set< uint64_t >::const_iterator jtend= it->end(); + for ( ; jt != jtend ; ++jt ) { + + cout << *jt << " "; + } + cout << "} "; + } + cout << "}"<< endl; + */ + + /* if 'ret_quality' is requested, compute some more things */ + if ( NULL != ret_quality ) { + + uint32_t cnt; + + ret_quality->min_dist2_vec_to_cent= numeric_limits::max( ); + ret_quality->max_dist2_vec_to_cent= numeric_limits::min( ); + ret_quality->avg_dist2_vec_to_cent= 0.0; + cnt= 0; + + map< uint64_t, uint32_t >::const_iterator it= assignment.begin(); + map< uint64_t, uint32_t >::const_iterator itend= assignment.end(); + for ( ; it != itend; ++it ) { + + map< uint64_t, double* >::const_iterator ft= vectors.find( it->first ); + assert( vectors.end() != ft ); + const double* v= ft->second; + + for ( uint32_t j= 0; j < n; ++j ) { + + double d2= vector_dist_2( v, centroids[j], len ); + + ret_quality->min_dist2_vec_to_cent= ( d2 < ret_quality->min_dist2_cent_to_cent ) ? d2 : ret_quality->min_dist2_cent_to_cent; + ret_quality->max_dist2_vec_to_cent= ( d2 > ret_quality->max_dist2_cent_to_cent ) ? 
d2 : ret_quality->max_dist2_cent_to_cent; + ret_quality->avg_dist2_vec_to_cent += d2; + cnt++; + } + } + ret_quality->avg_dist2_vec_to_cent= ret_quality->avg_dist2_vec_to_cent / cnt; + + ret_quality->min_dist2_cent_to_cent= numeric_limits::max( ); + ret_quality->max_dist2_cent_to_cent= numeric_limits::min( ); + ret_quality->avg_dist2_cent_to_cent= 0.0; + + cnt= 0; + for ( uint32_t i= 0; i < n; ++i ) { + for ( uint32_t j= i+1; j < n; ++j ) { + + if ( i == j ) continue; + + double d2= vector_dist_2( centroids[i], centroids[j], len ); + + ret_quality->min_dist2_cent_to_cent= ( d2 < ret_quality->min_dist2_cent_to_cent ) ? d2 : ret_quality->min_dist2_cent_to_cent; + ret_quality->max_dist2_cent_to_cent= ( d2 > ret_quality->max_dist2_cent_to_cent ) ? d2 : ret_quality->max_dist2_cent_to_cent; + ret_quality->avg_dist2_cent_to_cent += d2; + cnt++; + } + } + ret_quality->avg_dist2_cent_to_cent= ret_quality->avg_dist2_cent_to_cent / cnt; + + ret_quality->iterations_used= m; + ret_quality->iterations_max= max; + + ret_quality->number_of_clusters= n; + } + + free( centroids ); centroids= NULL; + free( centroids_n ); centroids_n= NULL; + + return true; +} + + +bool ProcessComparisonKMEANS( AllData& alldata, ostringstream& mapdata ) { + + /* only the master performs the process comparison */ + if ( 0 != alldata.myRank ) { + + return true; + } + + /* get vectors with number-of-calls for all functions in same order */ + + /* map process ID to vector of function call counts, + the values in the vector ar in same order -- it is sorded after + 'functionMapGlobal' which is guaranteed to contain all occuring function IDs */ + uint32_t vector_length= alldata.functionMapGlobal.size(); + map< uint64_t, uint64_t* > callcountPerRank; + map< uint64_t, double* > calltimesPerRank; + + { + map< uint64_t, string >::const_iterator it= alldata.processIdNameMap.begin(); + map< uint64_t, string >::const_iterator itend= alldata.processIdNameMap.end(); + for ( ; it != itend; ++it ) { + + /* get the next 
vector to fill in the following loop */ + uint64_t* vc= callcountPerRank[ it->first ]= (uint64_t*) malloc( vector_length * sizeof(uint64_t) ); + assert( vc ); + double* vt= calltimesPerRank[ it->first ]= (double*) malloc( vector_length * sizeof(double) ); + assert( vt ); + + map< Pair, FunctionData, ltPair >::const_iterator ft; + map< Pair, FunctionData, ltPair >::const_iterator ftend= alldata.functionMapPerRank.end(); + + map< uint64_t, FunctionData >::const_iterator jt= alldata.functionMapGlobal.begin(); + map< uint64_t, FunctionData >::const_iterator jtend= alldata.functionMapGlobal.end(); + uint32_t index= 0; + for ( ; jt != jtend; ++jt ) { + + ft= alldata.functionMapPerRank.find( Pair( it->first, jt->first ) ); + uint64_t c= ( ft != ftend ) ? ft->second.count.sum : 0 ; + double t= ( ft != ftend ) ? ft->second.excl_time.sum : 0 ; + + vc[index]= c ; + vt[index]= t ; + index++; + } + } + } + + /* + { + cout << " count vectors of size " << vector_length << " : " << endl; + map< uint64_t, uint64_t* >::const_iterator kt= callcountPerRank.begin(); + map< uint64_t, uint64_t* >::const_iterator ktend= callcountPerRank.end(); + for ( ;kt != ktend; ++kt ) { + + cout << " " << kt->first << " count : " ; + + uint32_t i= 0; + uint64_t* p= kt->second; + for ( ; i < vector_length; ++i, ++p ) { + + cout << *p << " "; + } + cout << endl; + } + } + */ + + /* + { + cout << " times vectors of size " << vector_length << " : " << endl; + map< uint64_t, double* >::const_iterator kt= calltimesPerRank.begin(); + map< uint64_t, double* >::const_iterator ktend= calltimesPerRank.end(); + for ( ;kt != ktend; ++kt ) { + cout << " " << kt->first << " times : " ; + uint32_t i= 0; + double* p= kt->second; + for ( ; i < vector_length; ++i, ++p ) { + + cout << *p << " "; + } + cout << endl; + + + } + } + */ + + /* extract groups of processes that are identical with respect to hard properties. 
+ Therefore, make candidate set, compare all processes with remaining candidates, + when processes match, create target sets and remove processes from canditates. + Target sets are labeled by their first */ + + map< uint64_t, set< uint64_t > > hard_groups; + + { + + /* candidate set with all entries */ + + set< uint64_t > candidates; + map< uint64_t, uint64_t* >::const_iterator jt= callcountPerRank.begin(); + map< uint64_t, uint64_t* >::const_iterator jtend= callcountPerRank.end(); + for ( ; jt != jtend; ++jt ) { + + candidates.insert( jt->first ); + } + + + set< uint64_t >::const_iterator ctend= candidates.end(); + map< uint64_t, uint64_t* >::const_iterator itend= callcountPerRank.end(); + map< uint64_t, uint64_t* >::const_iterator ita= callcountPerRank.begin(); + map< uint64_t, uint64_t* >::const_iterator itb; + + for ( ; ita != itend; ++ita ) { + + /* if ita->first is not in candidate set, then skip */ + if ( ctend == candidates.find( ita->first ) ) continue; + + itb= ita; + ++itb; + if ( itb == itend ) break; + for ( ; itb != itend; ++itb ) { + + /* if itb->first is not in candidate set, then skip */ + if ( ctend == candidates.find( itb->first ) ) continue; + + /* compare vectors at ita and itb */ + + bool e= vector_equal< uint64_t >( ita->second, itb->second, vector_length ); + + if ( e ) { + + /*cout << " " << ita->first << " == " << itb->first << endl;*/ + candidates.erase( ita->first ); + candidates.erase( itb->first ); + + hard_groups[ ita->first ].insert( ita->first ); + hard_groups[ ita->first ].insert( itb->first ); + + } else { + + /*cout << " " << ita->first << " != " << itb->first << endl;*/ + } + + } + } + + } + + /*cout << " compare soft properties " << endl;*/ + + { + +#ifdef GNUPLOT_OUTPUT + + /* gnuplot output: scripts and data to examine clustering quality */ + /* generate two output files: gnuplot script and gnuplot input file */ + ofstream gnuplot_script, gnuplot_inputs, gnuplot_cluster; + gnuplot_script.open( "gnuplot_script.sh" ); + 
gnuplot_inputs.open( "gnuplot_inputs.txt" ); + gnuplot_cluster.open( "gnuplot_cluster.txt" ); + + gnuplot_script << "#!/usr/bin/gnuplot -persist\n\n"; + gnuplot_script << "set terminal pdf\n" ; + gnuplot_script << "set output \"gnuplot_clusters_quality.pdf\"\n"; + gnuplot_script << "# set terminal postscript eps enhanced color solid 18\n"; + gnuplot_script << "# set output \"gnuplot_clusters_quality.eps\"\n"; + gnuplot_script << "set key top Right\n"; + gnuplot_script << "set title \"cluster quailty: avg cluster radius per dimension\"\n"; + gnuplot_script << "set xlabel \"cluster bins\"\n"; + gnuplot_script << "set ylabel \"radius [ticks]\"\n\n"; + gnuplot_script << "plot [:][:] \\\n"; +#endif /* GNUPLOT_OUTPUT */ + + map< uint64_t, set< uint64_t > >::const_iterator it= hard_groups.begin(); + map< uint64_t, set< uint64_t > >::const_iterator itend= hard_groups.end(); + for ( ; it != itend ; ++it ) { + + /* + { + uint32_t id= it->first; + cout << "Cluster " << id << " ( " << it->second.size() << " members ) : { " ; + + set< uint64_t >::const_iterator jt= it->second.begin(); + set< uint64_t >::const_iterator jtend= it->second.end(); + for ( ; jt != jtend ; ++jt ) { + + cout << *jt << " "; + } + cout << "}" << endl; + } + */ + + /* do not handle sets with < 6 members */ + if ( 6 > it->second.size() ) { + + /* + cout << " skip small collection { "; + set< uint64_t >::const_iterator jt= it->second.begin(); + set< uint64_t >::const_iterator jtend= it->second.end(); + for ( ; jt != jtend ; ++jt ) { + + cout << *jt << " "; + } + cout << "} " << endl;; + */ + + continue; + } + +#ifdef GNUPLOT_OUTPUT + gnuplot_script << "'gnuplot_inputs.txt' using ($4):( ( " << id << "== $2 ) ? 
$13/$6 : 1/0 ) t \"" << id << "\" w lp, \\" << endl; + + gnuplot_cluster << endl << " id " << id << " : "; + set< uint64_t >::const_iterator kt= it->second.begin(); + set< uint64_t >::const_iterator ktend= it->second.end(); + for ( ; kt != ktend ; ++kt ) { + + gnuplot_cluster << *kt << " "; + } + gnuplot_cluster << endl; +#endif /* GNUPLOT_OUTPUT */ + + vector< set< uint64_t > > result_vector; + + kmeansquality quality; + + /* use min( n/3, 16 ) clusters */ + uint32_t n= it->second.size(); + n= ( n/3 < 16 ) ? n/3 : 16; + + bool ret= do_kmeans( n, 10, vector_length, + it->second, calltimesPerRank, + result_vector, &quality ); + + if ( ! ret ) continue; + + /* print cluster assignments and quality */ + { + + vector< set< uint64_t > >::const_iterator it= result_vector.begin(); + vector< set< uint64_t > >::const_iterator itend= result_vector.end(); + for ( ; it != itend ; ++it ) { + + /*cout << " { ";*/ +#ifdef GNUPLOT_OUTPUT + gnuplot_cluster << " { "; +#endif /* GNUPLOT_OUTPUT */ + set< uint64_t >::const_iterator jt= it->begin(); + set< uint64_t >::const_iterator jtend= it->end(); + for ( ; jt != jtend ; ++jt ) { + + /*cout << *jt << " ";*/ +#ifdef GNUPLOT_OUTPUT + gnuplot_cluster << *jt << " "; +#endif /* GNUPLOT_OUTPUT */ + + mapdata << *jt << " "; + + } + /*cout << "}";*/ +#ifdef GNUPLOT_OUTPUT + gnuplot_cluster << "}"; +#endif /* GNUPLOT_OUTPUT */ + + mapdata << endl; + } + /*cout << " } " << endl << endl;*/ +#ifdef GNUPLOT_OUTPUT + gnuplot_cluster << " } " << endl; +#endif /* GNUPLOT_OUTPUT */ + + mapdata << endl; + +/* + cout << " number_of_clusters " << + quality.number_of_clusters << endl; + cout << " number_of_iterations " << + quality.iterations_used << " <= " << + quality.iterations_max << endl; + + cout << " distance_vector_to_center " << + sqrt( quality.min_dist2_vec_to_cent ) << " / " << + sqrt( quality.avg_dist2_vec_to_cent ) << " / " << + sqrt( quality.max_dist2_vec_to_cent ) << " (min/avg/max)" << endl; + + cout << " distance_center_to_center " << + 
sqrt( quality.min_dist2_cent_to_cent ) << " / " << + sqrt( quality.avg_dist2_cent_to_cent ) << " / " << + sqrt( quality.max_dist2_cent_to_cent ) << " (min/avg/max)" << endl; +*/ +#ifdef GNUPLOT_OUTPUT + gnuplot_inputs << " id " << id << + " num_clusters " << quality.number_of_clusters << + " vector_length " << vector_length << + " iterations " << quality.iterations_used << " max_iterations " << quality.iterations_max << + " distance_vector_to_center_min_avg_max " << + sqrt( quality.min_dist2_vec_to_cent ) << " " << + sqrt( quality.avg_dist2_vec_to_cent ) << " " << + sqrt( quality.max_dist2_vec_to_cent ) << " " << + " distance_center_to_center_min_avg_max " << + sqrt( quality.min_dist2_cent_to_cent ) << " " << + sqrt( quality.avg_dist2_cent_to_cent ) << " " << + sqrt( quality.max_dist2_cent_to_cent ) << endl; +#endif /* GNUPLOT_OUTPUT */ + + } + } + +#ifdef GNUPLOT_OUTPUT + + gnuplot_script << "0 t \"\"" << endl; + gnuplot_script.close(); + + gnuplot_inputs.close(); + gnuplot_cluster.close(); + +#endif /* GNUPLOT_OUTPUT */ + + } + + return true; +} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_csv.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_csv.cpp new file mode 100644 index 0000000000..6991e37068 --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_csv.cpp @@ -0,0 +1,450 @@ +/* + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
+ Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz +*/ + +#include +#include +#include +#include + +#include "create_csv.h" +#include "otfprofile.h" + +#include "OTF_Definitions.h" + + +/* define the following macro to fill-up per rank data to get equal data amounts +for each trace process/thread; this is useful to make the resulting CSV data +better comparable, but consider that it might cause a significant performance +impact */ +/*#define FILLUP_DATA*/ + + +using namespace std; + + +#ifdef FILLUP_DATA +/* fill-up per rank data based on the definitions */ +static void fillup_data( AllData& alldata ) { + + /* iterate over process ids/names map */ + for ( map< uint64_t, string >::const_iterator proc_it= + alldata.processIdNameMap.begin(); + proc_it != alldata.processIdNameMap.end(); proc_it++ ) { + + const uint64_t & proc_id= proc_it->first; + + /* add empty message data for process */ + alldata.messageMapPerRank[ proc_id ].add( MessageData() ); + + /* add empty collop. data for process */ + + alldata.collectiveMapPerRank[ + Pair( proc_id, OTF_COLLECTIVE_TYPE_BARRIER ) ]. + add( CollectiveData() ); + alldata.collectiveMapPerRank[ + Pair( proc_id, OTF_COLLECTIVE_TYPE_ONE2ALL ) ]. + add( CollectiveData() ); + alldata.collectiveMapPerRank[ + Pair( proc_id, OTF_COLLECTIVE_TYPE_ALL2ONE ) ]. + add( CollectiveData() ); + alldata.collectiveMapPerRank[ + Pair( proc_id, OTF_COLLECTIVE_TYPE_ALL2ALL ) ]. + add( CollectiveData() ); + + /* iterate over function ids/names map */ + for ( map< uint64_t, string >::const_iterator func_it= + alldata.functionIdNameMap.begin(); + func_it != alldata.functionIdNameMap.end(); func_it++ ) { + + const uint64_t & func_id= func_it->first; + + /* add empty function data for process/function */ + alldata.functionMapPerRank[ Pair( proc_id, func_id ) ]. 
+ add( FunctionData() ); + + /* iterate over counter ids/names map */ + for ( map< uint64_t, string >::const_iterator cntr_it= + alldata.counterIdNameMap.begin(); + cntr_it != alldata.counterIdNameMap.end(); cntr_it++ ) { + + const uint64_t & cntr_id= cntr_it->first; + + /* add empty counter data for process/function/counter */ + alldata.counterMapPerFunctionRank[ + Triple( proc_id, func_id, cntr_id ) ].add( CounterData() ); + + } + + } + + } +} +#endif /* FILLUP_DATA */ + + +/* append function data to CSV file */ +static void write_func_data( AllData& alldata, ofstream& csvFile, + const string& csvFileName ) { + + assert( csvFile.good() ); + + VerbosePrint( alldata, 2, false, + " appending function data to file: %s\n", + csvFileName.c_str() ); + + static const string LINE_PREFIX= "==FUNCTION=="; + + if ( 0 == alldata.myRank ) { + + /* write headline */ + csvFile << LINE_PREFIX << ';' + << "Process;Function;Invocations;Excl. Time (s);Incl. Time (s)" << endl; + + } + + /* write function data */ + + map< Pair, FunctionData, ltPair >::iterator it= alldata.functionMapPerRank.begin(); + map< Pair, FunctionData, ltPair >::iterator itend= alldata.functionMapPerRank.end(); + while ( itend != it ) { + + const uint64_t& proc_id= it->first.a; + const uint64_t& func_id= it->first.b; + const uint64_t& count= it->second.count.cnt; + const double excl_time= it->second.excl_time.sum / alldata.timerResolution; + const double incl_time= it->second.incl_time.sum / alldata.timerResolution; + + const string& proc_name= alldata.processIdNameMap[ proc_id ]; + assert( 0 != proc_name.length() ); + + const string& func_name= alldata.functionIdNameMap[ func_id ]; + assert( 0 != func_name.length() ); + + csvFile << LINE_PREFIX << ';' + << proc_name << ';' + << func_name << ';' + << count << ';' + << excl_time << ';' + << incl_time << endl; + + it++; + + } +} + + +/* append counter data to CSV file */ +static void write_counter_data( AllData& alldata, ofstream& csvFile, + const string& 
csvFileName ) { + + assert( csvFile.good() ); + + VerbosePrint( alldata, 2, false, + " appending counter data to file: %s\n", + csvFileName.c_str() ); + + static const string LINE_PREFIX= "==COUNTER=="; + + if ( 0 == alldata.myRank ) { + + /* write headline */ + csvFile << endl << LINE_PREFIX << ';' + << "Process;Function;Counter;Excl. Rate;Incl. Rate" + << endl; + + } + + /* write counter data */ + + map< Triple, CounterData, ltTriple >::iterator it= alldata.counterMapPerFunctionRank.begin(); + map< Triple, CounterData, ltTriple >::iterator itend= alldata.counterMapPerFunctionRank.end(); + while ( itend != it ) { + + const uint64_t& proc_id= it->first.a; + const uint64_t& func_id= it->first.b; + const uint64_t& counter_id= it->first.c; + + const string& proc_name= alldata.processIdNameMap[ proc_id ]; + assert( 0 != proc_name.length() ); + + const string& counter_name= alldata.counterIdNameMap[ counter_id ]; + assert( 0 != counter_name.length() ); + + const string& func_name= alldata.functionIdNameMap[ func_id ]; + assert( 0 != func_name.length() ); + + map< Pair, FunctionData, ltPair >::const_iterator func_it= + alldata.functionMapPerRank.find( Pair( proc_id, func_id ) ); + assert( alldata.functionMapPerRank.end() != func_it ); + + double excl_rate= 0.0; + if ( 0.0 < func_it->second.excl_time.sum ) { + + excl_rate= it->second.excl_time.sum / + func_it->second.excl_time.sum * alldata.timerResolution; + + } + + double incl_rate= 0.0; + if ( 0.0 < func_it->second.incl_time.sum ) { + + incl_rate= it->second.incl_time.sum / + func_it->second.incl_time.sum * alldata.timerResolution; + + } + + csvFile << LINE_PREFIX << ';' + << proc_name << ';' + << func_name << ';' + << counter_name << ';' + << excl_rate << ';' + << incl_rate << endl; + + it++; + + } +} + + +/* append P2P message data to CSV file */ +static void write_p2p_data( AllData& alldata, ofstream& csvFile, + const string& csvFileName ) { + + assert( csvFile.good() ); + + VerbosePrint( alldata, 2, false, + " 
appending P2P message data to file: %s\n", + csvFileName.c_str() ); + + static const string LINE_PREFIX= "==P2P=="; + + if ( 0 == alldata.myRank ) { + + /* write headline */ + csvFile << endl << LINE_PREFIX << ';' + << "Process;Send Invocations;Recv. Invocations;Send Bytes;Recv. Bytes;Duration (s)" + << endl; + + } + + /* write P2P message data */ + + map< uint64_t, MessageData >::iterator it= alldata.messageMapPerRank.begin(); + map< uint64_t, MessageData >::iterator itend= alldata.messageMapPerRank.end(); + while ( itend != it ) { + + const uint64_t& proc_id= it->first; + + const uint64_t& count_send= it->second.count_send.cnt; + const uint64_t& count_recv= it->second.count_recv.cnt; + const uint64_t& bytes_send= it->second.bytes_send.sum; + const uint64_t& bytes_recv= it->second.bytes_recv.sum; + const double duration= it->second.duration_send.sum / alldata.timerResolution; + + const string& proc_name= alldata.processIdNameMap[ proc_id ]; + assert( 0 != proc_name.length() ); + + csvFile << LINE_PREFIX << ';' + << proc_name << ';' + << count_send << ';' + << count_recv << ';' + << bytes_send << ';' + << bytes_recv << ';' + << duration << endl; + + it++; + + } +} + + +/* append collective op. data to CSV file */ +static void write_collop_data( AllData& alldata, ofstream& csvFile, + const string& csvFileName ) { + + assert( csvFile.good() ); + + VerbosePrint( alldata, 2, false, + " appending collective op. data to file: %s\n", + csvFileName.c_str() ); + + static const string LINE_PREFIX= "==COLLOP=="; + + static map< uint64_t, string > op_class_names; + if ( op_class_names.empty() ) { + + op_class_names[ OTF_COLLECTIVE_TYPE_BARRIER ]= "BARRIER"; + op_class_names[ OTF_COLLECTIVE_TYPE_ONE2ALL ]= "ONE2ALL"; + op_class_names[ OTF_COLLECTIVE_TYPE_ALL2ONE ]= "ALL2ONE"; + op_class_names[ OTF_COLLECTIVE_TYPE_ALL2ALL ]= "ALL2ALL"; + + } + + if ( 0 == alldata.myRank ) { + + /* write headline */ + csvFile << endl << LINE_PREFIX << ';' + << "Process;Coll. 
Op;Send Invocations;Recv. Invocations;Send Bytes;Recv. Bytes;Duration (s)" + << endl; + + } + + /* write collop. data */ + + map< Pair, CollectiveData, ltPair >::iterator it= alldata.collectiveMapPerRank.begin(); + map< Pair, CollectiveData, ltPair >::iterator itend= alldata.collectiveMapPerRank.end(); + while ( itend != it ) { + + const uint64_t& proc_id= it->first.a; + const uint64_t& op_class= it->first.b; + + const uint64_t& count_send= it->second.count_send.cnt; + const uint64_t& count_recv= it->second.count_recv.cnt; + const uint64_t& bytes_send= it->second.bytes_send.sum; + const uint64_t& bytes_recv= it->second.bytes_recv.sum; + const double duration= it->second.duration_send.sum / alldata.timerResolution; + + const string& proc_name= alldata.processIdNameMap[ proc_id ]; + assert( 0 != proc_name.length() ); + + const string& op_class_name= op_class_names[ op_class ]; + assert( 0 != op_class_name.length() ); + + csvFile << LINE_PREFIX << ';' + << proc_id << ';' + << op_class_name << ';' + << count_send << ';' + << count_recv << ';' + << bytes_send << ';' + << bytes_recv << ';' + << duration << endl; + + it++; + + } +} + + +bool CreateCSV( AllData& alldata ) { + + bool error= false; + + /* start runtime measurement for creating CSV output */ + StartMeasurement( alldata, 1, true, "produce CSV output" ); + + VerbosePrint( alldata, 1, true, "producing CSV output\n" ); + + /* compose output file name */ + string csv_file_name= alldata.params.output_file_prefix + ".csv"; + + /* remove already existing output file */ + unlink( csv_file_name.c_str() ); + +#ifdef FILLUP_DATA + /* fill-up per rank data based on the definitions */ + fillup_data( alldata ); +#endif /* FILLUP_DATA */ + + /* statistics types */ + enum { + STAT_TYPE_FUNC, + STAT_TYPE_COUNTER, + STAT_TYPE_P2P, + STAT_TYPE_COLLOP, + STAT_TYPE_NUM + }; + + for( uint8_t type= 0; type < STAT_TYPE_NUM && !error; type++ ) { + +#ifdef OTFPROFILE_MPI + for( uint32_t rank= 0; rank < alldata.numRanks; rank++ ) { + + 
if ( alldata.myRank == rank ) { +#endif /* OTFPROFILE_MPI */ + + /* open CSV output file */ + + ofstream csv_file( csv_file_name.c_str(), ios_base::app ); + if ( !csv_file ) { + + cerr << "ERROR: Unable to open file '" << csv_file_name + << "' for writing." << endl; + + error= true; + + } else { + + /* write statistics */ + + csv_file.precision( 9 ); + + switch( type ) { + + case STAT_TYPE_FUNC: + if ( !alldata.params.clustering.enabled || + 0 == alldata.myRank ) { + + /* in case of additional clustering, the master + already has function statistics over *all* + processes; only the master writes its data */ + write_func_data( alldata, csv_file, + csv_file_name ); + } + break; + case STAT_TYPE_COUNTER: + write_counter_data( alldata, csv_file, + csv_file_name ); + break; + case STAT_TYPE_P2P: + write_p2p_data( alldata, csv_file, csv_file_name ); + break; + case STAT_TYPE_COLLOP: + write_collop_data( alldata, csv_file, + csv_file_name ); + break; + default: + assert( 0 ); + break; + + } + + } + + /* close CSV output file */ + csv_file.close(); + + if ( STAT_TYPE_NUM == type +1 ) { + + VerbosePrint( alldata, 2, true, " created file: %s\n", + csv_file_name.c_str() ); + + } + +#ifdef OTFPROFILE_MPI + } + + /* broadcast error indicator to workers */ + if ( SyncError( alldata, error, rank ) ) { + + break; + + } + + MPI_Barrier( MPI_COMM_WORLD ); + + } +#endif /* OTFPROFILE_MPI */ + + } + + if ( !error ) { + + /* stop runtime measurement for creating CSV output */ + StopMeasurement( alldata, false, "produce CSV output" ); + + } + + return !error; +} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_csv.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_csv.h new file mode 100644 index 0000000000..e4ce0115a8 --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_csv.h @@ -0,0 +1,17 @@ +/* + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
+ Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz +*/ + +#ifndef CREATE_CSV_H +#define CREATE_CSV_H + + +#include "datastructs.h" + + +/* create CSV output */ +bool CreateCSV( AllData& alldata ); + + +#endif /* CREATE_CSV_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_latex.cpp similarity index 87% rename from ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.cpp rename to ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_latex.cpp index 24ef8ba83d..8f5f376998 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.cpp +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_latex.cpp @@ -11,23 +11,32 @@ #include #include #include +#include +#include +#include #include +#include #include #include -using namespace std; - #include "create_latex.h" -#include "otfprofile-mpi.h" +#include "otfprofile.h" #include "OTF_inttypes.h" #include "OTF_Definitions.h" +#include "OTF_Platform.h" -#define FUNC_TABLE_LEN 50 -#define CTR_TABLE_LEN 50 -/* global constants to customize chart plots */ +using namespace std; + + +/* maximum number of process groups */ +const uint32_t Grouping::MAX_GROUPS= 16; + +/* global constants to customize tables and chart plots */ +static const int FUNC_TABLE_LEN= 50; +static const int CTR_TABLE_LEN= 50; static const string COLOR_SEND = "red"; static const string COLOR_RECV = "blue"; static const string COLOR_YBAR = "green"; @@ -39,10 +48,19 @@ static const double PLOT_WIDTH = 16; //cm static const double PLOT_HEIGHT = 8; //cm /* will be set before latex creation and can be used in all local functions */ -static bool clustered = false; +static bool grouped = false; static vector xLabels; static int xLabelNum = 0; +class SpaceSeparator: public std::numpunct +{ +public: + SpaceSeparator(std::size_t refs): std::numpunct(refs) {} +protected: + char do_thousands_sep() const { return ' '; } + std::string 
do_grouping() const { return "\03"; } +}; + /* The basic metric types for y bar charts */ enum metricType { INVOCATIONS, @@ -140,6 +158,21 @@ static uint64_t getScaleQuantifierLog2(double min, double max, char& unit) /* after half of the steps, the limit like K, M, G should be reached */ double limit = (min+max)/2; //steps/2*interval; + /*if(limit > (1 << 60)){ + unit = 'E'; + return (1 << 60); + } + + if(limit > (1 << 50)){ + unit = 'P'; + return (1 << 50); + } + + if(limit > (1 << 40)){ + unit = 'T'; + return (1 << 40); + }*/ + if(limit > (1 << 30)){ unit = 'G'; return (1 << 30); @@ -176,6 +209,18 @@ static uint64_t getScaleQuantifierLog10(double min, double max, char& unit, /* after half of the steps, the limit like K, M, G should be reached */ double limit = (min+max)/2; //steps/2*interval; + if(limit > 1e18){ + unit = 'E'; + unitString = "Exa"; + return (uint64_t)1e18; + } + + if(limit > 1e15){ + unit = 'P'; + unitString = "Peta"; + return (uint64_t)1e15; + } + if(limit > 1e12){ unit = 'T'; unitString = "Tera"; @@ -227,6 +272,25 @@ static void makeNiceScaleTicks(double& min, double& max, uint8_t& steps) //cout << "New min value: " << min << " New max value: " << max << endl; } +/* + * Try to create nice scale ticks. + * + * @param min reference to the minimum value of the scale + * @param max reference to the maximum value of the scale + * @param steps the number of tick values to be created + +static void makeNiceScaleTicks(uint64_t& min, uint64_t& max, uint8_t& steps) +{ + double interval = (max - min) / steps; + + if(min - interval < 0) min = 0; + else{ + min = (uint64_t)min; + } + + //cout << "New min value: " << min << " New max value: " << max << endl; +}*/ + /* * Converts the OTF collective definition to a string. 
* @@ -257,8 +321,8 @@ static void write_header(fstream& tex) tex << "\\usepackage{amssymb}" << endl; tex << "\\usepackage{longtable}" << endl; tex << "\\usepackage{ifthen}" << endl; - tex << "%\\usepackage{tikz}" << endl; - tex << "\\usepackage{pgfplots}" << endl << endl; + tex << "\\usepackage{pgfplots}" << endl; + tex << "\\pgfplotsset{compat=1.4}" << endl << endl; tex << "\\textwidth=16.1cm \\textheight=27.0cm \\topmargin=-1.8cm" << endl; tex << "\\oddsidemargin=0.1cm \\evensidemargin=0.1cm \\footskip=45pt" << endl; tex << endl; @@ -268,30 +332,42 @@ static void write_header(fstream& tex) tex << "\\pgfplotsset{" << endl; tex << "/pgfplots/log number format basis/.code 2 args={" << endl; tex << " \\ifdim #1 pt=2pt" << endl; - tex << " \\ifdim #2 pt<10pt" << endl; - tex << " \\pgfmathparse{#1^#2}" << endl; - tex << " \\pgfmathtruncatemacro\\r{\\pgfmathresult} \\r " << endl; - tex << " \\else" << endl; - tex << " \\ifdim #2 pt<20pt" << endl; - tex << " \\pgfmathparse{#1^(#2 - 10)}" << endl; - tex << " \\pgfmathprintnumber{\\pgfmathresult}K" << endl; + tex << " \\ifdim #2 pt>0.5pt" << endl; + tex << " \\ifdim #2 pt<10pt" << endl; + tex << " \\pgfmathparse{#1^#2}" << endl; + tex << " \\pgfmathtruncatemacro\\r{\\pgfmathresult} \\r " << endl; tex << " \\else" << endl; - tex << " \\ifdim #2 pt<30pt" << endl; - tex << " \\pgfmathparse{#1^(#2 - 20)}" << endl; - tex << " \\pgfmathprintnumber{\\pgfmathresult}M" << endl; - tex << " \\else" << endl; - tex << " \\ifdim #2 pt<40pt" << endl; - tex << " \\pgfmathparse{#1^(#2 - 30)}" << endl; - tex << " \\pgfmathprintnumber{\\pgfmathresult}G" << endl; + tex << " \\ifdim #2 pt<20pt" << endl; + tex << " \\pgfmathparse{#1^(#2 - 10)}" << endl; + tex << " \\pgfmathprintnumber{\\pgfmathresult}K" << endl; + tex << " \\else" << endl; + tex << " \\ifdim #2 pt<30pt" << endl; + tex << " \\pgfmathparse{#1^(#2 - 20)}" << endl; + tex << " \\pgfmathprintnumber{\\pgfmathresult}M" << endl; tex << " \\else" << endl; - tex << " \\ifdim #2 pt<50pt" 
<< endl; - tex << " \\pgfmathparse{#1^(#2 - 40)}" << endl; - tex << " \\pgfmathprintnumber{\\pgfmathresult}T" << endl; + tex << " \\ifdim #2 pt<40pt" << endl; + tex << " \\pgfmathparse{#1^(#2 - 30)}" << endl; + tex << " \\pgfmathprintnumber{\\pgfmathresult}G" << endl; tex << " \\else" << endl; - tex << " >1P" << endl; - tex << " \\fi" << endl; + tex << " \\ifdim #2 pt<50pt" << endl; + tex << " \\pgfmathparse{#1^(#2 - 40)}" << endl; + tex << " \\pgfmathprintnumber{\\pgfmathresult}T" << endl; + tex << " \\else" << endl; + tex << " \\ifdim #2 pt<60pt" << endl; + tex << " \\pgfmathparse{#1^(#2 - 50)}" << endl; + tex << " \\pgfmathprintnumber{\\pgfmathresult}P" << endl; + tex << " \\else" << endl; + tex << " \\ifdim #2 pt<70pt" << endl; + tex << " \\pgfmathparse{#1^(#2 - 60)}" << endl; + tex << " \\pgfmathprintnumber{\\pgfmathresult}E" << endl; + tex << " \\else" << endl; + tex << " >1Z" << endl; + tex << " \\fi" << endl; + tex << " \\fi" << endl; + tex << " \\fi" << endl; + tex << " \\fi" << endl; tex << " \\fi" << endl; - tex << " \\fi" << endl; + tex << " \\fi" << endl; tex << " \\fi" << endl; tex << " \\fi" << endl; tex << " \\fi" << endl; @@ -510,13 +586,15 @@ static void write_functionTable(fstream& tex, struct AllData& alldata) tex << " \\bf Function & \\bf invocations[\\#] & " << "\\bf excl. time[sec] $\\nabla$ & \\bf incl. 
time[sec] \\\\" << endl; tex << " \\hline\\hline" << endl; - + /* write the sorted function table */ { std::multimap::const_reverse_iterator it = sortedMap.rbegin(); std::multimap::const_reverse_iterator itend = sortedMap.rend(); + SpaceSeparator facet(1); //1 - don't delete when done + std::locale prev = tex.imbue(std::locale(std::locale(), &facet)); /* for a given max number of functions */ while(itend != it){ @@ -538,8 +616,9 @@ static void write_functionTable(fstream& tex, struct AllData& alldata) /* draw a horizontal line every 3 function entries */ if((count % 3) == 0) tex << " \\hline" << endl; } - } + tex.imbue(prev); //restore previous locale + } tex << " \\hline" << endl; tex << "\\end{longtable}" << endl << endl; @@ -752,6 +831,11 @@ template static void write_ybarPlotHead( // @TODO: ymin == 0, min - max line cannot be drawn if(metricType == MSGLENGTH){ tex << " ymode=log,log basis y=2,ymin=\\ymin," << endl; + tex << " try min ticks log={8}," << endl; + + /* check for label overlapping */ + if((double)minMax.max > (double)8191) + tex << " extra y ticks={1}, extra y tick labels={1}," << endl; } /*** message duration y axis settings ***/ @@ -760,7 +844,7 @@ template static void write_ybarPlotHead( double ymax = (double)minMax.max; // @TODO: ymin == 0, min - max line cannot be drawn - if(ymax - ymin > 100 || (ymax - ymin > 0.01 && ymax < 1)){ + if(ymax - ymin > 1000 || (ymax - ymin > 0.01 && ymax < 1)){ // logarithmic mode tex << " ymode=log,log basis y=10,"; if(ymin <= 0){ @@ -820,7 +904,7 @@ template static void write_ybarPlotHead( } tex << "title=" << title; - if(clustered) tex << " (average)" << endl; + if(grouped) tex << " (average)" << endl; tex << ",ylabel={" << metric << "}," << endl; tex << "x tick label style={rotate=90,anchor=east,font=\\ttfamily\\footnotesize}," << endl; @@ -873,7 +957,7 @@ static void write_ybarPlotFoot(fstream& tex, uint8_t legend/* = 1*/) tex << "\\node [black,right] at (b.east) {receive};" << endl; } - if(clustered){ + 
if(grouped){ tex << "\\draw[|-|,color=" << COLOR_MINMAX << ",line width=1pt] (4,-0.2) -- (4,0.2)" << endl; tex << " node [right,xshift=2pt]{max}" << endl; tex << " node [below right,yshift=-3pt,xshift=2pt]{ min};" << endl; @@ -902,9 +986,9 @@ static void write_p2pAllPGFplots(fstream& tex, vector xLabels, /* iterator over data map */ std::map< uint64_t, MessageData >::const_iterator it = - alldata.messageMapPerCluster.begin(); + alldata.messageMapPerGroup.begin(); std::map< uint64_t, MessageData >::const_iterator itend = - alldata.messageMapPerCluster.end(); + alldata.messageMapPerGroup.end(); /* counter for the x axis ticks */ uint32_t i = 0; @@ -929,7 +1013,7 @@ static void write_p2pAllPGFplots(fstream& tex, vector xLabels, bool dur_avail = false; /* return, if there are no messages available */ - if(alldata.messageMapPerCluster.empty()) return; + if(alldata.messageMapPerGroup.empty()) return; /* * Get min and max values to choose correct y axis scaling. @@ -937,7 +1021,7 @@ static void write_p2pAllPGFplots(fstream& tex, vector xLabels, MinMaxMsgData minMax; while ( itend != it ) { - if(clustered){ + if(grouped){ /* invocations */ if(it->second.count_send.cnt){ @@ -957,7 +1041,7 @@ static void write_p2pAllPGFplots(fstream& tex, vector xLabels, } }else{ - /*** if processes are not clustered use the average values ***/ + /*** if processes are not grouped use the average values ***/ /* invocations */ if(it->second.count_send.cnt){ @@ -989,7 +1073,7 @@ static void write_p2pAllPGFplots(fstream& tex, vector xLabels, } /* reset iterator to write the data to file output */ - it = alldata.messageMapPerCluster.begin(); + it = alldata.messageMapPerGroup.begin(); write_ybarPlotHead(tex, OTF_COLLECTIVE_TYPE_UNKNOWN, INVOCATIONS, vector()/*xLabels*/, minMax.count); @@ -1069,8 +1153,8 @@ static void write_p2pAllPGFplots(fstream& tex, vector xLabels, tex << ss_count_recv.str() << "};" << endl; /* check if min/max values shall be written */ - if(clustered){ - it = 
alldata.messageMapPerCluster.begin(); i = 0; + if(grouped){ + it = alldata.messageMapPerGroup.begin(); i = 0; while ( itend != it ) { /*** send invocations ***/ @@ -1198,7 +1282,7 @@ static void write_p2pAllPGFplots(fstream& tex, vector xLabels, tex << " coordinates{" << endl; tex << ss_bytes_recv.str() << "};" << endl; - if(clustered) tex << ss_bytes_send_e.str() << ss_bytes_recv_e.str(); + if(grouped) tex << ss_bytes_send_e.str() << ss_bytes_recv_e.str(); if(dur_avail) write_ybarPlotFoot(tex, 0); @@ -1218,7 +1302,7 @@ static void write_p2pAllPGFplots(fstream& tex, vector xLabels, tex << " coordinates{" << endl; tex << ss_duration_send.str() << "};" << endl; - if(clustered) tex << ss_duration_send_e.str(); + if(grouped) tex << ss_duration_send_e.str(); write_ybarPlotFoot(tex, 1); } @@ -1227,19 +1311,19 @@ static void write_p2pAllPGFplots(fstream& tex, vector xLabels, } /* - * Creates a vector of the process or cluster labels. + * Creates a vector of the process or group labels. * * @param alldata structure containing all summarized profiling information - * @param xLabels the resulting vector of process/cluster labels + * @param xLabels the resulting vector of process/group labels */ static void getXAxisLabels(struct AllData& alldata, vector& xLabels) { - if(clustered){ + if(grouped){ for(map >::const_iterator it = - alldata.clustering.clustersToProcesses.begin(); - it != alldata.clustering.clustersToProcesses.end(); it++){ + alldata.grouping.groupsToProcesses.begin(); + it != alldata.grouping.groupsToProcesses.end(); it++){ - /* map the first process id of the cluster to the process name */ + /* map the first process id of the group to the process name */ string procFrom = alldata.processIdNameMap[*(it->second.begin())]; if(it->second.size() > 1){ string procTo = alldata.processIdNameMap[*(it->second.rbegin())]; @@ -1258,19 +1342,19 @@ static void getXAxisLabels(struct AllData& alldata, vector& xLabels) } /* - * Get process or cluster label from its ID. 
+ * Get process or group label from its ID. * * @param alldata structure containing all summarized profiling information - * @param id the process or cluster id + * @param id the process or group id * @param label the label as string for the given ID */ -static void getClusterLabel(struct AllData& alldata, uint64_t id, - string& label) +static void getGroupLabel(struct AllData& alldata, uint64_t id, + string& label) { - if(clustered){ - set procs = alldata.clustering.clustersToProcesses.find(id)->second; + if(grouped){ + set procs = alldata.grouping.groupsToProcesses.find(id)->second; - /* map the first process id of the cluster to the process name */ + /* map the first process id of the group to the process name */ label = alldata.processIdNameMap[*(procs.begin())]; if(procs.size() > 1){ string procTo = alldata.processIdNameMap[*(procs.rbegin())]; @@ -1290,7 +1374,7 @@ static void getClusterLabel(struct AllData& alldata, uint64_t id, */ static void write_p2pMsgRateMatrix(fstream& tex, struct AllData& alldata) { - std::map msgMap = alldata.messageMapPerClusterPair; + std::map msgMap = alldata.messageMapPerGroupPair; std::map rankToPos; float scale = 0.7; @@ -1300,16 +1384,16 @@ static void write_p2pMsgRateMatrix(fstream& tex, struct AllData& alldata) uint64_t tres = alldata.timerResolution; - /* check, if clustered to set the dimension of the matrix */ - if(clustered){ - gridDim = alldata.clustering.numClusters(); + /* check, if grouped to set the dimension of the matrix */ + if(grouped){ + gridDim = alldata.grouping.numGroups(); }else{ gridDim = alldata.allProcesses.size(); } /* remove DEBUG output cout << "gridDim:" << gridDim << " processes_num:" << alldata.allProcesses.size() - << " cluster_num:" << alldata.clustering.numClusters() << endl; */ + << " group_num:" << alldata.grouping.numGroups() << endl; */ std::map::const_iterator it = msgMap.begin(); @@ -1329,14 +1413,14 @@ static void write_p2pMsgRateMatrix(fstream& tex, struct AllData& alldata) tex << 
"\\begin{center}" << endl; tex << "\\begin{tikzpicture} [step=1cm,scale=" << scale << ",every node/.style={scale=" << scale << "}]"; - if(clustered) tex << "\\small" << endl; + if(grouped) tex << "\\small" << endl; /* preprocess data */ uint64_t ctrInt = 0; while(it != itend){ double tmp; - /* get list of all ranks/clusters and map internal id for the grid position */ + /* get list of all ranks/groups and map internal id for the grid position */ /* check if already listed */ /* TODO: vector[ctrInt]=rankID ??? */ @@ -1346,7 +1430,7 @@ static void write_p2pMsgRateMatrix(fstream& tex, struct AllData& alldata) /* label the matrix */ string label; - getClusterLabel(alldata,it->first.a, label); + getGroupLabel(alldata,it->first.a, label); tex << "\\node[anchor=east] at (0," << gridDim-ctrInt-1 << ".5)" " {\\shortstack[r]{" << label << "}};" << endl; tex << "\\node[anchor=west,rotate=90] at (" << ctrInt << ".5," @@ -1407,7 +1491,7 @@ static void write_p2pMsgRateMatrix(fstream& tex, struct AllData& alldata) << " datarate: " << it->second.bytes_send.sum/it->second.duration_send.sum << endl; */ - if(0){ /* if clustered, how to get min max values??? */ + if(0){ /* if grouped, how to get min max values??? 
*/ /* get maximum color */ get_color_gray(minDataRate, maxDataRate, it->second.bytes_send.max/it->second.duration_send.max*tres/div, r, g, b); @@ -1529,7 +1613,7 @@ static void write_p2pMsgRateMatrix(fstream& tex, struct AllData& alldata) * @param tex the latex output file stream * @param cclassType type of the collective class * @param metricType the metric type (needed to identify columns in data table) - * @param xnum number of clusters with values (number of rows in data table) + * @param xnum number of groups with values (number of rows in data table) */ static void write_CollectiveClassMetricPlot(fstream& tex, uint64_t cclassType, metric_t metricType, int xnum) @@ -1549,17 +1633,17 @@ static void write_CollectiveClassMetricPlot(fstream& tex, uint64_t cclassType, tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_SEND; if(cclassType != OTF_COLLECTIVE_TYPE_BARRIER) tex << ", xshift=-" << YBAR_SIZE; - tex << "] table[x=cluster,y=" << metric.c_str() << "Send] {\\" + tex << "] table[x=group,y=" << metric.c_str() << "Send] {\\" << classTitle.c_str() << "};" << endl; if(cclassType != OTF_COLLECTIVE_TYPE_BARRIER){ tex << "\\addplot[ybar, draw=black, mark=none, fill=" << COLOR_RECV << ", xshift=" << YBAR_SIZE << "] " - "table[x=cluster,y=" << metric.c_str() << "Recv] {\\" + "table[x=group,y=" << metric.c_str() << "Recv] {\\" << classTitle.c_str() << "};" << endl; } - if(clustered){ + if(grouped){ /* read the min max values from the table */ tex << "\\def \\min {0}" << endl; tex << "\\def \\max {0}" << endl; @@ -1576,7 +1660,7 @@ static void write_CollectiveClassMetricPlot(fstream& tex, uint64_t cclassType, /* write min/max values only, if they differ */ tex << "\\ifthenelse{\\equal{\\min}{\\max}}{}{" << endl; - tex << " \\pgfplotstablegetelem{" << i <<"}{cluster}\\of{\\" + tex << " \\pgfplotstablegetelem{" << i <<"}{group}\\of{\\" << classTitle.c_str() << "}" << endl; tex << " \\let\\xval=\\pgfplotsretval" << endl; @@ -1609,7 +1693,7 @@ static void 
write_CollectiveClassMetricPlot(fstream& tex, uint64_t cclassType, /* write min/max values only, if they differ */ tex << "\\ifthenelse{\\equal{\\min}{\\max}}{}{" << endl; - tex << " \\pgfplotstablegetelem{" << i <<"}{cluster}\\of{\\" + tex << " \\pgfplotstablegetelem{" << i <<"}{group}\\of{\\" << classTitle.c_str() << "}" << endl; tex << " \\let\\xval=\\pgfplotsretval" << endl; @@ -1635,7 +1719,7 @@ static void write_CollectiveClassMetricPlot(fstream& tex, uint64_t cclassType, * @param tex latex output stream * @param cclassType collective class type (see "OTF_Definitions.h") * @param xLabels vector of labels for the x axis (ascending) - * @param xnum number of clusters + * @param xnum number of groups * @param minMax the min/max values for all metrics of given collective class */ static void write_CollectiveClass(fstream& tex, uint64_t cclassType, @@ -1681,9 +1765,9 @@ static void write_collectives(fstream& tex, vector xLabels, { /* iterator over data map */ map::const_iterator it = - alldata.collectiveMapPerCluster.begin(); + alldata.collectiveMapPerGroup.begin(); map::const_iterator itend = - alldata.collectiveMapPerCluster.end(); + alldata.collectiveMapPerGroup.end(); uint64_t currClass = OTF_COLLECTIVE_TYPE_UNKNOWN; string classTitle = ""; @@ -1694,10 +1778,10 @@ static void write_collectives(fstream& tex, vector xLabels, /* create pgfplots table head */ tex << "\\pgfplotstableread{" << endl; - tex << "cluster cntSend cntRecv bytSend bytRecv durSend durRecv"; + tex << "group cntSend cntRecv bytSend bytRecv durSend durRecv"; - /* add min max markers, if clustered */ - if(clustered){ + /* add min max markers, if grouped */ + if(grouped){ tex << " cntSendMin cntSendMax cntRecvMin cntRecvMax bytSendMin bytSendMax" " bytRecvMin bytRecvMax durSendMin durSendMax durRecvMin durRecvMax"; } @@ -1711,7 +1795,7 @@ static void write_collectives(fstream& tex, vector xLabels, int xCtr = 0; MinMaxMsgData minMax; /* for every collective class (see "OTF_Definitions.h") */ - 
for(it = alldata.collectiveMapPerCluster.begin(); it != itend; it++){ + for(it = alldata.collectiveMapPerGroup.begin(); it != itend; it++){ /* map is sorted by collective class -- check if collective class changes */ if(it->first.a != currClass){ /* finish plot */ @@ -1730,17 +1814,17 @@ static void write_collectives(fstream& tex, vector xLabels, /* create pgfplots table head */ tex << "\\pgfplotstableread{" << endl; - tex << "cluster cntSend cntRecv bytSend bytRecv durSend durRecv"; + tex << "group cntSend cntRecv bytSend bytRecv durSend durRecv"; - /* add min max markers, if clustered */ - if(clustered){ + /* add min max markers, if grouped */ + if(grouped){ tex << " cntSendMin cntSendMax cntRecvMin cntRecvMax bytSendMin bytSendMax" " bytRecvMin bytRecvMax durSendMin durSendMax durRecvMin durRecvMax"; } tex << endl; } - /*** write data for the current cluster ***/ + /*** write data for the current group ***/ xCtr++; tex << it->first.b-1; @@ -1764,11 +1848,17 @@ static void write_collectives(fstream& tex, vector xLabels, }else tex << " 0"; if(it->second.bytes_send.cnt){ - tex << " " << (double)it->second.bytes_send.sum/it->second.bytes_send.cnt; + double val = (double)it->second.bytes_send.sum/it->second.bytes_send.cnt; + tex << " " << val; + if(val < minMax.bytes.min) minMax.bytes.min = val; + if(val > minMax.bytes.max) minMax.bytes.max = val; }else tex << " 0"; if(it->second.bytes_recv.cnt){ - tex << " " << (double)it->second.bytes_recv.sum/it->second.bytes_recv.cnt; + double val = (double)it->second.bytes_recv.sum/it->second.bytes_recv.cnt; + tex << " " << val; + if(val < minMax.bytes.min) minMax.bytes.min = val; + if(val > minMax.bytes.max) minMax.bytes.max = val; }else tex << " 0"; if(it->second.duration_send.cnt){ @@ -1785,8 +1875,8 @@ static void write_collectives(fstream& tex, vector xLabels, if(val > minMax.duration.max) minMax.duration.max = val; }else tex << " 0"; - /*** if processes are clustered ***/ - if(clustered){ + /*** if processes are grouped 
***/ + if(grouped){ /*** invocations ***/ if(it->second.count_send.cnt){ double val = (double)it->second.count_send.min; @@ -1810,13 +1900,23 @@ static void write_collectives(fstream& tex, vector xLabels, /*** message length ***/ if(it->second.bytes_send.cnt){ - tex << " " << (double)it->second.bytes_send.min - << " " << (double)it->second.bytes_send.max; + double val = (double)it->second.bytes_send.min; + tex << " " << val; + if(val < minMax.bytes.min) minMax.bytes.min = val; + + val = (double)it->second.bytes_send.max; + tex << " " << val; + if(val < minMax.bytes.max) minMax.bytes.max = val; }else tex << " 0 0"; if(it->second.bytes_recv.cnt){ - tex << " " << (double)it->second.bytes_recv.min - << " " << (double)it->second.bytes_recv.max; + double val = (double)it->second.bytes_recv.min; + tex << " " << val; + if(val < minMax.bytes.min) minMax.bytes.min = val; + + val = (double)it->second.bytes_recv.max; + tex << " " << val; + if(val < minMax.bytes.max) minMax.bytes.max = val; }else tex << " 0 0"; /*** duration ***/ @@ -1884,12 +1984,14 @@ static void write_p2pMsgRateHist(fstream& tex, struct AllData& alldata) uint64_t maxMsgLen = 0; uint64_t minMsgLen = (uint64_t)-1; uint64_t maxCount = 0; + //uint64_t minCount = (uint64_t)-1; while(it != itend){ if(it->first.a > maxMsgRate) maxMsgRate = it->first.a; if(it->first.a < minMsgRate) minMsgRate = it->first.a; if(it->first.b > maxMsgLen) maxMsgLen = it->first.b; if(it->first.b < minMsgLen) minMsgLen = it->first.b; if(it->second.count.cnt > maxCount) maxCount = it->second.count.cnt; + //if(it->second.count.cnt < minCount) minCount = it->second.count.cnt; it++; } @@ -1981,7 +2083,11 @@ static void write_p2pMsgRateHist(fstream& tex, struct AllData& alldata) /* draw the colormap legend */ uint32_t colorsteps = 20; // if(colorsteps > maxCount) colorsteps = maxCount; - + + /*makeNiceScaleTicks(minCount, maxCount, colorsteps);*/ + SpaceSeparator facet(1); //1 - don't delete when done + std::locale prev = 
tex.imbue(std::locale(std::locale(), &facet)); + tex << "\\node at (" << colorsteps/2.0 + 1 << ", 0) {Number of Invocations};" << endl; //cout << "maxCount: " << maxCount << " colorsteps: " << colorsteps << endl; @@ -1995,17 +2101,22 @@ static void write_p2pMsgRateHist(fstream& tex, struct AllData& alldata) /* color box */ tex.precision(6); tex.setf(ios::fixed, ios::floatfield); + tex.imbue(prev); tex << "\\node[minimum size=0.95cm,anchor=south west] at (" << i << ",-1.5) [rectangle, fill={rgb,1:red," << r << " ;green," << g << ";blue," << b << "}] {};" << endl; /* number of invocation description */ tex << "\\node[anchor=east,rotate=90] at (" << i+0.5 << ",-1.5) {"; + + //tex.setf(ios::scientific); tex.precision(0); + tex.imbue(std::locale(std::locale(), &facet)); tex << i*interval << "};" << endl; } tex.precision(6); tex.setf(ios::floatfield); + tex.imbue(prev); tex << "\\end{tikzpicture}" << endl; tex << "\\end{center}" << endl; @@ -2022,6 +2133,9 @@ bool CreateTex( AllData& alldata ) { bool error= false; + /* start runtime measurement for creating LaTeX output */ + StartMeasurement( alldata, 1, false, "produce LaTeX output" ); + VerbosePrint( alldata, 1, true, "producing LaTeX output\n" ); string tex_file_name= alldata.params.output_file_prefix + ".tex"; @@ -2038,7 +2152,7 @@ bool CreateTex( AllData& alldata ) { tex_file.setf(/*ios::fixed, */ios::floatfield); tex_file.precision(6); - clustered= alldata.clustering.enabled; + grouped= alldata.grouping.enabled; /* write the document header (including the \begin{document} */ write_header(tex_file); @@ -2065,30 +2179,76 @@ bool CreateTex( AllData& alldata ) { VerbosePrint( alldata, 2, true, " created file: %s\n", tex_file_name.c_str() ); + if ( !error ) { + + /* stop runtime measurement for creating LaTeX output */ + StopMeasurement( alldata, false, "produce LaTeX output" ); + + } + #if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4 /* create PDF file, if desired */ if ( alldata.params.create_pdf 
) { + /* start runtime measurement for creating PDF output */ + StartMeasurement( alldata, 1, false, "produce PDF output" ); + VerbosePrint( alldata, 1, true, "producing PDF output\n" ); /* compose pdflatex command */ - ostringstream cmd; - cmd << PDFTEX << " " << tex_file_name << " >/dev/null 2>&1"; + char cmd[1024]; + snprintf( cmd, sizeof( cmd ) - 1, PDFTEX" %s >/dev/null 2>&1", + tex_file_name.c_str() ); /* execute pdflatex command (two times) on TeX file */ for ( uint8_t i = 0; i < 2; i++ ) { VerbosePrint( alldata, 2, true, " %srunning command: %s\n", - (0 == i) ? "" : "re-", cmd.str().c_str() ); + (0 == i) ? "" : "re-", cmd ); - int rc= system( cmd.str().c_str() ); - if ( 0 != WEXITSTATUS( rc ) ) { + int rc= system( cmd ); + + /* evaluate exit status */ + + int es= ( -1 != rc ) ? WEXITSTATUS( rc ) : 0; + + /* command could not be executed; print warning message */ + if ( -1 == rc || 127 == es ) { + + ostringstream warn_msg; + + warn_msg << "Warning: Could not execute command '" + << cmd << "'"; + + if ( -1 == rc ) { + + warn_msg << " (" << strerror( errno ) << ")"; + + } + + warn_msg << "." << endl + << "Try to run this command manually in terminal to " + << "produce PDF output."; + + cerr << warn_msg.str() << endl; + break; + + /* command executed, but failed; abort */ + } else if ( 0 != es ) { cerr << "ERROR: Could not create PDF file from '" - << tex_file_name << "'." << endl; + << tex_file_name << "'. " + << PDFTEX << "returned with exit code " + << es << "." 
<< endl; error= true; break; + /* command executed successfully the second time */ + } else if ( 0 == es && 1 == i ) { + + /* stop runtime measurement for creating PDF output */ + StopMeasurement( alldata, false, "produce PDF output" ); + } } diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_latex.h similarity index 91% rename from ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.h rename to ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_latex.h index f5705e3469..16474ec827 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.h +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_latex.h @@ -10,7 +10,7 @@ #include "datastructs.h" -/* generate PGF output */ +/* create LaTeX output */ bool CreateTex( AllData& alldata ); diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/datastructs.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/datastructs.h similarity index 63% rename from ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/datastructs.h rename to ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/datastructs.h index c5e6fe38f3..dd2da78ceb 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/datastructs.h +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/datastructs.h @@ -7,46 +7,101 @@ #define DATASTRUCTS_H -using namespace std; - #include #include +#include #include #include #include #include -#include "mpi.h" +#ifdef OTFPROFILE_MPI +# include "mpi.h" +#endif /* OTFPROFILE_MPI */ #include "OTF_inttypes.h" +using namespace std; + + +/* *** process clustering algorithms *** */ + +typedef enum { + + CLUSTER_ALG_CLINKAGE, + CLUSTER_ALG_KMEANS + +} ClusterAlgorithm; + + /* *** program parameters *** */ struct Params { + /* general parameters */ + static const uint32_t DEFAULT_MAX_FILE_HANDLES= 50; static const uint32_t DEFAULT_BUFFER_SIZE= 1024 * 1024; + static const uint32_t DEFAULT_MAX_GROUPS= 16; static 
const uint8_t DEFAULT_VERBOSE_LEVEL= 0; + static const bool DEFAULT_CREATE_CSV= false; + static const bool DEFAULT_CREATE_TEX= true; + static const bool DEFAULT_CREATE_PDF= true; static const string DEFAULT_OUTPUT_FILE_PREFIX() { return "result"; } uint32_t max_file_handles; uint32_t buffer_size; + uint32_t max_groups; uint8_t verbose_level; bool progress; bool read_from_stats; + bool create_csv; + bool create_tex; bool create_pdf; string input_file_prefix; string output_file_prefix; + /* process clustering related parameters */ + + struct Clustering { + + static const ClusterAlgorithm DEFAULT_ALGORITHM= CLUSTER_ALG_KMEANS; + static double DEFAULT_QUALITY_THRESHOLD() { return 0.1; } + static const string DEFAULT_MAP_FILE_NAME() { return "result.map"; } + + ClusterAlgorithm alg; + bool enabled; + bool shrink; + bool hard_grouping; + double quality_threshold; + + string map_file_name; + string shrink_output_prefix; + + bool synth_data; + uint32_t synth_ranks_num; + uint32_t synth_funcs_num; + + Clustering() + : alg(DEFAULT_ALGORITHM), enabled(false), shrink(false), + hard_grouping(false), + quality_threshold(DEFAULT_QUALITY_THRESHOLD()), + map_file_name(DEFAULT_MAP_FILE_NAME()), synth_data(false), + synth_ranks_num(0), synth_funcs_num(0) {} + + } clustering; + Params() : max_file_handles(DEFAULT_MAX_FILE_HANDLES), - buffer_size(DEFAULT_BUFFER_SIZE), + buffer_size(DEFAULT_BUFFER_SIZE), max_groups(DEFAULT_MAX_GROUPS), verbose_level(DEFAULT_VERBOSE_LEVEL), progress(false), - read_from_stats(false), create_pdf(true), + read_from_stats(false), create_csv(DEFAULT_CREATE_CSV), + create_tex(DEFAULT_CREATE_TEX), create_pdf(DEFAULT_CREATE_PDF), output_file_prefix(DEFAULT_OUTPUT_FILE_PREFIX()) {} + }; @@ -58,12 +113,13 @@ struct Progress { static const uint64_t EVENTS_RECORD_LIMIT= 1000000; static const uint64_t STATS_RECORD_LIMIT= 100; - /* message tag to use for communication */ - static const int MSG_TAG= 500; - uint64_t cur_bytes; /* current bytes read */ uint64_t 
max_bytes; /* max. bytes readable */ +#ifdef OTFPROFILE_MPI + /* message tag to use for communication */ + static const int MSG_TAG= 500; + MPI_Request send_request; /* sender request handle */ uint64_t* recv_buffers; /* receive buffers */ @@ -73,40 +129,45 @@ struct Progress { uint64_t* rank_cur_bytes; /* current bytes read per rank (except rank 0) */ uint32_t ranks_left; /* root keeps track of ranks left to query */ +#endif /* OTFPROFILE_MPI */ }; /* *** runtime measurement *** */ -struct MeasureBlock { +struct Measurement { - /* routine to get a global timestamp */ -# define GETTIME() MPI_Wtime() + struct Scope { - double start_time; /* start timestamp of measurement block */ - double stop_time; /* stop timestamp of measurement block */ + double start_time; /* start timestamp of measurement scope */ + double stop_time; /* stop timestamp of measurement scope */ + const uint8_t verbose_level; /* verbose level required to + perform measurement */ - MeasureBlock() : start_time(-1.0), stop_time(-1.0) {} + Scope( const uint8_t _verbose_level ) + : start_time(-1.0), stop_time(-1.0), + verbose_level(_verbose_level) {} + }; - /* start runtime measurment */ - void start() { + /* are there any completed runtime measurement result? 
*/ + bool have_data; - start_time= GETTIME(); - } + /* store per-measurement scope runtimes */ + map< string, Scope > scope_map; - /* stop runtime measurment */ - void stop() { + Measurement() : have_data(false) {} - assert( -1.0 != start_time ); - stop_time= GETTIME(); - } + /* get global timestamp in seconds */ + static double gettime() { - /* get result of runtime measurement */ - double duration() const { - - assert( -1.0 != start_time && -1.0 != stop_time ); - return stop_time - start_time; - } +#ifdef OTFPROFILE_MPI + return MPI_Wtime(); +#else /* OTFPROFILE_MPI */ + struct timeval tv; + gettimeofday( &tv, NULL ); + return (double)( tv.tv_sec * 1e6 + tv.tv_usec ) / 1.0e6; +#endif /* OTFPROFILE_MPI */ + } }; @@ -244,44 +305,48 @@ public: }; -/* manage clustering of processes (or ranks/threads/whatever) -- clustering -reduces the potentially unlimited numbers of processes to a fixed number of -clusters (or bins, groups, buckets, ... ). Instead of a per-process basis -global statisitics are collected per cluster. The processes can be spread -over the clusters either consecutively, or round-robin, or randomly, or in -any special scheme, e.g. separate clusters for GPU theads and host processes. -Therefore, the Clustering structure manages explicit mappings from cluster IDs to -process IDs. Every process belongs to one cluster exclusively. */ +/* manage grouping of processes (or ranks/threads/whatever) -- grouping +reduces the potentially unlimited numbers of processes to a fixed number of +groups (or bins, buckets, ... ). Instead of a per-process basis global +statisitics are collected per group. The processes can be spread over the groups +either consecutively, or round-robin, or randomly, or in any special scheme, +e.g. separate groups for GPU theads and host processes. Therefore, the Grouping +structure manages explicit mappings from group IDs to process IDs. +Every process belongs to one group exclusively. 
*/ -struct Clustering { +struct Grouping { - static const uint32_t MAX_CLUSTERS= 16; + /* maximum number of groups + (limited by LaTeX output; defined in create_latex.cpp) */ + static const uint32_t MAX_GROUPS; - map< uint64_t, uint64_t > processesToClusters; - map< uint64_t, set > clustersToProcesses; + /* store process/group mappings */ + map< uint64_t, uint64_t > processesToGroups; + map< uint64_t, set > groupsToProcesses; + /* indicates whether grouping is enabled + (more processes than maximum number of groups) */ bool enabled; + Grouping() : enabled( false ) {} + ~Grouping() {} - Clustering() : enabled( false ) {} - ~Clustering() {} - - /* insert process into a cluster, return true if succeeded */ - bool insert( uint64_t cluster, uint64_t process ) { + /* insert process into a group, return true if succeeded */ + bool insert( uint64_t group, uint64_t process ) { /* insert the new entry if and only if there was no process with this ID before, - because every process can only be in one cluster */ + because every process can only be in one group */ pair< map< uint64_t, uint64_t >::const_iterator, bool> ret= - processesToClusters.insert( pair< uint64_t, uint64_t >( process, cluster ) ); + processesToGroups.insert( pair< uint64_t, uint64_t >( process, group ) ); if ( ret.second ) { - clustersToProcesses[ cluster ].insert( process ); + groupsToProcesses[ group ].insert( process ); - /* set indicator that clustering is enabled, if there are more than - one process within a cluster */ - if ( !enabled && 1 < clustersToProcesses[ cluster ].size() ) { + /* set indicator that grouping is enabled, if there are more than + one process within a group */ + if ( !enabled && 1 < groupsToProcesses[ group ].size() ) { enabled= true; } @@ -292,28 +357,28 @@ struct Clustering { return false; } - /* return the cluster ID for the given process ID, return 0 if not found */ - uint64_t process2cluster( uint64_t process ) const { + /* return the group ID for the given process ID, return 
0 if not found */ + uint64_t process2group( uint64_t process ) const { - map< uint64_t, uint64_t >::const_iterator it= processesToClusters.find( process ); + map< uint64_t, uint64_t >::const_iterator it= processesToGroups.find( process ); - return ( processesToClusters.end() != it ) ? it->second : ( uint64_t) 0 ; + return ( processesToGroups.end() != it ) ? it->second : ( uint64_t) 0 ; } - /* return a const pointer to the set or NULL if there is no such cluster, + /* return a const pointer to the set or NULL if there is no such group, this is better than the [] operator which would create an empty set if a search goes negative */ - const set* cluster2processes( uint64_t cluster ) const { + const set* group2processes( uint64_t group ) const { - map< uint64_t, set >::const_iterator it= clustersToProcesses.find( cluster ); + map< uint64_t, set >::const_iterator it= groupsToProcesses.find( group ); - return ( clustersToProcesses.end() != it ) ? ( & it->second ) : NULL ; + return ( groupsToProcesses.end() != it ) ? 
( & it->second ) : NULL ; } - /* return number of clusters */ - uint32_t numClusters( ) const { + /* return number of groups */ + uint32_t numGroups( ) const { - return clustersToProcesses.size(); + return groupsToProcesses.size(); } }; @@ -475,33 +540,36 @@ struct PendingCollective { struct AllData { + /* MPI-rank and number of analysis processes */ const uint32_t myRank; const uint32_t numRanks; +#ifdef OTFPROFILE_MPI + /* one instance of send/receive buffer to be re-used all the time */ + uint32_t packBufferSize; + char* packBuffer; +#endif /* OTFPROFILE_MPI */ + /* number and list of processes to be handled by every worker */ uint32_t myProcessesNum; uint32_t* myProcessesList; - /* one instance of send/receive buffer to be re-used all the time */ - uint32_t packbuffersize; - char* packbuffer; - /* program parameters */ Params params; /* progress information */ Progress progress; - /* store per-measure block runtimes */ - map< string, MeasureBlock > measureBlockMap; + /* runtime measurement */ + Measurement measurement; - /* clustering information for ranks */ - Clustering clustering; + /* grouping information for ranks */ + Grouping grouping; - /* trace context information; only significant on rank 0 */ + /* trace context information */ /* trace creator */ string creator; @@ -550,7 +618,7 @@ struct AllData { uint64_t timerResolution; /* key for OTF key-value-pairs with message matching information */ - uint64_t recvTimeKey; + uint64_t recvTimeKey; @@ -559,19 +627,27 @@ struct AllData { the following maps are for collecting individual data per trace rank, they will be summarized to the next set of maps */ - /* store per-function statistics over the ranks, Pair is */ + /* store per-function statistics over the ranks, Pair is + + in case of additional clustering, collect it to the master node such that + process clustering according to similar function call patterns can + be done */ map< Pair, FunctionData, ltPair > functionMapPerRank; - /* store per-counter 
statistics over the functions and ranks, - Triple is */ + /* store per-counter statistics over the functions and ranks, + Triple is */ map< Triple, CounterData, ltTriple > counterMapPerFunctionRank; - /* store send-recv statistics for P2P messages per communication pairs, - ATTENTION: Pair is and not ! Recently changed ! */ + /* store send-recv statistics for P2P messages per communication pairs, + Pair is */ map< Pair, MessageData, ltPair > messageMapPerRankPair; - /* store per-collop.-class statistics over the ranks, - Pair is */ + /* store send-recv statistics per rank without differenciating the + communication partners */ + map< uint64_t, MessageData > messageMapPerRank; + + /* store per-collop.-class statistics over the ranks, + Pair is */ map< Pair, CollectiveData, ltPair > collectiveMapPerRank; @@ -588,31 +664,37 @@ struct AllData { Pair is */ map< Pair, CounterData, ltPair > counterMapGlobal; - /* compact send-recv statistics for P2P messages per communicating clusters, - clusters are groups of neigbor ranks, - ATTENTION: Pair is and not ! Recently changed ! 
*/ - map< Pair, MessageData, ltPair > messageMapPerClusterPair; + /* compact send-recv statistics for P2P messages per communicating groups, + groups are groups of neigbor ranks, + Pair is */ + map< Pair, MessageData, ltPair > messageMapPerGroupPair; - /* compact send-receive statistics per cluster without differenciating the - communication partners; cluster is a group of ranks */ - map< uint64_t, MessageData > messageMapPerCluster; + /* compact send-receive statistics per group without differenciating the + communication partners; group is a group of ranks */ + map< uint64_t, MessageData > messageMapPerGroup; /* store per-speed-bin statistics over the length-bins of P2P messages, Pair is where bin is log2() */ map< Pair, MessageSpeedData, ltPair > messageSpeedMapPerLength; - /* compact collective operation statistics per cluster; - cluster is a group of ranks, Pair is */ - map< Pair, CollectiveData, ltPair > collectiveMapPerCluster; + /* compact collective operation statistics per group; + group is a group of ranks, Pair is */ + map< Pair, CollectiveData, ltPair > collectiveMapPerGroup; - AllData( uint32_t my_rank, uint32_t num_ranks ) : - myRank(my_rank), numRanks(num_ranks), - myProcessesNum(0), myProcessesList(NULL), - packbuffersize(0), packbuffer(NULL), timerResolution(0), - recvTimeKey(0) {} + AllData( uint32_t my_rank= 0, uint32_t num_ranks= 1 ) : + myRank(my_rank), numRanks(num_ranks), myProcessesNum(0), + myProcessesList(NULL), timerResolution(0), recvTimeKey(0) { + +#ifdef OTFPROFILE_MPI + packBufferSize= 0; + packBuffer= NULL; +#endif /* OTFPROFILE_MPI */ + + } + ~AllData() { @@ -620,29 +702,35 @@ struct AllData { free( myProcessesList ); myProcessesList= NULL; - packbuffersize= 0; - free( packbuffer ); - packbuffer= NULL; - }; +#ifdef OTFPROFILE_MPI + packBufferSize= 0; + if ( packBuffer ) { + free( packBuffer ); + packBuffer= NULL; + } +#endif /* OTFPROFILE_MPI */ + } + +#ifdef OTFPROFILE_MPI char* guaranteePackBuffer( uint32_t size ) { - if ( 
packbuffersize < size ) { + if ( packBufferSize < size ) { - packbuffersize= size; - packbuffer= (char*) realloc( packbuffer, packbuffersize * sizeof(char) ); - assert( NULL != packbuffer ); + packBufferSize= size; + packBuffer= (char*) realloc( packBuffer, packBufferSize * sizeof(char) ); + assert( NULL != packBuffer ); } - return packbuffer; + return packBuffer; } char* freePackBuffer( ) { - free( packbuffer ); - packbuffer= NULL; - packbuffersize= 0; + free( packBuffer ); + packBuffer= NULL; + packBufferSize= 0; return NULL; } @@ -650,8 +738,9 @@ struct AllData { char* getPackBuffer( ) { - return packbuffer; + return packBuffer; } +#endif /* OTFPROFILE_MPI */ }; diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/mpi/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/mpi/Makefile.am new file mode 100644 index 0000000000..f34fe55a0d --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/mpi/Makefile.am @@ -0,0 +1,19 @@ +if AMBUILDBINARIES +bin_PROGRAMS = otfprofile-mpi +endif + +OTFPROFILESRCDIR = $(srcdir)/.. +include $(srcdir)/../Makefile.common + +CXX = $(MPICXX) + +INCLUDES = $(COMMONINCLUDES) $(MPI_INCLUDE_LINE) + +otfprofile_mpi_CXXFLAGS = -DOTFPROFILE_MPI $(COMMONCXXFLAGS) $(MPICXXFLAGS) +otfprofile_mpi_LDADD = $(COMMONLDADD) $(MPI_LIB_LINE) +otfprofile_mpi_DEPENDENCIES = $(COMMONDEPENDENCIES) +otfprofile_mpi_SOURCES = \ + $(COMMONSOURCES) \ + $(OTFPROFILESRCDIR)/reduce_data.h \ + $(OTFPROFILESRCDIR)/reduce_data.cpp + diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.cpp index 9f8bd4ed07..0077fcd12f 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.cpp +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.cpp @@ -1,718 +1,1239 @@ /* This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
- Authors: Andreas Knuepfer, Denis Huenich, Johannes Spazier + Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz */ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef _OPENMP -# include -# define MY_THREAD omp_get_thread_num() -# define THREAD_NUM omp_get_num_threads() -#else -# define MY_THREAD 0 -# define THREAD_NUM 1 -#endif - -#if defined(HAVE_SYS_TIME_H) && HAVE_SYS_TIME_H -# include -#endif -#ifdef _WIN32 -# include -#endif #include +#include +#include +#include +#include +#include +#include +#include -#include "OTF_inttypes.h" +#include "otf.h" #include "OTF_Platform.h" -#include "Handler.h" -#include "Summary.h" -#include "Prodtex.h" -#include "CSVParse.h" +#include "collect_data.h" +#include "otfprofile.h" +#include "summarize_data.h" +#include "clustering.h" +#include "create_csv.h" +#include "create_latex.h" + + +#ifdef OTFPROFILE_MPI +# include "reduce_data.h" + + /* define the following macro to synchronize the error indicator with all + worker ranks (only significant for otfprofile-mpi) + + This enforces that all ranks will be terminated by calling MPI_Abort if + anyone fails. This is necessary to work around a bug that appears at least + with Open MPI where calling MPI_Abort on one task doesn't terminate all + other ranks. */ +# define SYNC_ERROR +#endif /* OTFPROFILE_MPI */ + +/* define the following macro to print result data to stdout */ +/*#define SHOW_RESULTS*/ + +/* define the following macro to enable support for synthetic input data for +CLINKAGE clustering (introduces options -R and -F ) */ +/*#define CLINKAGE_SYNTHDATA*/ + using namespace std; -#define HELPTEXT "" \ -" \n" \ -" otfprofile - generate a profile of a trace \n" \ -" in Latex or CSV format. \n" \ -" \n" \ -" options: \n" \ -" -h, --help show this help message \n" \ -" -V show OTF version \n" \ -" -b readbuffer size \n" \ -" -f max. 
number of filehandles to use \n" \ -" -i specify an input trace name \n" \ -" -csv specify an input csv-file trace name \n" \ -" (as produced by otfprofiler before), \n" \ -" don't use -i and -csv together \n" \ -" -o specify the path for the output files \n" \ -" -tex writes Latex output in different flavours: \n" \ -" (all,func,p2p,collop,none) \n" \ -" -notex disable Latex output \n" \ -" -nops disable Postscript output \n" \ -" -var also show statistic variance \n" \ -" -top max. number of functions shown (default 50) \n" \ -" -progress show progress information \n" \ -" -sum reads only summarized information, no events \n" \ -" -omp specify the number of threads which are used \n" \ -" while reading the otf-file parallel \n" \ -" Note: This option overrides the environment \n" \ -" variable OMP_NUM_THREADS, \n" \ -" only useful if compiled with OpenMP support \n" \ -" -lite ignore P2P and collective communication \n" \ -" (saves memory for highly parallel cases \n" \ -" \n" \ -" \n" \ - - -/* global variable to switch operation mode, see '-lite' command line switch */ -bool lite= false; - - -int main( int argc, const char** argv ) -{ - struct timeval tStart,tEnd; - - int i; - OTF_FileManager* manager; - OTF_Reader* reader; - OTF_HandlerArray* handlers; - int buffersize = 1024*1024; - int nfiles = 300; - string output_path("./"); - string output_dir; - char* file = NULL; - fstream out; - bool check = false; // if check == true then -i or -csv is set - bool csv_on = false; - vector counter_names; - global_data* gd = new global_data; - bool sum = false; // if true -> read summary - - /* show how many events were read */ - uint64_t global_events = 0; - double global_percent = 0; - - /* what to read? 
*/ - int tex = TEX_ALL; - bool status = false; - bool file_rename = false; - bool ps = true; - gd->TOP_FUNC = 50; - gd->var = false; - gd->vis = false; - gd->prog = false; - gd->prog_start = (uint64_t) - 1; - gd->prog_end = 0; - gd->min_time = 0; - gd->max_time = (uint64_t) - 1; - gd->ticks = 1; - gd->clear_temp = false; - uint64_t ret_read; - - int num_p = 1; - global_data** data_array; - uint32_t* cpu2thread; - - char *OMP_NUM_THREADS = getenv("OMP_NUM_THREADS"); - if(OMP_NUM_THREADS != NULL) { - num_p = atoi(OMP_NUM_THREADS); - if(num_p == 0) { - num_p = 1; - } - } - - /* global variable: bool lite */ - lite= false; - - - if ( 1 >= argc ) { - - cout << HELPTEXT; - return 1; - } - - for(i = 1; i < argc; i++) - { - if ((0 == strcmp("-b", argv[i])) && (i+1 < argc)) - { - buffersize = atoi(argv[i+1]); - ++i; - } - else if ((0 == strcmp("-tex", argv[i])) && (i+1 < argc)) - { - if (0 == strcmp("all", argv[i+1])) - tex = TEX_ALL; - else if (0 == strcmp("allplot", argv[i+1])) - tex = TEX_ALLPLOT; - else if (0 == strcmp("func", argv[i+1])) - tex = TEX_FUNC; - else if (0 == strcmp("p2p", argv[i+1])) - tex = TEX_P2P; - else if (0 == strcmp("collop", argv[i+1])) - tex = TEX_COLLOP; - else if (0 == strcmp("none", argv[i+1])) - tex = TEX_OFF; - else - cerr << "\nWrong argument given after -tex." 
<< endl; - ++i; - } - else if ( 0 == strcmp("-notex", argv[i]) ) - { - tex= TEX_OFF; - } - else if ((0 == strcmp("-f", argv[i])) && (i+1 < argc)) - { - nfiles = atoi(argv[i+1]); - ++i; - } -#if 0 - else if (0 == strcmp("-c", argv[i]) && (i+1 < argc)) - { - i++; - int test_end; - while((0 != (test_end = strcmp("end", argv[i]))) && (i+1 < argc)) - { - string s(strdup(argv[i])); - transform(s.begin(), s.end(), s.begin(), ::toupper); - counter_names.push_back(s); - i++; - } - if(test_end != 0) - { - cerr << "Parameter -c : There was missing an \"end\"" << endl; - exit(1); - } - } -#endif /* 0 */ - else if ((0 == strcmp("-min", argv[i])) && (i+1 < argc)) - { - gd->min_time = atoi(argv[i+1]); - ++i; - } - else if ((0 == strcmp("-max", argv[i])) && (i+1 < argc)) - { - gd->max_time = atoi(argv[i+1]); - ++i; - } - else if (0 == strcmp("-progress", argv[i])) - { - status = true; - } - else if (0 == strcmp("--help", argv[i]) || 0 == strcmp("-h", argv[i])) - { - cout << HELPTEXT; - return 0; - } - else if ( 0 == strcmp( "-V", argv[i] ) ) - { - - printf( "%u.%u.%u \"%s\"\n", OTF_VERSION_MAJOR, OTF_VERSION_MINOR, - OTF_VERSION_SUB, OTF_VERSION_STRING); - exit( 0 ); - } - else if ((0 == strcmp("-i", argv[i])) && (i+1 < argc)) - { - if(!check) - { - file = strdup(argv[i+1]); - check = true; - } - ++i; - } - else if ((0 == strcmp("-csv", argv[i])) && (i+1 < argc)) - { - if(!check) - { - file = strdup(argv[i+1]); - check = true; - csv_on = true; - } - ++i; - } - else if ((0 == strcmp("-o", argv[i])) && (i+1 < argc)) - { - output_path = string(strdup(argv[i+1])); - ++i; - } - else if ((0 == strcmp("-d", argv[i])) && (i+1 < argc)) - { - output_path = string(strdup(argv[i+1])); - file_rename = true; - ++i; - } - else if (0 == strcmp("-var", argv[i])) - { - gd->var = true; - } - else if (0 == strcmp("-vis", argv[i])) - { - gd->vis = true; - } - else if ((0 == strcmp("-top", argv[i])) && (i+1 < argc)) - { - gd->TOP_FUNC = atoi(argv[i+1]); - ++i; - } - else if (0 == strcmp("-sum", 
argv[i])) - { - sum = true; - } - else if (0 == strcmp("-nops", argv[i])) - { - ps = false; - } - else if (0 == strcmp("-omp", argv[i]) && (i+1 < argc)) - { -# ifdef _OPENMP - num_p = atoi(argv[i+1]); - ++i; -# else - cerr << "\nThe option \"-omp\" has no effect because you compiled without OpenMP-Support.\nInstall OpenMP and recompile otfprofile to use this option.\n" << endl; - ++i; -# endif - } - else if (0 == strcmp("-lite", argv[i])) - { - lite= true; - } - else - { - cerr << "ERROR: Unknown argument: " << argv[i] << endl;; - exit(1); - } - } - - if (nfiles < 1) - { - cerr << "ERROR: need at least one input, aborting" << endl; - exit(1); - } - - if(file == NULL) - { - cerr << "ERROR: no file given" << endl; - cerr << "To give a file, use \"-i filename\". " << endl; - exit(1); - } - -# ifdef _OPENMP - uint32_t num_threads = num_p; -# else - uint32_t num_threads = 1; -# endif - - gd->filename.assign(OTF_basename(file)); - output_dir = output_path; - if(!file_rename) - { - output_path.append(gd->filename); - } - gd->filename_path = output_path; - - if(csv_on) - { - Glob_Maps glob_maps; - parse_csv(gd->sum_container, file, glob_maps); - free(file); - } - else - { - manager= OTF_FileManager_open(nfiles); - assert(NULL != manager); - - handlers = OTF_HandlerArray_open(); - - reader = OTF_Reader_open(file, manager); - assert(NULL != reader); - - OTF_MasterControl* master = OTF_Reader_getMasterControl(reader); - assert(NULL != master); - uint32_t num_cpus = OTF_MasterControl_getrCount(master); - uint32_t num_files = OTF_MasterControl_getCount(master); - gd->num_cpu = num_cpus; - - if( (num_threads > num_files) && (num_threads <= num_cpus) ) { - cerr << "Warning: Number of working threads greater than number of files. That could reduce the performance." << endl; - } - - if(num_threads > num_cpus) { - num_threads = num_cpus; - cerr << "Remark: It is not possible to have more working threads than processes in the otf-file!" 
<< endl; - } - - uint32_t current_reader = num_threads; -# ifdef _OPENMP - omp_set_num_threads(num_threads); -# endif - - /* get the list of processes that should be distributed over the analysis threads */ - - uint32_t* processlist; - uint32_t pos= 0; - processlist= (uint32_t*) malloc( num_cpus * sizeof(uint32_t) ); - assert( NULL != processlist ); - - for( uint32_t k= 0; k < num_files; k++ ) { - - OTF_MapEntry* entry; - entry= OTF_MasterControl_getEntryByIndex( master, k ); - assert( NULL != entry ); - - for( uint32_t m= 0; m < entry->n ; m++ ) { - - processlist[pos]= entry->values[m]; - ++pos; - } - } - - data_array = (global_data**) malloc(num_threads * sizeof(global_data*)); - cpu2thread = (uint32_t*) malloc(num_cpus * sizeof(uint32_t)); - - /* Definitons */ - OTF_Reader_setBufferSizes(reader, buffersize); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefCreator, OTF_DEFCREATOR_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_DEFCREATOR_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefVersion, OTF_DEFVERSION_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_DEFVERSION_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefTimerResolution, OTF_DEFTIMERRESOLUTION_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_DEFTIMERRESOLUTION_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefFunction, OTF_DEFFUNCTION_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_DEFFUNCTION_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefFunctionGroup, OTF_DEFFUNCTIONGROUP_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_DEFFUNCTIONGROUP_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefProcess, OTF_DEFPROCESS_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, 
- (void*) gd, OTF_DEFPROCESS_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefProcessGroup, OTF_DEFPROCESSGROUP_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_DEFPROCESSGROUP_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefCollectiveOperation, OTF_DEFCOLLOP_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_DEFCOLLOP_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefCounter, OTF_DEFCOUNTER_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_DEFCOUNTER_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleFunctionSummary, OTF_FUNCTIONSUMMARY_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_FUNCTIONSUMMARY_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleMessageSummary, OTF_MESSAGESUMMARY_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_MESSAGESUMMARY_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleCollopSummary, OTF_COLLOPSUMMARY_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) gd, OTF_COLLOPSUMMARY_RECORD); - - /* read definitions */ - ret_read = OTF_Reader_readDefinitions(reader, handlers); - if( ret_read == OTF_READ_ERROR ) { - cerr << "Cannot read definition file. It seems to be damaged. Abort." << endl; - return 0; - } - - gd->sum_container.adddef_Bin(1); - - /* read statistics */ - if(sum) { - ret_read = OTF_Reader_readStatistics(reader, handlers); - if( ret_read == OTF_READ_ERROR ) { - cerr << "Cannot read statistics. The tracefile seems to be damaged. Abort." 
<< endl; - return 0; - } - } - - OTF_Reader_close(reader); - OTF_HandlerArray_close(handlers); - - OTF_FileManager_close(manager); - - if(!sum) { - - global_data data = *gd; - - /* CPU-Verteilung ermitteln */ - cout << "Threads: " << num_threads << endl; - uint32_t *threads = (uint32_t*) malloc(num_threads * sizeof(uint32_t)); - - int index = 0; - for(uint32_t k=0; k invers_proc_map; - - uint32_t maxfiles = ( nfiles / THREAD_NUM ) + - ( (uint32_t)MY_THREAD < (nfiles % (uint32_t)THREAD_NUM) ? 1 : 0 ); - - uint32_t start = 0; - uint32_t end = 0; -# ifdef _OPENMP - for(int k=0; k 0) { - read = OTF_Reader_readEventsUnsorted(reader, handlers); - if( read == OTF_READ_ERROR ) { - cerr << "Cannot read events. The tracefile seems to be damaged. Abort." << endl; - exit(0); - } - -# ifdef _OPENMP - if(status) { -# pragma omp barrier - } -# endif - if( (read <= 0) && (ready == false) ) { -# ifdef _OPENMP -# pragma omp critical(decrement) -# endif - { - current_reader--; - } - ready = true; - } - - if(ready == false) { - for(uint32_t k=start; k<=end; k++) { - Process *p = &(&data)->p_map[ processlist[k] ]; - p->calc_mbyte_per_sec(processlist[k], invers_proc_map, cpu2thread, data_array); - } - - if(status) { - events += read; - OTF_Reader_eventBytesProgress( reader, &min, &cur, &max ); - percent = 100.0 * ((double) (cur - min)) / ((double) (max - min)); -# ifdef _OPENMP -# pragma omp critical(progress) -# endif - { - global_events += read; - global_percent += percent; - } - } - } - - if ( status ) { -# ifdef _OPENMP -# pragma omp barrier -# endif - } - - if ( status && ready == false ) { -# ifdef _OPENMP -# pragma omp single nowait -# endif - { - global_percent += ( num_threads - current_reader ) * 100.0; - cerr << " " << (global_percent / (double) num_threads) << "% of all events read, events read: " << global_events << "\r"; - global_percent = 0; - } - } - } - - OTF_Reader_setRecordLimit( reader, (uint64_t) OTF_READ_MAXRECORDS ); - - OTF_Reader_close(reader); - 
OTF_HandlerArray_close(handlers); - OTF_FileManager_close(manager); - - set_time_sum_container(&data); - -# ifdef _OPENMP -# pragma omp barrier -# endif - for(uint32_t k=start; k<=end; k++) { - Process *p = &(&data)->p_map[ processlist[k] ]; - p->calc_mbyte_per_sec(processlist[k], invers_proc_map, cpu2thread, data_array); - } - -# ifdef _OPENMP -# pragma omp critical(merge) -# endif - { - gd->sum_container.mergeContainer(data.sum_container); - mergeProgTime(gd, &data); - } - -# ifdef _OPENMP -# pragma omp barrier -# endif - - } /* end parallel */ - - if(status) - { - cerr << "\n all events read" << endl; - - gettimeofday(&tEnd,0); - - cerr << "\n time used for profiling : " << - ((double) tEnd.tv_sec + (double) tEnd.tv_usec / 1000000) - - ( (double) tStart.tv_sec + (double) tStart.tv_usec / 1000000) << endl; - - cerr << "\n producing files..."; - } - - } // end !sum - free(file); - - out.setf(ios::fixed, ios::floatfield); - out.precision(6); - out.open((gd->filename_path + "_func.csv").c_str(), ios::out | ios::trunc); - gd->sum_container.csv_Function(out, 1); - out.close(); - out.open((gd->filename_path + "_p2p.csv").c_str(), ios::out | ios::trunc); - gd->sum_container.csv_P2P(out, 1); - out.close(); - out.open((gd->filename_path + "_collop.csv").c_str(), ios::out | ios::trunc); - gd->sum_container.csv_CollOp(out, 1); - out.close(); - out.open((gd->filename_path + "_data.csv").c_str(), ios::out | ios::trunc); - gd->sum_container.csv_Data(out, 1); - out.close(); - } - - if(tex != TEX_OFF) { - prod_tex(tex, gd, counter_names, sum); - - if(ps == true) { - string exe = "cd " + output_dir + "; latex -halt-on-error " + gd->filename + - "_result.tex > /dev/null 2>&1"; - int ret = system(exe.c_str()); - if(ret != 0) { - //cerr << " Warning: could not create ps-file! latex not installed." 
<< endl; - } else { - exe = "cd " + output_dir + "; dvips " + gd->filename + "_result.dvi > /dev/null 2>&1"; - ret = system(exe.c_str()); - if(ret != 0) { - //cerr << " Warning: could not create ps-file! dvips not installed." << endl; - } - } - } - } - - cerr << "\tfinished." << endl; - - return 0; + +/* name of program executable */ +#ifdef OTFPROFILE_MPI + const string ExeName= "otfprofile-mpi"; +#else /* OTFPROFILE_MPI */ + const string ExeName= "otfprofile"; +#endif /* OTFPROFILE_MPI */ + + +/* parse command line options +return 0 if succeeded, 1 if help text or version showed, -1 if failed */ +static int parse_command_line( int argc, char** argv, AllData& alldata ); + +/* assign trace processes to analysis processes explicitly in order to allow +sophisticated grouping of MPI ranks/processes/threads/GPU threads/etc. +in the future, return true if succeeded */ +static bool assign_procs( AllData& alldata ); + +#ifdef SHOW_RESULTS + /* show result data on stdout */ + static void show_results( const AllData& alldata ); +#endif /* SHOW_RESULTS */ + +/* show helptext */ +static void show_helptext( void ); + + +int main( int argc, char** argv ) { + + int ret= 0; + +#ifdef OTFPROFILE_MPI + /* start MPI */ + + int my_rank; + int num_ranks; + + MPI_Init( &argc, &argv ); + + MPI_Comm_rank(MPI_COMM_WORLD, &my_rank ); + MPI_Comm_size(MPI_COMM_WORLD, &num_ranks ); + + AllData alldata( my_rank, num_ranks ); +#else /* OTFPROFILE_MPI */ + AllData alldata; +#endif /* OTFPROFILE_MPI */ + + do { + + /* step 0: parse command line options */ + if ( 0 != ( ret= parse_command_line( argc, argv, alldata ) ) ) { + + if ( 1 == ret ) { + + ret= 0; + + } else { /* -1 == ret */ + + ret= 1; + + } + + break; + + } + + VerbosePrint( alldata, 1, true, "initializing\n" ); + + /* step 1: assign trace processes to analysis processes */ + if ( !assign_procs( alldata ) ) { + + ret= 1; + break; + + } + + /* step 2: collect data by reading input trace file */ + if ( !CollectData( alldata ) ) { + + ret= 
1; + break; + + } + +#ifndef SHOW_RESULTS + if ( alldata.params.create_tex ) +#endif /* SHOW_RESULTS */ + { + /* step 3: summarize data; every analysis rank summarizes its local + data independently; only necessary if producing LaTeX output or + showing result data on stdout is enabled */ + if ( !SummarizeData( alldata ) ) { + + ret= 1; + break; + + } + + } + +#ifdef OTFPROFILE_MPI + if ( 1 < alldata.numRanks && + ( alldata.params.create_tex || + alldata.params.clustering.enabled ) ) { + + /* step 4: reduce data to master; summarized data for producing + LaTeX output; per-process/function statistics for additional + clustering */ + if ( !ReduceData( alldata ) ) { + + ret= 1; + break; + + } + + } +#endif /* OTFPROFILE_MPI */ + + /* step 5: produce outputs */ + + if ( alldata.params.create_csv ) { + + /* step 5.1: create CSV output */ + if ( !CreateCSV( alldata ) ) { + + ret= 1; + break; + + } + + } + + if ( alldata.params.create_tex && 0 == alldata.myRank ) { + + /* step 5.2: create LaTeX output */ + if ( !CreateTex( alldata ) ) { + + ret= 1; + break; + + } + + } + +#ifdef SHOW_RESULTS + /* step 5.3: show result data on stdout */ + + if ( 0 == alldata.myRank ) { + + show_results( alldata ); + + } +#endif /* SHOW_RESULTS */ + + if ( alldata.params.clustering.enabled ) { + + /* step 6: do additional process clustering */ + if ( !ProcessClustering( alldata ) ) { + + ret= 1; + break; + + } + + } + + } while( false ); + + if ( 0 == ret ) { + + if ( 0 == alldata.myRank ) { + + /* print runtime measurement results to stdout */ + PrintMeasurement( alldata ); + + } + + VerbosePrint( alldata, 1, true, "done\n" ); + + } + +#ifdef OTFPROFILE_MPI + /* either finalize or abort on error */ + if ( 0 == ret ) { + + MPI_Finalize(); + + } else { + + MPI_Abort( MPI_COMM_WORLD, ret ); + + } +#endif /* OTFPROFILE_MPI */ + + return ret; } + + +static int parse_command_line( int argc, char** argv, AllData& alldata ) { + + int ret= 0; + + Params& params= alldata.params; + + /* parse command 
line options */ + + enum { ERR_OK, ERR_OPT_UNKNOWN, ERR_ARG_MISSING, ERR_ARG_INVALID }; + int parse_error= ERR_OK; + + int i; + + for ( i = 1; i < argc; i++ ) { + + /* -h, --help */ + if ( 0 == strcmp( "-h", argv[i] ) || + 0 == strcmp( "--help", argv[i] ) ) { + + if ( 0 == alldata.myRank ) { + + show_helptext(); + + } + + ret= 1; + break; + + /* -V */ + } else if ( 0 == strcmp( "-V", argv[i] ) ) { + + if ( 0 == alldata.myRank ) { + + printf( "%u.%u.%u \"%s\"\n", + OTF_VERSION_MAJOR, OTF_VERSION_MINOR, OTF_VERSION_SUB, + OTF_VERSION_STRING ); + + } + + ret= 1; + break; + + /* -v */ + } else if ( 0 == strcmp( "-v", argv[i] ) ) { + + params.verbose_level++; + + /* -p */ + } else if ( 0 == strcmp( "-p", argv[i] ) ) { + + params.progress= true; + + /* -f */ + } else if ( 0 == strcmp( "-f", argv[i] ) ) { + + if ( argc - 1 == i ) { + + parse_error= ERR_ARG_MISSING; + break; + + } + + int tmp= atoi( argv[i+1] ); + if ( 0 >= tmp ) { + + parse_error= ERR_ARG_INVALID; + break; + } + + params.max_file_handles= tmp; + i++; + + /* -b */ + } else if ( 0 == strcmp( "-b", argv[i] ) ) { + + if ( argc - 1 == i ) { + + parse_error= ERR_ARG_MISSING; + break; + + } + + int tmp= atoi( argv[i+1] ); + if ( 0 >= tmp ) { + + parse_error= ERR_ARG_INVALID; + break; + } + + params.buffer_size= tmp; + i++; + + /* -o */ + } else if ( 0 == strcmp( "-o", argv[i] ) ) { + + if ( argc - 1 == i ) { + + parse_error= ERR_ARG_MISSING; + break; + + } + + params.output_file_prefix= argv[++i]; + + /* -g */ + } else if ( 0 == strcmp( "-g", argv[i] ) ) { + + if ( argc - 1 == i ) { + + parse_error= ERR_ARG_MISSING; + break; + + } + + int tmp= atoi( argv[i+1] ); + if ( 1 > tmp || (int)Grouping::MAX_GROUPS < tmp ) { + + parse_error= ERR_ARG_INVALID; + break; + } + + params.max_groups= tmp; + i++; + + /* -c */ + } else if ( 0 == strcmp( "-c", argv[i] ) ) { + + params.clustering.enabled= true; + + /* --cluster */ + } else if ( 0 == strcmp( "--cluster", argv[i] ) ) { + + if ( argc - 1 == i ) { + + parse_error= 
ERR_ARG_MISSING; + break; + + } + + if ( 0 == strcmp( "CLINKAGE", argv[i+1] ) ) { + + params.clustering.alg= CLUSTER_ALG_CLINKAGE; + + } else if ( 0 == strcmp( "KMEANS", argv[i+1] ) ) { + + params.clustering.alg= CLUSTER_ALG_KMEANS; + + } else { + + parse_error= ERR_ARG_INVALID; + break; + + } + + params.clustering.enabled= true; + i++; + + /* -m */ + } else if ( 0 == strcmp( "-m", argv[i] ) ) { + + if ( argc - 1 == i ) { + + parse_error= ERR_ARG_MISSING; + break; + + } + + params.clustering.enabled= true; + params.clustering.map_file_name= argv[++i]; + + /* -s */ + } else if ( 0 == strcmp( "-s", argv[i] ) ) { + + if ( argc - 1 == i ) { + + parse_error= ERR_ARG_MISSING; + break; + + } + + params.clustering.enabled= true; + params.clustering.shrink= true; + params.clustering.shrink_output_prefix= argv[++i]; + + /* -H */ + } else if ( 0 == strcmp( "-H", argv[i] ) ) { + + params.clustering.enabled= true; + params.clustering.alg= CLUSTER_ALG_CLINKAGE; + params.clustering.hard_grouping= true; + + /* -q */ + } else if ( 0 == strcmp( "-q", argv[i] ) ) { + + if ( argc - 1 == i ) { + + parse_error= ERR_ARG_MISSING; + break; + + } + + double tmp= atof( argv[i+1] ); + if( 0.0 > tmp || 1.0 < tmp ) { + + parse_error= ERR_ARG_INVALID; + break; + } + + params.clustering.enabled= true; + params.clustering.alg= CLUSTER_ALG_CLINKAGE; + params.clustering.quality_threshold= tmp; + i++; + +#ifdef CLINKAGE_SYNTHDATA + /* -R */ + } else if ( 0 == strcmp( "-R", argv[i] ) ) { + + if ( argc - 1 == i ) { + + parse_error= ERR_ARG_MISSING; + break; + + } + + int tmp= atoi( argv[i+1] ); + if( 0 >= tmp ) { + + parse_error= ERR_ARG_INVALID; + break; + } + + params.clustering.enabled= true; + params.clustering.alg= CLUSTER_ALG_CLINKAGE; + params.clustering.synth_data= true; + params.clustering.synth_ranks_num= tmp; + i++; + + /* -F */ + } else if ( 0 == strcmp( "-F", argv[i] ) ) { + + if ( argc - 1 == i ) { + + parse_error= ERR_ARG_MISSING; + break; + + } + + int tmp= atoi( argv[i+1] ); + if( 0 >= 
tmp ) { + + parse_error= ERR_ARG_INVALID; + break; + } + + params.clustering.enabled= true; + params.clustering.alg= CLUSTER_ALG_CLINKAGE; + params.clustering.synth_data= true; + params.clustering.synth_funcs_num= tmp; + i++; +#endif /* CLINKAGE_SYNTHDATA */ + + /* --stat */ + } else if ( 0 == strcmp( "--stat", argv[i] ) ) { + + params.read_from_stats= true; + + /* --csv */ + } else if ( 0 == strcmp( "--csv", argv[i] ) ) { + + params.create_csv= true; + + /* --nocsv */ + } else if ( 0 == strcmp( "--nocsv", argv[i] ) ) { + + params.create_csv= false; + + /* --tex */ + } else if ( 0 == strcmp( "--tex", argv[i] ) ) { + + params.create_tex= true; + + /* --notex */ + } else if ( 0 == strcmp( "--notex", argv[i] ) ) { + + params.create_tex= false; + +#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4 + /* --pdf */ + } else if ( 0 == strcmp( "--pdf", argv[i] ) ) { + + params.create_tex= true; + params.create_pdf= true; + + /* --nopdf */ + } else if ( 0 == strcmp( "--nopdf", argv[i] ) ) { + + params.create_pdf= false; +#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */ + + /* input file or unknown option */ + } else { + + if ( 0 == params.input_file_prefix.length() ) { + + char* tmp= OTF_stripFilename( argv[i] ); + if ( tmp ) { + + params.input_file_prefix= tmp; + free( tmp ); + + } + + } else { + + parse_error= ERR_OPT_UNKNOWN; + break; + + } + + } + + } + + /* show specific message on error */ + if ( ERR_OK != parse_error ) { + + if ( 0 == alldata.myRank ) { + + switch( parse_error ) { + + case ERR_OPT_UNKNOWN: + + cerr << "ERROR: Unknown option '" << argv[i] << "'." + << endl; + break; + + case ERR_ARG_MISSING: + + cerr << "ERROR: Expected argument for option '" << argv[i] + << "'." << endl; + break; + + case ERR_ARG_INVALID: + + cerr << "ERROR: Invalid argument for option '" << argv[i] + << "'." 
<< endl; + break; + + default: + + break; + + } + + } + + ret= -1; + + /* show help text if no input trace file is given */ + } else if ( 0 == params.input_file_prefix.length() ) { + + if ( 0 == alldata.myRank ) { + + show_helptext(); + + } + + ret= 1; + + } + + return ret; +} + + +static bool assign_procs( AllData& alldata ) { + + bool error= false; + + OTF_FileManager* manager= NULL; + OTF_MasterControl* master= NULL; + + if ( 0 == alldata.myRank ) { + + /* the master reads OTF master control of input trace file */ + + manager= OTF_FileManager_open( 1 ); + assert( manager ); + + master= OTF_MasterControl_new( manager ); + assert( master ); + + int master_read_ret= + OTF_MasterControl_read( master, + alldata.params.input_file_prefix.c_str() ); + + /* that's the first access to the input trace file; show tidy error + message if failed */ + if ( 0 == master_read_ret ) { + + cerr << "ERROR: Unable to open file '" + << alldata.params.input_file_prefix << ".otf' for reading." + << endl; + error= true; + } + } + +#ifdef OTFPROFILE_MPI + /* broadcast error indicator to workers because Open MPI had all + ranks except rank 0 waiting endlessly in the MPI_Recv, when the '.otf' file + was absent. 
*/ + if ( SyncError( alldata, error, 0 ) ) { + + return false; + } +#endif /* OTFPROFILE_MPI */ + + if ( 0 == alldata.myRank ) { + + /* fill the global array of processes */ + + alldata.myProcessesNum= OTF_MasterControl_getrCount( master ); + alldata.myProcessesList= + (uint32_t*)malloc( alldata.myProcessesNum * sizeof(uint32_t) ); + assert( alldata.myProcessesList ); + + uint32_t i= 0; + uint32_t j= 0; + + while( true ) { + + OTF_MapEntry* entry = + OTF_MasterControl_getEntryByIndex( master, i ); + + if( NULL == entry) break; + + for ( uint32_t k= 0; k< entry->n; k++ ) { + + alldata.myProcessesList[j]= entry->values[k]; + j++; + } + + i++; + } + assert( alldata.myProcessesNum == j ); + + /* close OTF master control and file manager */ + OTF_MasterControl_close( master ); + OTF_FileManager_close( manager ); + + /* DEBUG */ + /*cerr << "processes in trace: "; + for ( uint32_t k= 0; k < alldata.myProcessesNum; k++ ) { + + cerr << alldata.myProcessesList[k] << " "; + } + cerr << endl;*/ + } + + /* now we may re-arrange the process list for a better layout + - note that this layout is optimal to re-use OTF streams + if there are multiple processes per stream + - one may read the OTF definitions to know how to re-arrange */ + +#ifdef OTFPROFILE_MPI + if ( 0 == alldata.myRank ) { + + /* get number of ranks per worker, send to workers */ + + /* remaining ranks and remaining workers */ + uint32_t r_ranks= alldata.myProcessesNum; + uint32_t r_workers= alldata.numRanks; + + uint32_t pos= 0; + bool warn_for_empty= true; + for ( int w= 0; w < (int)alldata.numRanks; w++ ) { + + uint32_t n= ( ( r_ranks / r_workers ) * r_workers < r_ranks) ? 
+ ( r_ranks / r_workers +1 ) : ( r_ranks / r_workers ); + + if ( ( 0 == n ) && warn_for_empty ) { + + cerr << "Warning: more analysis ranks than trace processes, " + << "ranks " << w << " to " << alldata.numRanks -1 + << " are unemployed" << endl; + + warn_for_empty= false; + } + + if ( 0 == w ) { + + /* for master itself simply truncate processesList, + don't send and receive */ + alldata.myProcessesNum= n; + + } else { + + MPI_Send( &n, 1, MPI_INT, w, 2, MPI_COMM_WORLD ); + + MPI_Send( alldata.myProcessesList + pos, n, MPI_INT, + w, 3, MPI_COMM_WORLD ); + + } + + pos += n; + r_ranks -= n; + r_workers -= 1; + } + + } else { /* 0 != alldata.myRank */ + + /* workers receive number and sub-list of their ranks to process */ + + alldata.myProcessesNum= 0; + + MPI_Status status; + + MPI_Recv( &alldata.myProcessesNum, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, + &status ); + + alldata.myProcessesList= (uint32_t*)malloc( + alldata.myProcessesNum * sizeof(uint32_t) ); + assert( alldata.myProcessesList ); + + MPI_Recv( alldata.myProcessesList, alldata.myProcessesNum, MPI_INT, 0, + 3, MPI_COMM_WORLD, &status ); + + } + + /* DEBUG */ + /*cerr << " worker " << alldata.myRank << " handles: "; + for ( uint32_t k= 0; k < alldata.myProcessesNum; k++ ) { + + cerr << alldata.myProcessesList[k] << " "; + } + cerr << endl;*/ +#endif /* OTFPROFILE_MPI */ + + return !error; +} + + +#ifdef SHOW_RESULTS +static void show_results( const AllData& alldata ) { + +# define PRINT_MIN_MAX_AVG(v,u) (v.cnt) << " x avg " << ((double)(v.sum))/(v.cnt) << "(" << (v.min) << "-" << (v.max) << ") " << u + + cout << endl << " global data per function: " << endl; + { + map< uint64_t, FunctionData >::const_iterator it= alldata.functionMapGlobal.begin(); + map< uint64_t, FunctionData >::const_iterator itend= alldata.functionMapGlobal.end(); + while ( itend != it ) { + + cout << " global function " << it->first << " -> " ; + if ( it->second.count.cnt ) { + cout << "\t"<< + " cnt: " << 
PRINT_MIN_MAX_AVG(it->second.count,"[#]") << + " exc: " << PRINT_MIN_MAX_AVG(it->second.excl_time,"[t]") << + " inc: " << PRINT_MIN_MAX_AVG(it->second.incl_time,"[t]") << endl; + } + + it++; + } + } + + cout << endl << " global counter data per function: " << endl; + { + map< Pair, FunctionData, ltPair >::const_iterator it= alldata.counterMapGlobal.begin(); + map< Pair, FunctionData, ltPair >::const_iterator itend= alldata.counterMapGlobal.end(); + while ( itend != it ) { + + cout << " global counter " << it->first.a << " per function " << it->first.b << " -> " << endl; + if ( it->second.count.cnt ) { + cout << "\t"<< + " cnt: " << PRINT_MIN_MAX_AVG(it->second.count,"[#]"); + cout << " exc: "; + if ( it->second.excl_time.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.excl_time,"[#]"); + } else { + cout << "0 [#]"; + } + cout << " inc: "; + if ( it->second.incl_time.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.incl_time,"[#]"); + } else { + cout << "0 [#]"; + } + cout << endl; + } + + it++; + } + } + + cout << endl << " global message data per group pair: " << endl; + { + map< Pair, MessageData >::const_iterator it= alldata.messageMapPerGroupPair.begin(); + map< Pair, MessageData >::const_iterator itend= alldata.messageMapPerGroupPair.end(); + while ( itend != it ) { + + if ( it->second.count_send.cnt ) { + cout << "\tsent " << it->first.a << " --> " << it->first.b << + " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]"); + cout << " byt: "; + if ( it->second.bytes_send.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]"); + } else { + cout << "0 [b]"; + } + cout << " dur: "; + if ( it->second.duration_send.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[t]"); + } else { + cout << "0 [s]"; + } + cout << endl; + } + if ( it->second.count_recv.cnt ) { + cout << "\trecv " << it->first.a << " <-- " << it->first.b << + " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]"); + cout << " byt: "; + if ( it->second.bytes_recv.cnt ) { + 
cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]"); + } else { + cout << "0 [b]"; + } + cout << " dur: "; + if ( it->second.duration_recv.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[t]"); + } else { + cout << "0 [s]"; + } + cout << endl; + } + + it++; + } + } + + cout << endl << " global message data per group: " << endl; + { + map< uint64_t, MessageData >::const_iterator it= alldata.messageMapPerGroup.begin(); + map< uint64_t, MessageData >::const_iterator itend= alldata.messageMapPerGroup.end(); + while ( itend != it ) { + + cout << " msg of group " << it->first << " -> " << endl; + if ( it->second.count_send.cnt ) { + cout << "\tsent" << + " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]"); + cout << " byt: "; + if ( it->second.bytes_send.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]"); + } else { + cout << "0 [b]"; + } + cout << " dur: "; + if ( it->second.duration_send.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[t]"); + } else { + cout << "0 [s]"; + } + cout << endl; + } + if ( it->second.count_recv.cnt ) { + cout << "\trecv" << + " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]"); + cout << " byt: "; + if ( it->second.bytes_recv.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]"); + } else { + cout << "0 [b]"; + } + cout << " dur: "; + if ( it->second.duration_recv.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[t]"); + } else { + cout << "0 [s]"; + } + cout << endl; + } + + it++; + } + } + + cout << endl << " global message speed per length: " << endl; + { + map< Pair, MessageSpeedData, ltPair >::const_iterator it= alldata.messageSpeedMapPerLength.begin(); + map< Pair, MessageSpeedData, ltPair >::const_iterator itend= alldata.messageSpeedMapPerLength.end(); + while ( itend != it ) { + + cout << " msg of speed-bin " << it->first.a << " length-bin " << it->first.b << " -> "; + if ( it->second.count.cnt ) { + cout << "\t" << + " cnt: " << 
PRINT_MIN_MAX_AVG(it->second.count,"[#]") << endl; + } + + it++; + } + } + + cout << endl << " global collective data per group: " << endl; + { + map< Pair, CollectiveData, ltPair >::const_iterator it= alldata.collectiveMapPerGroup.begin(); + map< Pair, CollectiveData, ltPair >::const_iterator itend= alldata.collectiveMapPerGroup.end(); + while ( itend != it ) { + + cout << " collop of class " << it->first.a << " group " << it->first.b << " -> " << endl; + if ( it->second.count_send.cnt ) { + cout << "\tsent" << + " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_send,"[#]"); + cout << " byt: "; + if ( it->second.bytes_send.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.bytes_send,"[b]"); + } else { + cout << "0 [b]"; + } + cout << " dur: "; + if ( it->second.duration_send.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.duration_send,"[t]"); + } else { + cout << "0 [s]"; + } + cout << endl; + } + if ( it->second.count_recv.cnt ) { + cout << "\trecv" << + " cnt: " << PRINT_MIN_MAX_AVG(it->second.count_recv,"[#]"); + cout << " byt: "; + if ( it->second.bytes_recv.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.bytes_recv,"[b]"); + } else { + cout << "0 [b]"; + } + cout << " dur: "; + if ( it->second.duration_recv.cnt ) { + cout << PRINT_MIN_MAX_AVG(it->second.duration_recv,"[t]"); + } else { + cout << "0 [s]"; + } + cout << endl; + } + + it++; + } + } +} +#endif /* SHOW_RESULTS */ + + +static void show_helptext() { + + cout << endl + << " " << ExeName << " - generate a profile of a trace in LaTeX format." << endl + << endl + << " Syntax: " << ExeName << " [options] " << endl + << endl + << " options:" << endl + << " -h, --help show this help message" << endl + << " -V show OTF version" << endl + << " -v increase output verbosity" << endl + << " (can be used more than once)" << endl + << " -p show progress" << endl + << " -f max. 
number of filehandles available per rank" << endl + << " (default: " << Params::DEFAULT_MAX_FILE_HANDLES << ")" << endl + << " -b set buffersize of the reader" << endl + << " (default: " << Params::DEFAULT_BUFFER_SIZE << ")" << endl + << " -o specify the prefix of output file(s)" << endl + << " (default: " << Params::DEFAULT_OUTPUT_FILE_PREFIX() << ")" << endl + << " -g max. number of process groups in LaTeX output" << endl + << " (range: 1-" << Grouping::MAX_GROUPS << ", default: " << Params::DEFAULT_MAX_GROUPS << ")" << endl + << " -c, --cluster[ ]" << endl + << " do additional clustering of processes/threads using" << endl + << " comparison algorithm (KMEANS or CLINKAGE)" << endl + << " (default comparison algorithm: "; +if( Params::Clustering::DEFAULT_ALGORITHM == CLUSTER_ALG_CLINKAGE ) + cout << "CLINKAGE)" << endl; +else + cout << "KMEANS)" << endl; + cout << " -m write cluster mapping to " << endl + << " (implies -c, default: " << Params::Clustering::DEFAULT_MAP_FILE_NAME() << ")" << endl + << " -s call otfshrink to apply the cluster mapping to" << endl + << " input trace and produce a new trace named " << endl + << " with symbolic links to the original (implies -c)" << endl + << " -H use hard groups for CLINKAGE clustering" << endl + << " (implies --cluster CLINKAGE)" << endl + << " -q <0-1> quality threshold for CLINKAGE clustering" << endl + << " (implies --cluster CLINKAGE, default: " << Params::Clustering::DEFAULT_QUALITY_THRESHOLD() << ")" << endl + << " --stat read only summarized information, no events" << endl + << " --[no]csv enable/disable producing CSV output" << endl + << " (default: " << ( Params::DEFAULT_CREATE_CSV ? "enabled" : "disabled" ) << ")" << endl + << " --[no]tex enable/disable producing LaTeX output" << endl + << " (default: " << ( Params::DEFAULT_CREATE_TEX ? 
"enabled" : "disabled" ) << ")" << endl +#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4 + << " --[no]pdf enable/disable producing PDF output" << endl + << " (implies --tex if enabled, default: " << ( Params::DEFAULT_CREATE_PDF ? "enabled" : "disabled" ) << ")" << endl +#else /* PDFTEX && HAVE_PGFPLOTS_1_4 */ + << endl + << " PDF creation requires the PGFPLOTS package version >1.4" << endl + << " http://sourceforge.net/projects/pgfplots/ " << endl +#endif /* !PDFTEX || !HAVE_PGFPLOTS_1_4 */ + << endl; +} + +/* print a printf-style message if the current verbose level is >= level; in MPI builds with master_only == false every rank prints it prefixed by its rank, otherwise only rank 0 prints it */ +void VerbosePrint( AllData& alldata, uint8_t level, bool master_only, + const char* fmt, ... ) { + + if ( alldata.params.verbose_level >= level ) { + + va_list ap; + + va_start( ap, fmt ); + +#ifdef OTFPROFILE_MPI + if ( !master_only ) { + + char msg[1024]; + + /* prepend current rank to message; the formatted text may only use the space left behind the prefix, otherwise a long message writes past the end of msg */ + snprintf( msg, sizeof( msg ), "[%u] ", alldata.myRank ); + vsnprintf( msg + strlen( msg ), sizeof( msg ) - strlen( msg ), fmt, ap ); + + /* print message */ + printf( "%s ", msg ); + + } + else +#endif /* OTFPROFILE_MPI */ + { + if ( 0 == alldata.myRank ) { + + vprintf( fmt, ap ); + + } + + } + + va_end( ap ); + + } +} + + +void StartMeasurement( AllData& alldata, uint8_t verbose_level, + bool sync, const string& scope_name ) { + +#ifdef OTFPROFILE_MPI + if ( sync ) MPI_Barrier( MPI_COMM_WORLD ); +#endif /* OTFPROFILE_MPI */ + + /* search for measurement scope by its name; fail if already exists */ + map< string, Measurement::Scope >::iterator it= + alldata.measurement.scope_map.find( scope_name ); + assert( it == alldata.measurement.scope_map.end() ); + + /* insert new measurement scope to map */ + Measurement::Scope& scope= alldata.measurement.scope_map.insert( + make_pair( scope_name, Measurement::Scope( verbose_level ) ) ).first->second; + + /* start measurement on master if verbose level is high enough */ + + if ( 0 == alldata.myRank && + alldata.params.verbose_level >= verbose_level ) { + + scope.start_time= Measurement::gettime(); + + } +} + + 
+void StopMeasurement( AllData& alldata, bool sync, const string& scope_name ) { + +#ifdef OTFPROFILE_MPI + if ( sync ) MPI_Barrier( MPI_COMM_WORLD ); +#endif /* OTFPROFILE_MPI */ + + /* search for measurement scope by its name */ + map< string, Measurement::Scope >::iterator it= + alldata.measurement.scope_map.find( scope_name ); + assert( it != alldata.measurement.scope_map.end() ); + + Measurement::Scope& scope= it->second; + + /* stop measurement */ + + if ( 0 == alldata.myRank && + alldata.params.verbose_level >= scope.verbose_level ) { + + assert( -1.0 != scope.start_time ); + scope.stop_time= Measurement::gettime(); + + alldata.measurement.have_data= true; + + } +} + + +void PrintMeasurement( AllData& alldata, const string& scope_name ) { + + assert( 0 == alldata.myRank ); + + /* either print measurement result of certain scope or print results of all + measured scopes */ + + if ( 0 != scope_name.length() ) { + + /* search for measurement scope by its name */ + map< string, Measurement::Scope >::const_iterator it= + alldata.measurement.scope_map.find( scope_name ); + assert( it != alldata.measurement.scope_map.end() ); + + const Measurement::Scope& scope= it->second; + + /* print measurement result on stdout */ + + if ( alldata.params.verbose_level >= scope.verbose_level && + -1.0 != scope.start_time && -1.0 != scope.stop_time ) { + + cout << " " << scope_name << ": " + << scope.stop_time - scope.start_time << "s" << endl; + + } + + } else if ( alldata.measurement.have_data ) { + + cout << "runtime measurement results:" << endl; + + /* iterate over all measurement scopes */ + for ( map< string, Measurement::Scope >::const_iterator it= + alldata.measurement.scope_map.begin(); + it != alldata.measurement.scope_map.end(); it++ ) { + + /* print measurement result */ + PrintMeasurement( alldata, it->first ); + + } + + } +} + + +uint64_t Logi( uint64_t x, uint64_t b ) { + + assert( b > 1 ); + + uint64_t c= 1; + uint64_t i= 0; + + while( c <= x ) { + + c*= b; + 
i++; + } + + return i; +} + + +#ifdef OTFPROFILE_MPI +bool SyncError( AllData& alldata, bool& error, uint32_t root ) { + +#ifdef SYNC_ERROR + if ( 1 < alldata.numRanks ) { + + int buf= ( error ) ? 1 : 0; + + /* either broadcast the error indicator from one rank (root) + or reduce them from all */ + + if ( root != (uint32_t)-1 ) { + + MPI_Bcast( &buf, 1, MPI_INT, (int)root, MPI_COMM_WORLD ); + + error= ( 1 == buf ); + + } else { + + int recv_buf; + + MPI_Allreduce( &buf, &recv_buf, 1, MPI_INT, MPI_MAX, + MPI_COMM_WORLD ); + + error= ( 1 == recv_buf ); + + } + + } +#endif /* SYNC_ERROR */ + + return error; +} +#endif /* OTFPROFILE_MPI */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.h new file mode 100644 index 0000000000..5753dfaa14 --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/otfprofile.h @@ -0,0 +1,51 @@ +/* + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. + Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz +*/ + +#ifndef OTFPROFILE_H +#define OTFPROFILE_H + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif /* HAVE_CONFIG_H */ + +#include "datastructs.h" + + +/* print verbose message to stdout + (- do print message only if current verbose level is >= level + - if master_only is true only the master will print the message) */ +void VerbosePrint( AllData& alldata, uint8_t level, bool master_only, + const char* fmt, ... 
); + +/* start runtime measurement of certain scope + (- perform measurement only if current verbose level is >= verbose_level + - if sync is true synchronize all workers before start measurement) */ +void StartMeasurement( AllData& alldata, uint8_t verbose_level, bool sync, + const string& scope_name ); + +/* stop runtime measurement of certain scope + (if sync is true synchronize all workers before stop measurement) */ +void StopMeasurement( AllData& alldata, bool sync, const string& scope_name ); + +/* print measurement results to stdout + (if scope_name is not specified print results of all measured scopes) */ +void PrintMeasurement( AllData& alldata, const string& scope_name= "" ); + +/* logarithm to base b for unsigned 64-bit integer x */ +uint64_t Logi( uint64_t x, uint64_t b= 2 ); + +#ifdef OTFPROFILE_MPI +/* synchronize error indicator with all worker ranks + (either broadcast from one rank (root) or reduce from all) */ +bool SyncError( AllData& alldata, bool& error, uint32_t root= (uint32_t)-1 ); +#endif /* OTFPROFILE_MPI */ + + +/* name of program executable */ +extern const string ExeName; + + +#endif /* OTFPROFILE_H */ diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/reduce_data.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/reduce_data.cpp new file mode 100644 index 0000000000..4f73510b0d --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/reduce_data.cpp @@ -0,0 +1,760 @@ +/* + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. 
+ Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz +*/ + +#include +#include + +#include "otfprofile.h" +#include "reduce_data.h" + + +using namespace std; + + +/* fence between statistics parts within the buffer for consistency checking */ +enum { FENCE= 0xDEADBEEF }; + + +/* pack the local alldata into a buffer, return buffer */ +static char* pack_worker_data( AllData& alldata, uint32_t sizes[10] ) { + + uint64_t fence= FENCE; + uint32_t num_fences= 1; + + /* get the sizes of all parts that need to be transmitted */ + + for ( uint32_t i= 1; i < 10; i++ ) { + + sizes[i]= 0; + } + + if ( alldata.params.create_tex ) { + + sizes[1]= alldata.functionMapGlobal.size(); /* map< uint64_t, FunctionData > functionMapGlobal; */ + num_fences++; + sizes[2]= alldata.counterMapGlobal.size(); /* map< Pair, CounterData, ltPair > counterMapGlobal; */ + num_fences++; + sizes[3]= alldata.messageMapPerGroupPair.size(); /* map< Pair, MessageData, ltPair > messageMapPerGroupPair; */ + num_fences++; + sizes[4]= alldata.messageMapPerGroup.size(); /* map< uint64_t, MessageData > messageMapPerGroup; */ + num_fences++; + sizes[5]= alldata.messageSpeedMapPerLength.size(); /* map< Pair, MessageSpeedData, ltPair > messageSpeedMapPerLength; */ + num_fences++; + sizes[6]= alldata.collectiveMapPerGroup.size(); /* map< Pair, CollectiveData, ltPair > collectiveMapPerGroup; */ + num_fences++; + } + + if ( alldata.params.clustering.enabled ) { + + sizes[7]= alldata.functionMapPerRank.size(); /* map< Pair, FunctionData, ltPair > */ + num_fences++; + } + + /* get bytesize multiplying all pieces */ + + uint32_t bytesize= 0; + int s1, s2; + + MPI_Pack_size( num_fences, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); + bytesize += s1; + + MPI_Pack_size( sizes[1] * 7, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); + MPI_Pack_size( sizes[1] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); + bytesize += s1 + s2; + + MPI_Pack_size( sizes[2] * 8, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); + MPI_Pack_size( sizes[2] * 6, 
MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); + bytesize += s1 + s2; + + MPI_Pack_size( sizes[3] * 20, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); + MPI_Pack_size( sizes[3] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); + bytesize += s1 + s2; + + MPI_Pack_size( sizes[4] * 19, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); + MPI_Pack_size( sizes[4] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); + bytesize += s1 + s2; + + MPI_Pack_size( sizes[5] * 6, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); + bytesize += s1; + + MPI_Pack_size( sizes[6] * 20, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); + MPI_Pack_size( sizes[6] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); + bytesize += s1 + s2; + + MPI_Pack_size( sizes[7] * 8, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 ); + MPI_Pack_size( sizes[7] * 6, MPI_DOUBLE, MPI_COMM_WORLD, &s2 ); + bytesize += s1 + s2; + + /* get the buffer */ + sizes[0]= bytesize; + char* buffer= alldata.guaranteePackBuffer( bytesize ); + + /* pack parts */ + int position= 0; + + /* extra check that doesn't cost too much */ + MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + if ( alldata.params.create_tex ) { + + /* pack functionMapGlobal */ + { + map< uint64_t, FunctionData >::const_iterator it= alldata.functionMapGlobal.begin(); + map< uint64_t, FunctionData >::const_iterator itend= alldata.functionMapGlobal.end(); + for ( ; it != itend; ++it ) { + + MPI_Pack( (void*) &it->first, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.excl_time.min, 1, MPI_DOUBLE, 
buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.excl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.excl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.excl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.incl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.incl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + } + alldata.functionMapGlobal.clear(); + } + + /* extra check that doesn't cost too much */ + MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + /* pack counterMapGlobal */ + { + map< Pair, CounterData, ltPair >::const_iterator it= alldata.counterMapGlobal.begin(); + map< Pair, CounterData, ltPair >::const_iterator itend= alldata.counterMapGlobal.end(); + for ( ; it != itend; ++it ) { + + MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->first.b, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.excl_time.min, 1, MPI_DOUBLE, buffer, 
bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.excl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.excl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.excl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.incl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.incl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + } + alldata.counterMapGlobal.clear(); + } + + /* extra check that doesn't cost too much */ + MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + /* pack messageMapPerGroupPair */ + { + map< Pair, MessageData, ltPair >::const_iterator it= alldata.messageMapPerGroupPair.begin(); + map< Pair, MessageData, ltPair >::const_iterator itend= alldata.messageMapPerGroupPair.end(); + for ( ; it != itend; ++it ) { + + MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->first.b, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.count_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_send.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) 
&it->second.count_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.bytes_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_send.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.bytes_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.duration_send.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_send.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_send.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.duration_recv.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) 
&it->second.duration_recv.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_recv.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + } + alldata.messageMapPerGroupPair.clear(); + } + + /* extra check that doesn't cost too much */ + MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + /* pack messageMapPerGroup */ + { + map< uint64_t, MessageData >::const_iterator it= alldata.messageMapPerGroup.begin(); + map< uint64_t, MessageData >::const_iterator itend= alldata.messageMapPerGroup.end(); + for ( ; it != itend; ++it ) { + + MPI_Pack( (void*) &it->first, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.count_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_send.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.count_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.bytes_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_send.max, 1, MPI_LONG_LONG_INT, 
buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_send.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.bytes_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.duration_send.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_send.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_send.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.duration_recv.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_recv.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_recv.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + } + alldata.messageMapPerGroup.clear(); + } + + /* extra check that doesn't cost too much */ + MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + /* pack messageSpeedMapPerLength */ + { + map< Pair, MessageSpeedData, ltPair >::const_iterator it= 
alldata.messageSpeedMapPerLength.begin(); + map< Pair, MessageSpeedData, ltPair >::const_iterator itend= alldata.messageSpeedMapPerLength.end(); + for ( ; it != itend; ++it ) { + + MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->first.b, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + } + alldata.messageSpeedMapPerLength.clear(); + } + + /* extra check that doesn't cost too much */ + MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + /* pack collectiveMapPerGroup */ + { + map< Pair, CollectiveData, ltPair >::const_iterator it= alldata.collectiveMapPerGroup.begin(); + map< Pair, CollectiveData, ltPair >::const_iterator itend= alldata.collectiveMapPerGroup.end(); + for ( ; it != itend; ++it ) { + + MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->first.b, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.count_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_send.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( 
(void*) &it->second.count_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.bytes_send.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_send.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_send.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.bytes_recv.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_recv.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_recv.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.duration_send.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_send.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_send.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_send.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.duration_recv.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) 
&it->second.duration_recv.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_recv.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.duration_recv.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + } + alldata.collectiveMapPerGroup.clear(); + } + + /* extra check that doesn't cost too much */ + MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + } + + if ( alldata.params.clustering.enabled ) { + + /* pack functionMapPerRank */ + + map< Pair, FunctionData, ltPair >::const_iterator it= alldata.functionMapPerRank.begin(); + map< Pair, FunctionData, ltPair >::const_iterator itend= alldata.functionMapPerRank.end(); + for ( ; it != itend; ++it ) { + + MPI_Pack( (void*) &it->first.a, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->first.b, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.count.min, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.max, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.sum, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.count.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.excl_time.min, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.excl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.excl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.excl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + + MPI_Pack( (void*) &it->second.incl_time.min, 1, MPI_DOUBLE, buffer, 
bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.incl_time.max, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.incl_time.sum, 1, MPI_DOUBLE, buffer, bytesize, &position, MPI_COMM_WORLD ); + MPI_Pack( (void*) &it->second.incl_time.cnt, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + } + + /* in case of producing CSV output do not clear map because it is + needed later */ + if ( !alldata.params.create_csv ) { + + alldata.functionMapPerRank.clear(); + } + + /* extra check that doesn't cost too much */ + MPI_Pack( (void*) &fence, 1, MPI_LONG_LONG_INT, buffer, bytesize, &position, MPI_COMM_WORLD ); + } + + return buffer; +} + + +/* prepare alldata for unpack, return buffer of sufficient size; only sizes[0], the packed size in bytes, is read here */ +static char* prepare_worker_data( AllData& alldata, uint32_t sizes[10] ) { + + uint32_t bytesize= sizes[0]; + + return alldata.guaranteePackBuffer( bytesize ); +} + +/* unpack the received worker data and add it to the local alldata; sizes[0] is the buffer size in bytes, sizes[1]..sizes[7] are the entry counts of the individual maps, and the sections are delimited by FENCE markers that are checked via assert */ +static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) { + + uint64_t fence; + + /* unpack parts */ + int position= 0; + char* buffer= alldata.getPackBuffer( ); + + /* extra check that doesn't cost too much */ + fence= 0; + MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + assert( FENCE == fence ); + + if ( alldata.params.create_tex ) { + + /* unpack functionMapGlobal */ + for ( uint32_t i= 0; i < sizes[1]; i++ ) { + + uint64_t func; + FunctionData tmp; + + MPI_Unpack( buffer, sizes[0], &position, &func, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.cnt, 1,
MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + alldata.functionMapGlobal[ func ].add( tmp ); + } + + /* extra check that doesn't cost too much */ + fence= 0; + MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + assert( FENCE == fence ); + + /* unpack counterMapGlobal */ + for ( uint32_t i= 0; i < sizes[2]; i++ ) { + + uint64_t a; + uint64_t b; + CounterData tmp; + + MPI_Unpack( buffer, sizes[0], &position, &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &b, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, 
&tmp.excl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + alldata.counterMapGlobal[ Pair( a, b ) ].add( tmp ); + } + + /* extra check that doesn't cost too much */ + fence= 0; + MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + assert( FENCE == fence ); + + /* unpack messageMapPerGroupPair */ + for ( uint32_t i= 0; i < sizes[3]; i++ ) { + + uint64_t a; + uint64_t b; + MessageData tmp; + + MPI_Unpack( buffer, sizes[0], &position, (void*) &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &b, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.cnt, 1, MPI_LONG_LONG_INT, 
MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + alldata.messageMapPerGroupPair[ Pair(a,b) ].add( tmp ); + } + + /* extra check that doesn't cost too much */ + fence= 0; + MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, 
MPI_COMM_WORLD ); + assert( FENCE == fence ); + + /* unpack messageMapPerGroup */ + for ( uint32_t i= 0; i < sizes[4]; i++ ) { + + uint64_t a; + MessageData tmp; + + MPI_Unpack( buffer, sizes[0], &position, (void*) &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) 
&tmp.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + alldata.messageMapPerGroup[ a ].add( tmp ); + } + + /* extra check that doesn't cost too much */ + fence= 0; + MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + assert( FENCE == fence ); + + /* unpack messageSpeedMapPerLength */ + for ( uint32_t i= 0; i < sizes[5]; i++ ) { + + uint64_t a; + uint64_t b; + MessageSpeedData tmp; + + MPI_Unpack( buffer, sizes[0], &position, (void*) &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &b, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + alldata.messageSpeedMapPerLength[ Pair(a,b) ].add( tmp ); 
+ } + + /* extra check that doesn't cost too much */ + fence= 0; + MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + assert( FENCE == fence ); + + /* unpack collectiveMapPerGroup */ + for ( uint32_t i= 0; i < sizes[6]; i++ ) { + + uint64_t a; + uint64_t b; + CollectiveData tmp; + + MPI_Unpack( buffer, sizes[0], &position, (void*) &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &b, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.count_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, 
sizes[0], &position, (void*) &tmp.bytes_recv.max, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.bytes_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_send.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &tmp.duration_recv.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + alldata.collectiveMapPerGroup[ Pair(a,b) ].add( tmp ); + } + + /* extra check that doesn't cost too much */ + fence= 0; + MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + assert( FENCE == fence ); + } + + if ( alldata.params.clustering.enabled ) { + + /* unpack functionMapPerRank */ + for ( uint32_t i= 0; i < sizes[7]; i++ ) { + + uint64_t a; + uint64_t b; + FunctionData tmp; + + MPI_Unpack( buffer, sizes[0], &position, (void*) &a, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, (void*) &b, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.min, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.max, 1, MPI_LONG_LONG_INT, 
MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.sum, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.count.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.excl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.min, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.max, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.sum, 1, MPI_DOUBLE, MPI_COMM_WORLD ); + MPI_Unpack( buffer, sizes[0], &position, &tmp.incl_time.cnt, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + alldata.functionMapPerRank[ Pair(a,b) ].add( tmp ); + } + + /* extra check that doesn't cost too much */ + fence= 0; + MPI_Unpack( buffer, sizes[0], &position, &fence, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + assert( FENCE == fence ); + } +} + + +bool ReduceData( AllData& alldata ) { /* pairwise reduction of the workers' data: in every round a rank is paired with peer = myRank XOR round (round = 1, 2, 4, ...); the rank with the larger number packs and sends its data and then leaves the loop, the rank with the smaller number receives and merges it; returns !error, which is always true here because error is never set in this function */ + + bool error= false; + + assert( 1 < alldata.numRanks ); + + /* start runtime measurement for reducing data */ + StartMeasurement( alldata, 1, true, "reduce data" ); + + VerbosePrint( alldata, 1, true, "reducing data\n" ); + + /* implement reduction myself because MPI and C++ STL don't play with + each other */ + + /* how many rounds until master has all the data?
*/ + uint32_t num_rounds= Logi( alldata.numRanks ) -1; /* only used in the progress messages below; NOTE(review): Logi() is defined outside this view -- presumably an integer logarithm, confirm that the -1 yields the real number of rounds */ + uint32_t round_no= 0; + uint32_t round= 1; /* bit mask selecting the peer: 1, 2, 4, ... */ + while ( round < alldata.numRanks ) { + + round_no++; + + if ( 1 == alldata.params.verbose_level ) { + + VerbosePrint( alldata, 1, true, " round %u / %u\n", + round_no, num_rounds ); + } + + uint32_t peer= alldata.myRank ^ round; /* partner rank: myRank with the bit selected by round flipped */ + + /* if peer rank is not there, do nothing but go on */ + if ( peer >= alldata.numRanks ) { + + round= round << 1; + continue; + } + + /* send to smaller peer, receive from larger one */ + uint32_t sizes[10]; + char* buffer; + + if ( alldata.myRank < peer ) { + + MPI_Status status; + + MPI_Recv( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD, + &status ); /* tag 4: header carrying the sizes array */ + + // DEBUG + //cout << " round " << round << " recv " << peer << "--> " << + //alldata.myRank << " with " << + //sizes[0] << " bytes, " << + //sizes[1] << ", " << + //sizes[2] << ", " << + //sizes[3] << ", " << + //sizes[4] << "" << endl << flush; + + buffer= prepare_worker_data( alldata, sizes ); + + VerbosePrint( alldata, 2, false, + "round %u / %u: receiving %u bytes from rank %u\n", + round_no, num_rounds, sizes[0], peer ); + + MPI_Recv( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD, + &status ); /* tag 5: packed payload of sizes[0] bytes */ + + unpack_worker_data( alldata, sizes ); + + } else { + + buffer= pack_worker_data( alldata, sizes ); + + // DEBUG + //cout << " round " << round << " send " << alldata.myRank << + //" --> " << peer << " with " << + //sizes[0] << " bytes, " << + //sizes[1] << ", " << + //sizes[2] << ", " << + //sizes[3] << ", " << + //sizes[4] << "" << endl << flush; + + VerbosePrint( alldata, 2, false, + "round %u / %u: sending %u bytes to rank %u\n", + round_no, num_rounds, sizes[0], peer ); + + MPI_Send( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD ); /* header first, then the payload -- the order in which the receiving branch above posts its receives */ + + MPI_Send( buffer, sizes[0], MPI_PACKED, peer, 5, + MPI_COMM_WORLD ); + + /* every worker has to send off its data at most once, + after that, break from the collective reduction operation */ + break; + } + + round= round << 1; + } + +
alldata.freePackBuffer(); /* executed by every rank once it has left the loop, senders (via break) and receivers alike */ + + /* synchronize error indicator with workers */ + /*SyncError( alldata, error );*/ + + if ( !error ) { /* error is never changed above, so this branch is always taken */ + + /* stop runtime measurement for reducing data */ + StopMeasurement( alldata, true, "reduce data" ); + } + + return !error; +} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/reduce_data.h similarity index 100% rename from ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.h rename to ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/reduce_data.h diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/summarize_data.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/summarize_data.cpp new file mode 100644 index 0000000000..528c2504c8 --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/summarize_data.cpp @@ -0,0 +1,379 @@ +/* + This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011. + Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz +*/ + +#include +#include + +#include "otfprofile.h" +#include "summarize_data.h" + + +using namespace std; + + +static void get_grouping( AllData& alldata ) { /* assigns the entries of alldata.allProcesses, in iteration order, to groups numbered from 1; each group receives the remaining processes divided by the remaining groups, rounded up; the loop stops when no process is left or after Grouping::MAX_GROUPS groups; NOTE(review): r_groups is used as a divisor -- params.max_groups == 0 with a non-empty process set would divide by zero, presumably ruled out by the caller, confirm */ + + uint32_t r_processes= alldata.allProcesses.size(); + uint32_t r_groups= alldata.params.max_groups; + + set< Process, ltProcess >::iterator pos= alldata.allProcesses.begin(); + + for ( uint32_t c= 0; + c < Grouping::MAX_GROUPS && 0 < r_processes; c++ ) { + + uint32_t n= + ( ( r_processes / r_groups ) * r_groups < r_processes ) ?
+ ( r_processes / r_groups + 1 ) : ( r_processes / r_groups ); + + for ( uint32_t i= 0; i < n; i++ ) { + + bool inserted= alldata.grouping.insert( c+1, pos->process ); + assert( inserted ); + + pos++; + r_processes--; + + } + + r_groups--; + + } +} + + +#ifdef OTFPROFILE_MPI +static void share_grouping( AllData& alldata ) { + + assert( 1 < alldata.numRanks ); + + char* buffer; + int buffer_size= 0; + int buffer_pos= 0; + + if ( 0 == alldata.myRank ) { + + /* get size needed to send grouping information to workers */ + + int size; + + /* alldata.grouping.groupsToProcesses.size() + firsts */ + MPI_Pack_size( 1 + alldata.grouping.groupsToProcesses.size(), + MPI_LONG_LONG_INT, MPI_COMM_WORLD, &size ); + buffer_size+= size; + + /* alldata.grouping.groupsToProcesses.second.size() + second */ + for ( map< uint64_t, set >::const_iterator it= + alldata.grouping.groupsToProcesses.begin(); + it != alldata.grouping.groupsToProcesses.end(); it++ ) { + + MPI_Pack_size( 1 + it->second.size(), MPI_LONG_LONG_INT, + MPI_COMM_WORLD, &size ); + buffer_size+= size; + + } + + } + + /* broadcast buffer size */ + MPI_Bcast( &buffer_size, 1, MPI_INT, 0, MPI_COMM_WORLD ); + + /* allocate buffer */ + buffer= new char[ buffer_size ]; + assert( buffer ); + + /* pack grouping information to buffer */ + + if ( 0 == alldata.myRank ) { + + /* alldata.grouping.groupsToProcesses.size() */ + uint64_t clust_proc_map_size= + alldata.grouping.groupsToProcesses.size(); + MPI_Pack( &clust_proc_map_size, 1, MPI_LONG_LONG_INT, buffer, + buffer_size, &buffer_pos, MPI_COMM_WORLD ); + + /* alldata.grouping.groupsToProcesses */ + for ( map< uint64_t, set >::const_iterator it= + alldata.grouping.groupsToProcesses.begin(); + it != alldata.grouping.groupsToProcesses.end(); it++ ) { + + /* alldata.grouping.groupsToProcesses.first */ + uint64_t group= it->first; + MPI_Pack( &group, 1, MPI_LONG_LONG_INT, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + + /* alldata.grouping.groupsToProcesses.second.size() */ + 
uint64_t processes_size= it->second.size(); + MPI_Pack( &processes_size, 1, MPI_LONG_LONG_INT, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + + /* alldata.grouping.groupsToProcesses.second */ + for ( set::const_iterator it2= it->second.begin(); + it2 != it->second.end(); it2++ ) { + + uint64_t process= *it2; + MPI_Pack( &process, 1, MPI_LONG_LONG_INT, buffer, buffer_size, + &buffer_pos, MPI_COMM_WORLD ); + + } + + } + + } + + /* broadcast definitions buffer */ + MPI_Bcast( buffer, buffer_size, MPI_PACKED, 0, MPI_COMM_WORLD ); + + /* unpack grouping information from buffer */ + + if ( 0 != alldata.myRank ) { + + /* alldata.grouping.groupsToProcesses.size() */ + uint64_t clust_proc_map_size; + MPI_Unpack( buffer, buffer_size, &buffer_pos, &clust_proc_map_size, 1, + MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + /* alldata.grouping.groupsToProcesses */ + for ( uint64_t i= 0; i < clust_proc_map_size; i++ ) { + + /* alldata.grouping.groupsToProcesses.first */ + uint64_t group; + MPI_Unpack( buffer, buffer_size, &buffer_pos, &group, 1, + MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + /* alldata.grouping.groupsToProcesses.second.size() */ + uint64_t processes_size; + MPI_Unpack( buffer, buffer_size, &buffer_pos, &processes_size, 1, + MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + /* alldata.grouping.groupsToProcesses.second */ + for ( uint64_t j= 0; j < processes_size; j++ ) { + + uint64_t process; + MPI_Unpack( buffer, buffer_size, &buffer_pos, &process, 1, + MPI_LONG_LONG_INT, MPI_COMM_WORLD ); + + bool inserted= alldata.grouping.insert( group, process ); + assert( inserted ); + + } + + } + + } + + delete[] buffer; +} +#endif /* OTFPROFILE_MPI */ + + +bool SummarizeData( AllData& alldata ) { + + bool error= false; + + /* start runtime measurement for summarizing data */ + StartMeasurement( alldata, 1, true, "summarize data" ); + + /* rank 0 gets grouping information */ + + if ( 0 == alldata.myRank ) { + + get_grouping( alldata ); + + } + +#ifdef OTFPROFILE_MPI + /* share 
grouping information to workers */ + + if ( 1 < alldata.numRanks ) { + + share_grouping( alldata ); + + } +#endif /* OTFPROFILE_MPI */ + + /* macro to set min, max to sum before summarizing */ +# define MINMAX2SUM(v) \ + if( 0 != (v).cnt ) { \ + (v).cnt = 1; \ + (v).min= (v).max= (v).sum; \ + } else { \ + (v).cnt = 0; \ + /* (v).min= OTF_UINT64_MAX; (v).max= 0; \ + ^^^ this is set already by the constructor and never touched \ + if (v).cnt == 0. Therefore, it is ignored when computing min/max \ + further on. */ \ + } + + /* summarize map ( rank x func ) to map ( func ) */ + { + map< Pair, FunctionData, ltPair >::const_iterator it= alldata.functionMapPerRank.begin(); + map< Pair, FunctionData, ltPair >::const_iterator itend= alldata.functionMapPerRank.end(); + while ( itend != it ) { + + const uint64_t& func= it->first.b; + + alldata.functionMapGlobal[ func ].add( it->second ); + it++; + } + + /* in case of additional clustering or producing CSV output do not + clear map ( rank x func ) because it is needed later */ + if ( !alldata.params.clustering.enabled && + !alldata.params.create_csv ) { + + alldata.functionMapPerRank.clear(); + } + } + + /* summarize map ( rank x func x counter ) to map ( counter x func ) */ + { + map< Triple, CounterData, ltTriple >::const_iterator it= alldata.counterMapPerFunctionRank.begin(); + map< Triple, CounterData, ltTriple >::const_iterator itend= alldata.counterMapPerFunctionRank.end(); + while ( itend != it ) { + + const uint64_t& func= it->first.b; + const uint64_t& counter= it->first.c; + + alldata.counterMapGlobal[ Pair( counter, func ) ].add( it->second ); + it++; + } + + /* in case of producing CSV output do not clear + map ( rank x func x counter ) because it is needed later */ + if ( !alldata.params.create_csv ) { + + alldata.counterMapPerFunctionRank.clear(); + } + } + + /* summarize map ( rank x rank ) to map ( group x group ) */ + { + map< Pair, MessageData, ltPair >::const_iterator it= 
alldata.messageMapPerRankPair.begin(); + map< Pair, MessageData, ltPair >::const_iterator itend= alldata.messageMapPerRankPair.end(); + while ( itend != it ) { + + uint64_t group_a= it->first.a; + uint64_t group_b= it->first.b; + + /* get copy of message data in order to keep original data + unchanged for CSV output */ + MessageData data= it->second; + + if ( alldata.grouping.enabled ) { + + /* convert process IDs to group IDs */ + + group_a= alldata.grouping.process2group( group_a ); + assert( 0 != group_a ); + group_b= alldata.grouping.process2group( group_b ); + assert( 0 != group_b ); + + } + + MINMAX2SUM( data.count_send ); + MINMAX2SUM( data.count_recv ); + MINMAX2SUM( data.bytes_send ); + MINMAX2SUM( data.bytes_recv ); + MINMAX2SUM( data.duration_send ); + MINMAX2SUM( data.duration_recv ); + + alldata.messageMapPerGroupPair[ Pair( group_a, group_b ) ].add( data ); + it++; + } + alldata.messageMapPerRankPair.clear(); + } + + /* summarize map ( rank ) to map ( group ) */ + { + map< uint64_t, MessageData >::const_iterator it= alldata.messageMapPerRank.begin(); + map< uint64_t, MessageData >::const_iterator itend= alldata.messageMapPerRank.end(); + while ( itend != it ) { + + uint64_t group= it->first; + + /* get copy of message data in order to keep original data + unchanged for CSV output */ + MessageData data= it->second; + + if ( alldata.grouping.enabled ) { + + /* convert process ID to group ID */ + group= alldata.grouping.process2group( group ); + assert( 0 != group ); + + } + + MINMAX2SUM( data.count_send ); + MINMAX2SUM( data.count_recv ); + MINMAX2SUM( data.bytes_send ); + MINMAX2SUM( data.bytes_recv ); + MINMAX2SUM( data.duration_send ); + MINMAX2SUM( data.duration_recv ); + + alldata.messageMapPerGroup[ group ].add( data ); + it++; + } + + /* in case of producing CSV output do not clear map ( rank ) + because it is needed later */ + if ( !alldata.params.create_csv ) { + + alldata.messageMapPerRank.clear(); + } + } + + /* summarize map ( rank x class ) 
to map ( class x group ) */ + { + map< Pair, CollectiveData, ltPair >::iterator it= alldata.collectiveMapPerRank.begin(); + map< Pair, CollectiveData, ltPair >::iterator itend= alldata.collectiveMapPerRank.end(); + while ( itend != it ) { + + uint64_t group= it->first.a; + const uint64_t& op_class= it->first.b; + + /* get copy of collective op. data in order to keep original data + unchanged for CSV output */ + CollectiveData data= it->second; + + if ( alldata.grouping.enabled ) { + + /* convert process ID to group ID */ + group= alldata.grouping.process2group( group ); + assert( 0 != group ); + + } + + MINMAX2SUM( data.count_send ); + MINMAX2SUM( data.count_recv ); + MINMAX2SUM( data.bytes_send ); + MINMAX2SUM( data.bytes_recv ); + MINMAX2SUM( data.duration_send ); + MINMAX2SUM( data.duration_recv ); + + alldata.collectiveMapPerGroup[ Pair( op_class, group ) ].add( data ); + it++; + } + + /* in case of producing CSV output do not clear map ( class x rank ) + because it is needed later */ + if ( !alldata.params.create_csv ) { + + alldata.collectiveMapPerRank.clear(); + } + } + +#ifdef OTFPROFILE_MPI + /* synchronize error indicator with workers */ + /*SyncError( alldata, error );*/ +#endif /* OTFPROFILE_MPI */ + + if ( !error ) { + + /* stop runtime measurement for summarizing data */ + StopMeasurement( alldata, true, "summarize data" ); + } + + return !error; +} diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/summarize_data.h similarity index 100% rename from ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.h rename to ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/summarize_data.h diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Handler.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Handler.cpp index 280747e9a7..ce31dcd9a0 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Handler.cpp +++ 
b/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Handler.cpp @@ -4,35 +4,83 @@ */ #include "Handler.h" +#include +#include -/* most of these hanlders act like copyhandlers, except handleDefProcess and handleDefProcessGroup */ +int handleDefProcess (void *userData, uint32_t stream, uint32_t process, + const char *name, uint32_t parent, OTF_KeyValueList* list) { -int handleDefinitionComment (void *userData, uint32_t stream, const char *comment) { - - return ( 0 == OTF_Writer_writeDefinitionComment ( (OTF_Writer*) userData, stream, comment) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} -int handleDefTimerResolution (void *userData, uint32_t stream, uint64_t ticksPerSecond) { - - return ( 0 == OTF_Writer_writeDefTimerResolution ( (OTF_Writer*) userData, stream, ticksPerSecond) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleDefProcess (void *userData, uint32_t stream, uint32_t process, const char *name, uint32_t parent) { - + int ret; firstarg *first = (firstarg*) userData; - if ( first->procMap.end() == first->procMap.find(process) ) { + if ( ( cpuMap.end() == cpuMap.find( process ) ) != inverse ) { + + /* process was replaced, drop definition */ return OTF_RETURN_OK; } - return ( 0 == OTF_Writer_writeDefProcess ( (OTF_Writer*) first->writer, stream, process, name, parent) ) - ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; + /* disable parent process specification if the original parent process was dropped */ + if ( parent ) { + + if ( ( cpuMap.end() == cpuMap.find( parent ) ) != inverse ) { + + parent= 0; + } + } + + /* if process represents other processes, modify its name */ + + map< uint32_t, set< uint32_t > >::const_iterator ft= replacementMap.find( process ); + map< uint32_t, set< uint32_t > >::const_iterator ftend= replacementMap.end(); + if ( ftend != ft && 0 < ft->second.size() ) { + + uint32_t len= ft->second.size() +1; + /* copy of name, truncate at 99 characters */ + char newname[100]; + /* alter process name, append hint about the number of processes replaced by this + one including itself */ + snprintf( newname, 100, "%s #%u", name, len ); + + uint32_t* substitutes= (uint32_t*) malloc( len * sizeof(uint32_t) ); + assert( substitutes ); + uint32_t* p= substitutes; + *p= process; ++p; + + set< uint32_t >::const_iterator it= ft->second.begin(); + set< uint32_t >::const_iterator itend= ft->second.end(); + for ( ; it != itend; ++it ) { + + *p= *it; ++p; + } + + ret= OTF_Writer_writeDefProcessSubstitutes( (OTF_Writer*) first->writer, + stream, process, len, substitutes, NULL ); + if ( 0 == ret ) return OTF_RETURN_ABORT; + + free( substitutes ); + substitutes= NULL; + + ret= OTF_Writer_writeDefProcessKV ( (OTF_Writer*) first->writer, + stream, process, newname, parent, list); + if ( 0 == ret ) return OTF_RETURN_ABORT; + + } else { + + ret= OTF_Writer_writeDefProcessKV ( (OTF_Writer*) first->writer, + stream, process, name, parent, list); + if ( 0 == ret ) return OTF_RETURN_ABORT; + } + + return OTF_RETURN_OK; } -int handleDefProcessGroup (void *userData, uint32_t stream, uint32_t procGroup, const char *name, uint32_t numberOfProcs, const uint32_t *procs) { + +int handleDefProcessGroup (void *userData, uint32_t stream, uint32_t procGroup, + const char *name, uint32_t numberOfProcs, const uint32_t *procs, + OTF_KeyValueList* list) { + firstarg 
*first = (firstarg*) userData; @@ -41,7 +89,9 @@ int handleDefProcessGroup (void *userData, uint32_t stream, uint32_t procGroup, int ret; for(uint32_t i = 0; i < numberOfProcs; i++) { - if ( first->procMap.end() != first->procMap.find(procs[i]) ) { + + if ( ( cpuMap.end() == cpuMap.find( procs[i] ) ) == inverse ) { + mod_procs[mod_numberOfProcs] = procs[i]; mod_numberOfProcs++; } @@ -53,8 +103,8 @@ int handleDefProcessGroup (void *userData, uint32_t stream, uint32_t procGroup, return OTF_RETURN_OK; } - ret = ( 0 == OTF_Writer_writeDefProcessGroup ( (OTF_Writer*) first->writer, stream, procGroup, name, - mod_numberOfProcs, mod_procs) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; + ret = ( 0 == OTF_Writer_writeDefProcessGroupKV ( (OTF_Writer*) first->writer, stream, procGroup, name, + mod_numberOfProcs, mod_procs, list) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; delete[] mod_procs; @@ -62,71 +112,17 @@ int handleDefProcessGroup (void *userData, uint32_t stream, uint32_t procGroup, } -int handleDefFunction (void *userData, uint32_t stream, uint32_t func, const char *name, uint32_t funcGroup, uint32_t source) { - return ( 0 == OTF_Writer_writeDefFunction ( (OTF_Writer*) userData, stream, func, name, funcGroup, source) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleDefFunctionGroup (void *userData, uint32_t stream, uint32_t funcGroup, const char *name) { - - return ( 0 == OTF_Writer_writeDefFunctionGroup ( (OTF_Writer*) userData, stream, funcGroup, name) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleDefCollectiveOperation (void *userData, uint32_t stream, uint32_t collOp, const char *name, uint32_t type) { - - return ( 0 == OTF_Writer_writeDefCollectiveOperation ( (OTF_Writer*) userData, stream, collOp, name, type) ) - ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleDefCounter (void *userData, uint32_t stream, uint32_t counter, const char *name, uint32_t properties, uint32_t counterGroup, const char *unit) { - - return ( 0 == OTF_Writer_writeDefCounter ( (OTF_Writer*) userData, stream, counter, name, properties, - counterGroup, unit) ) ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleDefCounterGroup (void *userData, uint32_t stream, uint32_t counterGroup, const char *name) { - - return ( 0 == OTF_Writer_writeDefCounterGroup ( (OTF_Writer*) userData, stream, counterGroup, name) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleDefScl (void *userData, uint32_t stream, uint32_t source, uint32_t sourceFile, uint32_t line) { - - return ( 0 == OTF_Writer_writeDefScl ( (OTF_Writer*) userData, stream, source, sourceFile, line) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleDefSclFile (void *userData, uint32_t stream, uint32_t sourceFile, const char *name) { - - return ( 0 == OTF_Writer_writeDefSclFile ( (OTF_Writer*) userData, stream, sourceFile, name) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleDefCreator (void *userData, uint32_t stream, const char *creator) { - - return ( 0 == OTF_Writer_writeDefCreator ( (OTF_Writer*) userData, stream, creator) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleDefVersion (void *userData, uint32_t stream, uint8_t major, uint8_t minor, uint8_t sub, const char *string) { - - /* this is deprecated and not necessary at all */ - /*return ( 0 == OTF_Writer_writeOtfVersion ( (OTF_Writer*) userData, stream) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK;*/ - - return OTF_RETURN_OK; -} - -int handleDefFile (void *userData, uint32_t stream, uint32_t token, const char *name, uint32_t group) { - - return ( 0 == OTF_Writer_writeDefFile ( (OTF_Writer*) userData, stream, token, name, group) ) - ? 
OTF_RETURN_ABORT : OTF_RETURN_OK; -} - -int handleDefFileGroup (void *userData, uint32_t stream, uint32_t token, const char *name) { - - return ( 0 == OTF_Writer_writeDefFileGroup ( (OTF_Writer*) userData, stream, token, name) ) - ? OTF_RETURN_ABORT : OTF_RETURN_OK; +int handleDefProcessSubstitutes (void* userData, uint32_t stream, + uint32_t representative, uint32_t numberOfProcs, + const uint32_t* procs, OTF_KeyValueList* list) { + + + /* it isn't clear yet how to handle this definition; abort for now */ + + cerr << endl << "Conflict: The input trace already contains process substitution information." + "This probably means that it has been created by otfshrink and cannot be processed again. " + "Please start with the original trace instead." << endl << endl; + + return OTF_RETURN_ABORT; } diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Handler.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Handler.h index 9efdc81bac..e883dd6556 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Handler.h +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Handler.h @@ -8,47 +8,26 @@ using namespace std; +#include "otfshrink.h" /* required as FirstHandlerArg later on */ typedef struct { + OTF_Writer *writer; - map procMap; + + /* currntly, 'writer' is the only member but I want to keep this + struct to transport everything which is now in global variables */ + } firstarg; -int handleDefinitionComment (void *userData, uint32_t stream, const char *comment); - -int handleDefTimerResolution (void *userData, uint32_t stream, uint64_t ticksPerSecond); - int handleDefProcess (void *userData, uint32_t stream, uint32_t process, - const char *name, uint32_t parent); + const char *name, uint32_t parent, OTF_KeyValueList* list); int handleDefProcessGroup (void *userData, uint32_t stream, uint32_t procGroup, - const char *name, uint32_t numberOfProcs, const uint32_t *procs); - -int handleDefFunction (void *userData, uint32_t stream, uint32_t func, const char *name, - 
uint32_t funcGroup, uint32_t source); - -int handleDefFunctionGroup (void *userData, uint32_t stream, uint32_t funcGroup, const char *name); - -int handleDefCollectiveOperation (void *userData, uint32_t stream, uint32_t collOp, - const char *name, uint32_t type); - -int handleDefCounter (void *userData, uint32_t stream, uint32_t counter, const char *name, - uint32_t properties, uint32_t counterGroup, const char *unit); - -int handleDefCounterGroup (void *userData, uint32_t stream, uint32_t counterGroup, const char *name); - -int handleDefScl (void *userData, uint32_t stream, uint32_t source, uint32_t sourceFile, uint32_t line); - -int handleDefSclFile (void *userData, uint32_t stream, uint32_t sourceFile, const char *name); - -int handleDefCreator (void *userData, uint32_t stream, const char *creator); - -int handleDefVersion (void *userData, uint32_t stream, uint8_t major, uint8_t minor, - uint8_t sub, const char *string); - -int handleDefFile (void *userData, uint32_t stream, uint32_t token, const char *name, uint32_t group); - -int handleDefFileGroup (void *userData, uint32_t stream, uint32_t token, const char *name); + const char *name, uint32_t numberOfProcs, const uint32_t *procs, + OTF_KeyValueList* list); +int handleDefProcessSubstitutes (void* userData, uint32_t stream, + uint32_t representative, uint32_t numberOfProcs, + const uint32_t* procs, OTF_KeyValueList* list); diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Makefile.am index 825d5c5303..566bba12f8 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Makefile.am +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/Makefile.am @@ -8,6 +8,7 @@ endif otfshrink_LDADD = $(top_builddir)/otflib/libotf.la otfshrink_DEPENDENCIES = $(otfaux_LDADD) otfshrink_SOURCES = \ + otfshrink.h \ Handler.h \ otfshrink.cpp \ Handler.cpp diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/otfshrink.cpp 
b/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/otfshrink.cpp index 976597a228..92c3c3ab51 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/otfshrink.cpp +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/otfshrink.cpp @@ -7,10 +7,13 @@ #include "config.h" #endif - +#include #include #include +#include +#include + #include #include #include @@ -39,141 +42,311 @@ " -V show OTF version \n" \ " -i specify the input trace file \n" \ " -o specify the output file \n" \ -" -l a space-separated list of processes \n" \ -" to show, \n" \ -" e.g. -l 1 2 3-4 8-5 \n" \ -" -v a space-separated list of processes \n" \ -" NOT to show, \n" \ -" see -l for exact syntax \n" \ -" -s display all selected processes, \n" \ -" no files are created (simulation mode), \n" \ -" modes: (l)ist, (r)ange or (t)able \n" \ +" -l \"\" a space-separated list of processes in quotes \n" \ +" to enable, i.e. keep in the copy, \n" \ +" e.g. '-l \"1 2 3 4 8 5\"' \n" \ +" -v invert setting from '-l', \n" \ +" i.e. deactivate/exclude listed processes \n" \ +" -m \"\" map all listed processes to one representative\n" \ +" and remove all remaining ones \n" \ +" must not be mixed with '-l' and '-v' \n" \ +" -f read multiple '-m' lists from the given file \n" \ +" one list/group per line, empty lines allowed \n" \ +" -s simulation mode: display all selected \n" \ +" processes, no files are created, \n" \ +" display modes: (l)ist, (r)ange or (t)able \n" \ " defaut: range \n" \ " \n" \ +" Multiple instances of '-l', '-m', and '-f' may be used \n" \ -map cpuMap; -int write_master(string input, string output, bool invers, bool show, int sim_mode); + +#include "otfshrink.h" + + +/* well, we have some global variables */ + + +/* this map contains the specified process IDs which are +either to keep or to drop depending on 'mode' */ +set< uint32_t > cpuMap; +bool inverse= false; + + +/* map key is the id of the representative, the value set contains all ids of +processes to be replaced by the representative 
(including the key) */ +map< uint32_t, set< uint32_t > > replacementMap; + +#define MODE_DEFAULT 0 +#define MODE_NORMAL 1 +#define MODE_INVERSE 2 +#define MODE_MAP 3 + +uint32_t mode= MODE_DEFAULT; /* MODE_DEFAULT, MODE_NORMAL, MODE_INVERSE, MODE_MAP */ + + +bool simulation = false; +int sim_mode = RANGE_MODE; +string input_path; +string input_file; +string input_folder; +string output_file; +string output_folder; +string output_path; + + + +int write_master(string input, string output, bool show, int sim_mode); int display_processes(firstarg *first, int sim_mode); -void free_all_pointers(char *ch_i, firstarg *first, OTF_MapEntry *entries, OTF_Writer *writer, +void free_all_pointers(char *ch_i, firstarg *first, OTF_MapEntry** entries, OTF_Writer *writer, OTF_Reader *reader, OTF_HandlerArray *handlers, OTF_FileManager *manager, OTF_MasterControl *master, OTF_MasterControl *new_master); -int main (int argc, char* argv[]) { - - char *pwd = NULL; - bool enable = true; - bool invers_mode = true; - bool mode_set = false; - bool simulation = false; - int sim_mode = RANGE_MODE; - size_t found; +int parse_parameters( int argc, char* argv[] ); - string arg = "-l"; - string input_path; - string input_file; - string input_folder; - string output_file; - string output_folder; - string output_path; - if ( argc <= 1 ) { - cout << HELPTEXT << endl; - return 0; - } +int parse_replacement_file( const char* filename ); +int parse_replacement_line( char* line ); +int parse_list_line( char* line ); + + +int parse_replacement_file( const char* filename ) { + + FILE* f= fopen( filename, "r" ); + assert( f ); + + /* the commeted-out parts are for using getline which is much safer than fgets. 
+ Unfortunately, getline is not avail on Mac and whatnot, even though it is in the + POSIX standard after it started as a GNU extension + + Maybe introduce a HAVE_GETLINE autoconf test sometime + */ + + /* + char* line= NULL; + size_t len= 0; + */ + + const size_t len= 100000; + char line[len]; + + /* + while ( -1 != getline( &line, &len, f ) ) { + */ + while ( NULL != fgets( line, len, f ) ) { + + if ( 0 != parse_replacement_line( line ) ) return 1; + } + + fclose( f ); + /* + free( line ); + */ + + return 0; +} + + +int parse_replacement_line( char* line ) { + + char* tmp; + char* token; + const char* delim= " {}\t\n"; + + token= strtok_r( line, delim, &tmp ); + if ( NULL == token ) return 0 ; // ignore blank lines without an error + + int64_t id= strtoll( token, NULL, 10 ); + if ( 0 >= id ) { + + cerr << "Error: could not parse '" << token << "', abort" << endl; + return 1; + } + set& s= replacementMap[ id ]; + + /* do not add first entry in */ + + while ( NULL != ( token= strtok_r( NULL, delim, &tmp ) ) ) { + + id= strtoll( token, NULL, 10 ); + if ( 0 >= id ) { + + cerr << "Error: could not parse '" << token << "', abort" << endl; + return 1; + } + + s.insert( id ); + } + + return 0; +} + + +int parse_list_line( char* line ) { + + + char* tmp; + char* token; + const char* delim= " {}\t\n"; + + token= strtok_r( line, delim, &tmp ); + if ( NULL == token ) return 0 ; // ignore blank lines without an error + + int64_t id= strtoll( token, NULL, 10 ); + if ( 0 >= id ) { + cerr << "Error: could not parse '" << token << "', abort" << endl; + return 1; + } + cpuMap.insert( id ); + + + while ( NULL != ( token= strtok_r( NULL, delim, &tmp ) ) ) { + + id= strtoll( token, NULL, 10 ); + if ( 0 >= id ) { + + cerr << "Error: could not parse '" << token << "', abort" << endl; + return 1; + } + + cpuMap.insert( id ); + } + + return 0; +} + + +int parse_parameters( int argc, char* argv[] ) { + /* check for parameter list */ for ( int i = 1; i < argc; i++ ) { if ( 0 == 
strcmp("-h", argv[i]) || 0 == strcmp("--help", argv[i]) ) { - cout << HELPTEXT << endl; - return 0; - } else if ( 0 == strcmp( "-V", argv[i] ) ) { + cout << HELPTEXT << endl; + exit( 0 ); + } + } + + for ( int i = 1; i < argc; i++ ) { + + if ( 0 == strcmp( "-V", argv[i] ) ) { printf( "%u.%u.%u \"%s\"\n", OTF_VERSION_MAJOR, OTF_VERSION_MINOR, OTF_VERSION_SUB, OTF_VERSION_STRING); exit( 0 ); - } else if ( 0 == strcmp("-l", argv[i]) ) { - bool is_hyphen = false; - string min; - string max; - uint32_t left; - uint32_t right; + } else if ( 0 == strcmp( "-m", argv[i] ) ) { - if ( ((i+1) >= argc) || ( argv[i+1][0] == '-') ) { - cerr << "At least one argument expected after " << arg << endl; + + if ( ( i +1 >= argc ) || ( '-' == argv[i+1][0] ) ) { + + cerr << "Error: No argument given after " << argv[i] << endl; return 1; } - while ( (i+1) < argc ) { - if ( argv[i+1][0] == '-' ) break; - i++; + if ( MODE_NORMAL == mode ) { - is_hyphen = false; - min = ""; - max = ""; - for (uint32_t j = 0; j < strlen(argv[i]); j++) { - if ( argv[i][j] > 47 && argv[i][j] < 58 ) { - if ( is_hyphen ) { - max += argv[i][j]; - } else { - min += argv[i][j]; - } + cerr << "Error: must not mix '-l' and '-m'" << endl; + return 1; + } - } else if ( (argv[i][j] == '-') && (is_hyphen == false) && (argv[i][j+1] != '\0') ) { - is_hyphen = true; - max = ""; + if ( MODE_INVERSE == mode ) { - } else { - cerr << "Error: Wrong argument after " << arg << endl; - return 1; - } - } + cerr << "Error: must not mix '-v' and '-m'" << endl; + return 1; + } - sscanf( min.c_str(), "%u", &left ); - if( max != "") { - sscanf( max.c_str(), "%u", &right ); - } + mode= MODE_MAP; + inverse= true; - if ( ! 
is_hyphen ) { - right = left; - } - - if ( left > right ) { - int tmp = left; - left = right; - right = tmp; - } - for ( uint64_t k = left; k <= right; k++ ) { - cpuMap[ (uint32_t)k ] = enable; - } + int ret= parse_replacement_line( argv[i+1] ); + if ( 0 != ret ) { + + cerr << "Error parsing '"<< argv[i+1] << "'" << endl; + return 1; + } + + ++i; + + } else if ( 0 == strcmp( "-f", argv[i] ) ) { + + + if ( ( i +1 >= argc ) || ( '-' == argv[i+1][0] ) ) { + + cerr << "Error: No argument given after " << argv[i] << endl; + return 1; } - enable = true; - arg = "-l"; - if ( ! mode_set ) { - invers_mode = false; - mode_set = true; + if ( MODE_NORMAL == mode ) { + + cerr << "Error: must not mix '-l' and '-m'" << endl; + return 1; + } + + if ( MODE_INVERSE == mode ) { + + cerr << "Error: must not mix '-v' and '-m'" << endl; + return 1; + } + + mode= MODE_MAP; + inverse= true; + + + int ret= parse_replacement_file( argv[i+1] ); + if ( 0 != ret ) { + + cerr << "Error parsing '"<< argv[i+1] << "'" << endl; + return 1; + } + + ++i; + + } else if ( 0 == strcmp("-l", argv[i]) ) { + + if ( ( i +1 >= argc ) || ( '-' == argv[i+1][0] ) ) { + + cerr << "Error: No argument given after " << argv[i] << endl; + return 1; } + if ( MODE_MAP == mode ) { + + cerr << "Error: must not mix '-m' and '-l'" << endl; + return 1; + } + + if ( MODE_DEFAULT == mode ) { + + mode= MODE_NORMAL; + } + + int ret= parse_list_line( argv[i+1] ); + if ( 0 != ret ) { + + cerr << "Error parsing '"<< argv[i+1] << "'" << endl; + return 1; + } + + ++i; + } else if ( 0 == strcmp("-v", argv[i]) ) { - enable = false; - if ( ! mode_set ) { - invers_mode = true; - mode_set = true; - } - arg = "-v"; - strcpy(argv[i], "-l"); - i--; + + if ( MODE_MAP == mode ) { + + cerr << "Error: must not mix '-m' and '-v'" << endl; + return 1; + } + + mode= MODE_INVERSE; + inverse= true; } else if ( 0 == strcmp("-i", argv[i]) ) { - if (i+1 >= argc) { - cerr << "Error: Option " << argv[i] << " expect exactly 1 argument." 
<< endl; - return 1; - } - if ( argv[i+1][0] == '-' ) { + + if ( ( i +1 >= argc ) || ( '-' == argv[i+1][0] ) ) { + cerr << "Error: No argument given after " << argv[i] << endl; return 1; } @@ -182,11 +355,9 @@ int main (int argc, char* argv[]) { input_path = argv[i]; } else if ( 0 == strcmp("-o", argv[i]) ) { - if (i+1 >= argc) { - cerr << "Error: Option " << argv[i] << " expect exactly 1 argument." << endl; - return 1; - } - if ( argv[i+1][0] == '-' ) { + + if ( ( i +1 >= argc ) || ( '-' == argv[i+1][0] ) ) { + cerr << "Error: No argument given after " << argv[i] << endl; return 1; } @@ -219,13 +390,56 @@ int main (int argc, char* argv[]) { } + return 0; +} + + + +int main ( int argc, char* argv[] ) { + +// char *pwd = NULL; + size_t found; + + if ( argc <= 1 ) { + cout << HELPTEXT << endl; + return 0; + } + + + int ret= parse_parameters( argc, argv ); + if ( 0 != ret ) return 100; + + + map< uint32_t, set< uint32_t > >::const_iterator it= replacementMap.begin(); + map< uint32_t, set< uint32_t > >::const_iterator itend= replacementMap.end(); + for ( ; it != itend ; ++it ) { + + /*cout << " " << it->first << " : ";*/ + + set< uint32_t >::const_iterator jt= it->second.begin(); + set< uint32_t >::const_iterator jtend= it->second.end(); + + for ( ; jt != jtend ; ++jt ) { + + /*cout << *jt << " ";*/ + + cpuMap.insert( *jt ); + } + + /*cout << endl;*/ + } + /* string operations to handle input and output path */ /* check if -i was set */ if ( input_path.empty() ) { cerr << "Error: No input file given." << endl; - return 1; + return 101; } + +#if 0 /* the current working directory seems unnecessary, because we want + local links by default, no global paths in links */ + /* get current working directory */ pwd = new char[OTF_PATH_MAX]; *pwd = '\0'; @@ -233,7 +447,7 @@ int main (int argc, char* argv[]) { if ( pwd == NULL) { cerr << "Error: Path length greater than the maximum." 
<< endl; delete[] pwd; - return 1; + return 102; } /* make absolute path - necessary to create a symbolic link later on */ @@ -247,6 +461,8 @@ int main (int argc, char* argv[]) { delete[] pwd; +#endif /* 0 */ + /* input strings */ /* search for ".otf" and cut it off if found */ found = input_path.find_last_of("."); @@ -261,7 +477,12 @@ int main (int argc, char* argv[]) { if (found != string::npos) { input_folder = input_path.substr(0, found + 1); input_file = input_path.substr(found + 1); - } + + } else { + + // keep input_folder empty + input_file = input_path; + } /* output strings */ /* search for ".otf" and cut it off if found */ @@ -277,18 +498,24 @@ int main (int argc, char* argv[]) { if (found != string::npos) { output_folder = output_path.substr(0, found + 1); output_file = output_path.substr(found + 1); - } + + } else { + + // keep output_folder empty + output_file = output_path; + } /* check if output directory exists */ - if ( access(output_folder.c_str(), F_OK) ) { - cerr << "Error: Directory " << output_folder << " does not exist!" << endl; - return 1; + if ( ! output_folder.empty() && access(output_folder.c_str(), F_OK) ) { + + cerr << "Error: Directory '" << output_folder << "' does not exist!" << endl; + return 103; } /* check if input and output path were identical */ if ( input_path == output_path ) { cerr << "Error: The input and output file cannot be indentical." 
<< endl; - return 1; + return 104; } /* make output path if some information are missing */ @@ -303,10 +530,12 @@ int main (int argc, char* argv[]) { /*** end string operations ***/ /* create symbolic links, definiton file and master file */ - return write_master(input_path ,output_path, invers_mode, simulation, sim_mode); + return write_master( input_path, output_path, simulation, sim_mode ); } -int write_master(string input, string output, bool invers, bool show, int sim_mode) { + + +int write_master(string input, string output, bool show, int sim_mode) { /* create symbolic links, definiton file and master file */ @@ -321,7 +550,7 @@ int write_master(string input, string output, bool invers, bool show, int sim_mo string file_suffix[4] = {".events", ".snaps", ".stats", ".marker"}; - OTF_MapEntry *entries = NULL; + OTF_MapEntry** entries = NULL; OTF_Writer *writer = NULL; OTF_Reader *reader = NULL; OTF_HandlerArray *handlers = NULL; @@ -342,35 +571,48 @@ int write_master(string input, string output, bool invers, bool show, int sim_mo cerr << "Error while reading tracefile. No entries in file found." << endl; } CLEAR_EVERYTHING - return 2; + return 110; } /* modifies the MapEntries of otf masterfile according to entries in cpuMap*/ - entries = new OTF_MapEntry[num_args]; + entries= (OTF_MapEntry**) malloc( num_args * sizeof(OTF_MapEntry*) ); + assert( NULL != entries ); + for(uint32_t i = 0; i < num_args; i++) { - entries[i] = *(OTF_MasterControl_getEntry(master, i+1)); - - for(uint32_t j = 0; j < entries[i].n; j++) { - if ( cpuMap.end() == cpuMap.find(entries[i].values[j]) ) { - if ( ! 
invers ) { - entries[i].values[j] = 0; - } - } else { - if ( cpuMap[entries[i].values[j]] == 0 ) { - entries[i].values[j] = 0; - } - } - } - } + entries[i]= OTF_MasterControl_getEntryByIndex( master, i ); + assert( NULL != entries[i] ); + + for(uint32_t j = 0; j < entries[i]->n; j++) { + if ( ( cpuMap.end() == cpuMap.find( entries[i]->values[j] ) ) == inverse ) { + + /* either ( not_in_list in inverse_mode ) or + ( found_in_list in normal_mode ) --> keep entry + */ + + // cerr << " keep " << entries[i]->values[j] << endl; + + } else { + + /* either ( not_in_list in normal_mode ) or + ( found_in_list in inverse_mode ) --> mark to ignore it + */ + + // cerr << " drop " << entries[i]->values[j] << endl; + + entries[i]->values[j] = 0; + } + } + } /* create new empty master and symbolic links */ new_master = OTF_MasterControl_new(manager); for(uint32_t i = 0; i < num_args; i++) { append = false; - for(uint32_t j = 0; j < entries[i].n; j++) { - if(entries[i].values[j] > 0) { - OTF_MasterControl_append(new_master, i+1, entries[i].values[j]); - first->procMap[ entries[i].values[j] ] = true; + for(uint32_t j = 0; j < entries[i]->n; j++) { + if(entries[i]->values[j] > 0) { + + OTF_MasterControl_append(new_master, entries[i]->argument, entries[i]->values[j]); +// first->procMap[ entries[i]->values[j] ] = true; append = true; } } @@ -381,13 +623,13 @@ int write_master(string input, string output, bool invers, bool show, int sim_mo } /* create symbolic links */ - sprintf(ch_i, "%x", i+1); - + sprintf(ch_i, "%x", entries[i]->argument ); + for(int k = 0; k < 4; k++) { file = input + string(".") + ch_i + file_suffix[k] + string(".z"); s_link = output + string(".") + ch_i + file_suffix[k] + string(".z"); - + if ( ! access(file.c_str(), F_OK) ) { if ( ! 
access(s_link.c_str(), F_OK) ) { if ( unlink(s_link.c_str()) ) { @@ -401,7 +643,9 @@ int write_master(string input, string output, bool invers, bool show, int sim_mo CLEAR_EVERYTHING return 2; } + } else { + file = input + string(".") + ch_i + file_suffix[k]; s_link = output + string(".") + ch_i + file_suffix[k]; @@ -452,92 +696,31 @@ int write_master(string input, string output, bool invers, bool show, int sim_mo handlers = OTF_HandlerArray_open(); first->writer = writer; + OTF_HandlerArray_getCopyHandler( handlers, writer ); - /* kind of copyhandler which replicate all definitons */ - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefinitionComment, OTF_DEFINITIONCOMMENT_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFINITIONCOMMENT_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefTimerResolution, OTF_DEFTIMERRESOLUTION_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFTIMERRESOLUTION_RECORD); - - /* this definition is not copied but modified */ OTF_HandlerArray_setHandler(handlers, (OTF_FunctionPointer*) handleDefProcess, OTF_DEFPROCESS_RECORD); OTF_HandlerArray_setFirstHandlerArg(handlers, (void*) first, OTF_DEFPROCESS_RECORD); - /* this definition is not copied but modified */ OTF_HandlerArray_setHandler(handlers, (OTF_FunctionPointer*) handleDefProcessGroup, OTF_DEFPROCESSGROUP_RECORD); OTF_HandlerArray_setFirstHandlerArg(handlers, (void*) first, OTF_DEFPROCESSGROUP_RECORD); OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefFunction, OTF_DEFFUNCTION_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFFUNCTION_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefFunctionGroup, OTF_DEFFUNCTIONGROUP_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFFUNCTIONGROUP_RECORD); - - OTF_HandlerArray_setHandler(handlers, - 
(OTF_FunctionPointer*) handleDefCollectiveOperation, OTF_DEFCOLLOP_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFCOLLOP_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefCounter, OTF_DEFCOUNTER_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFCOUNTER_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefCounterGroup, OTF_DEFCOUNTERGROUP_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFCOUNTERGROUP_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefScl, OTF_DEFSCL_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFSCL_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefSclFile, OTF_DEFSCLFILE_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFSCLFILE_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefCreator, OTF_DEFCREATOR_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFCREATOR_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefVersion, OTF_DEFVERSION_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFVERSION_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefFile, OTF_DEFFILE_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFFILE_RECORD); - - OTF_HandlerArray_setHandler(handlers, - (OTF_FunctionPointer*) handleDefFileGroup, OTF_DEFFILEGROUP_RECORD); - OTF_HandlerArray_setFirstHandlerArg(handlers, - (void*) writer, OTF_DEFFILEGROUP_RECORD); + (OTF_FunctionPointer*) handleDefProcessSubstitutes, OTF_DEFPROCESSSUBSTITUTES_RECORD); read = OTF_Reader_readDefinitions (reader, handlers); if( read == OTF_READ_ERROR ) { cerr << "An error occurred while reading the tracefile. 
It seems to be damaged. Abort." << endl; CLEAR_EVERYTHING - return 1; + return 111; } + /* in mapping mode write additional definitions */ + + /* set the writer's master to the modified master instance */ /* closing the writer at the end writes the new master file to harddisk */ OTF_Writer_setMasterControl(writer, new_master); @@ -550,28 +733,36 @@ int write_master(string input, string output, bool invers, bool show, int sim_mo int display_processes(firstarg *first, int sim_mode) { - map::iterator it; + set::iterator it; /* cout << "Note: You are in simulation mode at the moment. No files are created.\n" << endl; */ - if ( first->procMap.size() < 1) { + if ( cpuMap.size() < 1) { cout << "You exclude all processes!" << endl; return 0; } - cout << "You choose the following processes: " << endl; + if ( inverse ) { + + cout << "You choose to disable following processes: " << endl; + + } else { + + cout << "You choose to enable following processes: " << endl; + } + if ( sim_mode == LIST_MODE ) { - for (it = first->procMap.begin(); it != first->procMap.end(); ++it) { - cout << it->first << endl; + for (it = cpuMap.begin(); it != cpuMap.end(); ++it) { + cout << *it << endl; } } else if ( sim_mode == RANGE_MODE ) { - it = first->procMap.begin(); - int cur = it->first; - int start = cur; - int end = 0; - for (it = ++it ; it != first->procMap.end(); ++it) { - if ( (cur + 1) == it->first) { + it = cpuMap.begin(); + uint32_t cur = *it; + uint32_t start = cur; + uint32_t end = 0; + for (it = ++it ; it != cpuMap.end(); ++it) { + if ( (cur + 1) == *it ) { cur++; } else { end = cur; @@ -580,7 +771,7 @@ int display_processes(firstarg *first, int sim_mode) { } else { cout << start << endl; } - cur = it->first; + cur = *it; start = cur; end = 0; } @@ -591,8 +782,8 @@ int display_processes(firstarg *first, int sim_mode) { } else if ( sim_mode == TABLE_MODE ) { int i = 0; - for (it = first->procMap.begin(); it != first->procMap.end(); ++it) { - cout << it->first << "\t"; + for (it = 
cpuMap.begin(); it != cpuMap.end(); ++it) { + cout << *it << "\t"; i++; if (i == 8) { cout << endl; @@ -608,7 +799,7 @@ int display_processes(firstarg *first, int sim_mode) { return 0; } -void free_all_pointers(char *ch_i, firstarg *first, OTF_MapEntry *entries, OTF_Writer *writer, +void free_all_pointers(char *ch_i, firstarg *first, OTF_MapEntry** entries, OTF_Writer *writer, OTF_Reader *reader, OTF_HandlerArray *handlers, OTF_FileManager *manager, OTF_MasterControl *master, OTF_MasterControl *new_master) { @@ -623,7 +814,6 @@ void free_all_pointers(char *ch_i, firstarg *first, OTF_MapEntry *entries, OTF_W } if(entries != NULL) { - delete[] entries; entries = NULL; } diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/otfshrink.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/otfshrink.h new file mode 100644 index 0000000000..29d72c66fa --- /dev/null +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfshrink/otfshrink.h @@ -0,0 +1,15 @@ +#ifndef OTFSHRINK_H +#define OTFSHRINK_H + + +#include +#include +using namespace std; + + +extern set< uint32_t > cpuMap; +extern bool inverse; + +extern map< uint32_t, set< uint32_t > > replacementMap; + +#endif /* OTFSHRINK_H */ diff --git a/ompi/contrib/vt/vt/include/vt_user.h b/ompi/contrib/vt/vt/include/vt_user.h index 376cb06448..943bf2127d 100644 --- a/ompi/contrib/vt/vt/include/vt_user.h +++ b/ompi/contrib/vt/vt/include/vt_user.h @@ -44,4 +44,6 @@ # include "vt_wrap_pthread.h" #endif /* VTRACE_PTHREAD */ +#include + #endif /* _VT_USER_H */ diff --git a/ompi/contrib/vt/vt/rfg/rfg_filter.c b/ompi/contrib/vt/vt/rfg/rfg_filter.c index bd57945191..1386f6c636 100644 --- a/ompi/contrib/vt/vt/rfg/rfg_filter.c +++ b/ompi/contrib/vt/vt/rfg/rfg_filter.c @@ -10,7 +10,6 @@ #include #include -#define STRBUF_SIZE 0x400 /* buffer size for strings */ #define MAX_LINE_LEN 0x20000 /* max file line length */ /* data structure for filter assignments */ @@ -248,7 +247,7 @@ int RFG_Filter_readDefFile( RFG_Filter* filter, int rank, uint8_t* 
rank_off ) /* read lines */ while( !l_rank_off && !parse_err && - get_deffile_content_line( filter, line, MAX_LINE_LEN - 1, &pos ) ) + get_deffile_content_line( filter, line, MAX_LINE_LEN, &pos ) ) { int32_t climit; char* p; @@ -257,15 +256,17 @@ int RFG_Filter_readDefFile( RFG_Filter* filter, int rank, uint8_t* rank_off ) lineno++; /* remove newline */ - if( strlen(line) > 0 && line[strlen(line)-1] == '\n' ) line[strlen(line)-1] = '\0'; + /* remove leading and trailing spaces from line */ vt_strtrim( line ); + /* continue if line is empty */ if( strlen( line ) == 0 ) continue; + /* continue if line is a comment */ if( line[0] == '#' ) continue; @@ -398,7 +399,7 @@ int RFG_Filter_readDefFile( RFG_Filter* filter, int rank, uint8_t* rank_off ) p = strtok( line, ";" ); do { - char pattern[STRBUF_SIZE]; + char* pattern; if( !p ) { @@ -406,14 +407,15 @@ int RFG_Filter_readDefFile( RFG_Filter* filter, int rank, uint8_t* rank_off ) break; } - strcpy( pattern, p ); - + pattern = strdup( p ); vt_strtrim( pattern ); /* add call limit assignment */ if( strlen( pattern ) > 0 && includes_current_rank ) RFG_Filter_add( filter, pattern, climit ); + free( pattern ); + } while( ( p = strtok( 0, ";" ) ) ); } } diff --git a/ompi/contrib/vt/vt/rfg/rfg_groups.c b/ompi/contrib/vt/vt/rfg/rfg_groups.c index addcab3e0d..6f8a085ec9 100644 --- a/ompi/contrib/vt/vt/rfg/rfg_groups.c +++ b/ompi/contrib/vt/vt/rfg/rfg_groups.c @@ -9,7 +9,6 @@ #include #include -#define STRBUF_SIZE 0x400 /* buffer size for strings */ #define MAX_LINE_LEN 0x20000 /* max file line length */ /* data structure for group assignments */ @@ -103,7 +102,7 @@ int RFG_Groups_setDefFile( RFG_Groups* groups, const char* deffile ) int RFG_Groups_readDefFile( RFG_Groups* groups ) { FILE* f; - char* orgline; + char* line; uint32_t lineno = 0; uint8_t parse_err = 0; @@ -122,8 +121,8 @@ int RFG_Groups_readDefFile( RFG_Groups* groups ) return 0; } - orgline = ( char* )malloc( MAX_LINE_LEN * sizeof( char ) ); - if( orgline == NULL ) 
+ line = ( char* )malloc( MAX_LINE_LEN * sizeof( char ) ); + if( line == NULL ) { fclose( f ); return 0; @@ -131,36 +130,29 @@ int RFG_Groups_readDefFile( RFG_Groups* groups ) /* read lines */ - while( !parse_err && fgets( orgline, MAX_LINE_LEN - 1, f ) ) + while( !parse_err && fgets( line, MAX_LINE_LEN, f ) ) { - char group[STRBUF_SIZE]; + char* group; char* p; - char* line; - - /* remove newline */ - - if( strlen(orgline) > 0 && orgline[strlen(orgline)-1] == '\n' ) - orgline[strlen(orgline)-1] = '\0'; - - /* copy line so that the original line keep alive */ - - line = strdup( orgline ); + /* increment line number */ lineno++; - if( strlen( line ) == 0 ) - { - free( line ); - continue; - } + /* remove newline */ + if( strlen( line ) > 0 && line[strlen(line)-1] == '\n' ) + line[strlen(line)-1] = '\0'; + /* remove leading and trailing spaces from line */ vt_strtrim( line ); - if( line[0] == '#' ) - { - free( line ); + /* continue if line is empty */ + if( strlen( line ) == 0 ) continue; - } + + /* continue if line is a comment */ + if( line[0] == '#' ) + continue; + /* search for '=' e.g. 
"GROUP=func1;func2;func3" @@ -171,7 +163,6 @@ int RFG_Groups_readDefFile( RFG_Groups* groups ) if( p == NULL ) { parse_err = 1; - free( line ); break; } @@ -181,7 +172,8 @@ int RFG_Groups_readDefFile( RFG_Groups* groups ) */ *p = '\0'; - strcpy( group, line ); + + group = strdup( line ); vt_strtrim( group ); /* split remaining line at ';' to get pattern */ @@ -189,7 +181,7 @@ int RFG_Groups_readDefFile( RFG_Groups* groups ) p = strtok( p+1, ";" ); do { - char pattern[STRBUF_SIZE]; + char* pattern; if( !p ) { @@ -197,8 +189,7 @@ int RFG_Groups_readDefFile( RFG_Groups* groups ) break; } - strcpy( pattern, p ); - + pattern = strdup( p ); vt_strtrim( pattern ); /* add group assignment */ @@ -206,18 +197,20 @@ int RFG_Groups_readDefFile( RFG_Groups* groups ) if( strlen( pattern ) > 0 ) RFG_Groups_addAssign( groups, group, pattern ); + free( pattern ); + } while( ( p = strtok( 0, ";" ) ) ); - free( line ); + free( group ); } if( parse_err ) { - fprintf( stderr, "%s:%u: Could not parse line '%s'\n", - groups->deffile, lineno, orgline ); + fprintf( stderr, "%s:%u: Could not be parsed\n", + groups->deffile, lineno ); } - free( orgline ); + free( line ); fclose( f ); diff --git a/ompi/contrib/vt/vt/tools/vtdyn/Makefile.am b/ompi/contrib/vt/vt/tools/vtdyn/Makefile.am index dafa7a51ec..d90b5a7125 100644 --- a/ompi/contrib/vt/vt/tools/vtdyn/Makefile.am +++ b/ompi/contrib/vt/vt/tools/vtdyn/Makefile.am @@ -6,7 +6,7 @@ if AMBUILDBINARIES bin_PROGRAMS = vtdyn endif -INCLUDES = $(DYNIINCDIR) -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)/rfg +INCLUDES = $(DYNIINCDIR) -I$(top_srcdir)/include -I$(top_builddir)/include -I$(top_srcdir)/vtlib -I$(top_srcdir)/rfg vtdyn_SOURCES = \ vt_dyn.h \ diff --git a/ompi/contrib/vt/vt/tools/vtdyn/vt_dyn.cc b/ompi/contrib/vt/vt/tools/vtdyn/vt_dyn.cc index 8d332293ee..b8363d1011 100644 --- a/ompi/contrib/vt/vt/tools/vtdyn/vt_dyn.cc +++ b/ompi/contrib/vt/vt/tools/vtdyn/vt_dyn.cc @@ -18,6 +18,8 @@ #include #include #include +#include 
+#include #include #include #include @@ -28,34 +30,6 @@ #include "BPatch_snippet.h" #include "BPatch_statement.h" -// macros -// (TODO: replace by inline functions) -// - -// macro to print verbose message -#define VPRINT(level, text) \ - if( Params.verbose_level >= level ) \ - std::cout << ExeName << ": [" << ExePid << "]: " << text << std::endl; - -// macro to remove newline character from string -#define CHOMP(str) { \ - if( str[strlen(str)-1] == '\n' ) \ - str[strlen(str)-1] = '\0'; } - -// macro to strip whitespace from string -#define TRIM(str) { \ - int _trim_start_idx_ = 0; \ - int _trim_stop_idx_ = strlen( str ); \ - int i, j; \ - if( strlen( str ) > 0 ) { \ - for( i = 0; i < (int)strlen( str ) \ - && str[i] == ' '; i++ ) _trim_start_idx_++; \ - for( i = (int)strlen( str ) - 1; i >= 0 \ - && str[i] == ' '; i-- ) _trim_stop_idx_--; \ - for( j = 0, i = _trim_start_idx_; i < _trim_stop_idx_; i++, j++ ) \ - str[j] = str[i]; \ - str[j] = '\0'; } } - // local functions // @@ -65,6 +39,9 @@ static bool parseCommandLine( int argc, char ** argv ); // show usage text static void showUsage( void ); +// print verbose message +static void vPrint( uint8_t level, const char * fmt, ... ); + // global variables // @@ -221,9 +198,11 @@ parseCommandLine( int argc, char ** argv ) { Params.ignore_no_dbg = true; } + + // hidden options - only for using within the VampirTrace library + // + // -p, --pid - // NOTE: these options are hidden - only for using within the VampirTrace - // library to attach the mutator to a running process // else if( strcmp( argv[i], "-p" ) == 0 || strcmp( argv[i], "--pid" ) == 0 ) @@ -239,6 +218,13 @@ parseCommandLine( int argc, char ** argv ) Params.mutatee_pid = atoi( argv[++i] ); Params.mode = MODE_ATTACH; } + // --nodetach + // + else if( strcmp( argv[i], "--nodetach" ) == 0 ) + { + Params.detach = false; + } + // [arguments ...] 
// else @@ -277,6 +263,7 @@ showUsage() << " (can be used more than once)" << std::endl << std::endl << " -q, --quiet Enable quiet mode." << std::endl + << " (only emergency output)" << std::endl << std::endl << " -o, --output FILE Rewrite instrumented executable to specified pathname." << std::endl << std::endl @@ -291,6 +278,26 @@ showUsage() << std::endl; } +static void +vPrint( uint8_t level, const char * fmt, ... ) +{ + va_list ap; + + if( Params.verbose_level >= level ) + { + va_start( ap, fmt ); + + char msg[1024] = ""; + + snprintf( msg, sizeof( msg ) - 1, "%s: [%d]: ", ExeName.c_str(), ExePid ); + vsnprintf( msg + strlen( msg ), sizeof( msg ) - 1, fmt, ap ); + + printf( "%s", msg ); + + va_end( ap ); + } +} + //////////////////// class MutatorC //////////////////// // public methods @@ -330,10 +337,31 @@ MutatorC::run() // instrument functions // - VPRINT( 1, "Instrumenting functions" ); + vPrint( 1, "Instrumenting functions\n" ); - for( uint32_t i = 0; i < inst_funcs.size() && !error; i++ ) - error = !instrumentFunction( inst_funcs[i] ); + for( uint32_t i = 0; i < inst_funcs.size(); i++ ) + { + // begin insertion set + m_appAddrSpace->beginInsertionSet(); + + // instrument function entry points + if( ( error = !instrumentFunctionEntry( inst_funcs[i] ) ) ) + break; + // instrument function exit points + if( ( error = !instrumentFunctionExit( inst_funcs[i] ) ) ) + break; + + // finalize insertion set + // + if( !error && !m_appAddrSpace->finalizeInsertionSet( true, 0 ) ) + { + std::cerr << ExeName << ": [" << ExePid << "]: " + << "Error: Could not finalize instrumentation set for " + << "function '" << inst_funcs[i].name << "'. Aborting." 
+ << std::endl; + error = true; + } + } inst_funcs.clear(); } while( false ); @@ -354,19 +382,37 @@ MutatorC::initialize() do { + // set recommended optimizations to reduce runtime overhead + // + + // turn on inlined trampolines + m_bpatch.setMergeTramp( true ); + + // turn on trampoline recursion because there is no way for the snippets + // to call themselves + m_bpatch.setTrampRecursive( true ); + + // turn off stack frames in instrumentation + m_bpatch.setInstrStackFrames( false ); + + // turn on floating point saves due to the instrumentation does clobber + // floating point registers + // + + m_bpatch.setSaveFPR( true ); +#ifdef DYNINST_7_0 + m_bpatch.forceSaveFPR( true ); +#endif // DYNINST_7_0 + // read input filter file if( ( error = !readFilter() ) ) break; - // turn on trampoline recursion because there is no way for the snippets - // to call themselves; reduces runtime overhead - m_bpatch.setTrampRecursive( true ); - switch( Params.mode ) { case MODE_CREATE: { - VPRINT( 1, "Creating process" ); + vPrint( 1, "Creating process\n" ); assert( Params.mutatee.length() > 0 ); @@ -412,10 +458,10 @@ MutatorC::initialize() } case MODE_ATTACH: { - VPRINT( 1, "Attaching to PID " << Params.mutatee_pid ); - assert( Params.mutatee_pid ); + vPrint( 1, "Attaching to PID %d\n", Params.mutatee_pid ); + // attach to running process m_appAddrSpace = m_bpatch.processAttach( @@ -445,10 +491,10 @@ MutatorC::initialize() } case MODE_REWRITE: { - VPRINT( 1, "Opening " << Params.mutatee ); - assert( Params.mutatee.length() > 0 ); + vPrint( 1, "Opening %s\n", Params.mutatee.c_str() ); + // open binary for rewriting m_appAddrSpace = m_bpatch.openBinary( Params.mutatee.c_str(), true ); @@ -514,12 +560,12 @@ MutatorC::finalize( bool & error ) // if( !error ) { - VPRINT( 1, "Executing application" ); + vPrint( 1, "Continuing process execution\n" ); if( !app_process->isStopped() || app_process->isTerminated() ) { std::cerr << ExeName << ": [" << ExePid << "]: " - << "Error: Could not 
continue execution of process. " + << "Error: Could not continue process execution. " << "Aborting." << std::endl; error = true; } @@ -530,18 +576,27 @@ MutatorC::finalize( bool & error ) if( Params.mutatee_pid != -1 ) kill( Params.mutatee_pid, SIGUSR1 ); - // continue execution - app_process->continueExecution(); - - // wait until mutatee is terminated - // - while( !app_process->isTerminated() ) + if( Params.mode == MODE_CREATE || !Params.detach ) { - m_bpatch.waitForStatusChange(); - sleep(1); - } + // continue execution of mutatee + app_process->continueExecution(); - VPRINT( 1, "End of application" ); + // wait until mutatee is terminated + // + while( !app_process->isTerminated() ) + { + m_bpatch.waitForStatusChange(); + sleep(1); + } + + vPrint( 1, "End of process\n" ); + vPrint( 1, "Done\n" ); + } + else // Params.mode == MODE_ATTACH && Params.detach + { + // continue execution of mutatee and detach from its process + app_process->detach( true ); + } } } // ... or terminate execution on error @@ -558,7 +613,7 @@ MutatorC::finalize( bool & error ) { if( !error ) { - VPRINT( 1, "Writing " << Params.outfile ); + vPrint( 1, "Writing %s\n", Params.outfile.c_str() ); BPatch_binaryEdit * app_editor = dynamic_cast(m_appAddrSpace); @@ -572,6 +627,10 @@ MutatorC::finalize( bool & error ) << ". Aborting." 
<< std::endl; error = true; } + else + { + vPrint( 1, "Done\n" ); + } } break; @@ -582,9 +641,6 @@ MutatorC::finalize( bool & error ) if( m_filter ) RFG_Filter_free( m_filter ); - if( !error ) - VPRINT( 1, "Done" ); - return !error; } @@ -593,15 +649,13 @@ MutatorC::getFunctions( std::vector & instFuncs ) { bool error = false; - VPRINT( 1, "Get instrumentable functions" ); + vPrint( 1, "Get instrumentable functions\n" ); do { // get list of modules from image // - - BPatch_Vector * modules = m_appImage->getModules(); - + const BPatch_Vector * modules = m_appImage->getModules(); if( !modules ) { std::cerr << ExeName << ": [" << ExePid << "]: " @@ -612,24 +666,29 @@ MutatorC::getFunctions( std::vector & instFuncs ) } // iterate over all modules - for( uint32_t i = 0; i < modules->size(); i++ ) + for( uint32_t i = 0; i < modules->size() && !error; i++ ) { + // get module name + // + std::string module_name; char buffer[STRBUFSIZE] = ""; (*modules)[i]->getName( buffer, STRBUFSIZE ); module_name = buffer; + // check whether module should be instrumented + // if( constraintModule( module_name ) ) { - VPRINT( 2, " Skip module '" << module_name << "'" ); + vPrint( 2, " Skip module '%s'\n", module_name.c_str() ); continue; } // get functions of module // - BPatch_Vector * functions = + const BPatch_Vector * functions = (*modules)[i]->getProcedures(); if( !functions ) @@ -644,21 +703,52 @@ MutatorC::getFunctions( std::vector & instFuncs ) // iterate over all functions for( uint32_t j = 0; j < functions->size(); j++ ) { + // get function name + // + std::string function_name; (*functions)[j]->getName( buffer, STRBUFSIZE ); function_name = buffer; + // check whether function is instrumentable + // if( !(*functions)[j]->isInstrumentable() ) { - VPRINT( 2, " Skip function '" << function_name << - "' (not instrumentable)" ); + vPrint( 2, " Skip function '%s' (not instrumentable)\n", + function_name.c_str() ); continue; } + // get function entry points + // + const BPatch_Vector* 
entry_points = + (*functions)[j]->findPoint( BPatch_entry ); + if( !entry_points || entry_points->size() == 0 ) + { + vPrint( 2, " Skip function '%s' " + "(no entry instrumentation points found)\n", + function_name.c_str() ); + continue; + } + + // get function exit points + // + const BPatch_Vector* exit_points = + (*functions)[j]->findPoint( BPatch_exit ); + if( !exit_points || exit_points->size() == 0 ) + { + vPrint( 2, " Skip function '%s' " + "(no exit instrumentation points found)\n", + function_name.c_str() ); + continue; + } + + // check whether function should be instrumented + // if( constraintFunction( function_name ) ) { - VPRINT( 2, " Skip function '" << function_name << "'" ); + vPrint( 2, " Skip function '%s'\n", function_name.c_str() ); continue; } @@ -683,7 +773,7 @@ MutatorC::getFunctions( std::vector & instFuncs ) { if( constraintModule( file_name ) ) { - VPRINT( 2, " Skip function '" << function_name << "'" ); + vPrint( 2, " Skip function '%s'\n", function_name.c_str() ); continue; } } @@ -691,8 +781,8 @@ MutatorC::getFunctions( std::vector & instFuncs ) { if( Params.ignore_no_dbg ) { - VPRINT( 2, " Skip function '" << function_name << - "' (no debug)" ); + vPrint( 2, " Skip function '%s' (no debug information)\n", + function_name.c_str() ); continue; } @@ -700,35 +790,50 @@ MutatorC::getFunctions( std::vector & instFuncs ) line_number = 0; } - VPRINT( 2, " Add function '" << function_name << - "' for instrumenting" ); + vPrint( 2, " Add function '%s' for instrumenting\n", + function_name.c_str() ); + + // get function index + // + uint32_t function_index = instFuncs.size(); + if( function_index + 1 > VT_MAX_DYNINST_REGIONS ) + { + std::cerr << ExeName << ": [" << ExePid << "]: " + << "Error: Too many functions to instrument (max. " + << VT_MAX_DYNINST_REGIONS << "). Aborting." 
+ << std::endl; + error = true; + break; + } // add function for instrumenting instFuncs.push_back( - InstFuncS( (*functions)[j], addr, function_name, - file_name, line_number ) ); + InstFuncS( function_index, function_name, file_name, + line_number, entry_points, exit_points ) ); } } } while( false ); - return true; + return !error; } bool -MutatorC::instrumentFunction( const InstFuncS & instFunc ) +MutatorC::instrumentFunctionEntry( const InstFuncS & instFunc ) { bool error = false; + vPrint( 2, " Instrumenting-> '%s' Entry\n", instFunc.name.c_str() ); + // set callee arguments // static BPatch_Vector callee_args( 4 ); - // function address + // function index // - BPatch_constExpr const_expr_faddr( instFunc.addr ); - callee_args[0] = &const_expr_faddr; + BPatch_constExpr const_expr_findex( instFunc.index ); + callee_args[0] = &const_expr_findex; // function name // @@ -745,54 +850,55 @@ MutatorC::instrumentFunction( const InstFuncS & instFunc ) BPatch_constExpr const_expr_lno( instFunc.lno ); callee_args[3] = &const_expr_lno; - // create instrumentation snippets + // create instrumentation snippet + BPatch_snippet snippet = BPatch_funcCallExpr( *m_vtStartFunc, callee_args ); + + // insert instrumentation snippet // - - static BPatch_snippet snippets[2]; - - snippets[0] = BPatch_funcCallExpr( *m_vtStartFunc, callee_args ); - snippets[1] = BPatch_funcCallExpr( *m_vtEndFunc, callee_args ); - - // insert instrumentation snippets - // - - m_appAddrSpace->beginInsertionSet(); - - for( uint32_t i = 0; i < 2; i++ ) + if( !m_appAddrSpace->insertSnippet( snippet, *(instFunc.entry_points), + BPatch_callBefore, BPatch_lastSnippet ) ) { - const BPatch_snippet & snippet = snippets[i]; - - // search point for insertion - // - - const BPatch_Vector * points = - instFunc.func->findPoint( (i == 0) ? BPatch_entry : BPatch_exit ); - - if( points == 0 ) - { - std::cerr << ExeName << ": [" << ExePid << "]: " - << "Error: Unable to find insert point for callee. " - << "Aborting." 
<< std::endl; - error = true; - break; - } - - // insert snippets - // - if( i == 0 ) - { - VPRINT( 2, " Instrumenting-> '" << instFunc.name << "' Entry" ); - m_appAddrSpace->insertSnippet( snippet, *points, - BPatch_callBefore, BPatch_lastSnippet ); - } - else // i == 1 - { - VPRINT( 2, " Instrumenting-> '" << instFunc.name << "' Exit" ); - m_appAddrSpace->insertSnippet( snippet, *points ); - } + std::cerr << ExeName << ": [" << ExePid << "]: " + << "Error: Could not instrument entry points of " + << "function '" << instFunc.name << "'. Aborting." + << std::endl; + error = true; } - m_appAddrSpace->finalizeInsertionSet( true, 0 ); + return !error; +} + +bool +MutatorC::instrumentFunctionExit( const InstFuncS & instFunc ) +{ + bool error = false; + + vPrint( 2, " Instrumenting-> '%s' Exit\n", instFunc.name.c_str() ); + + // set callee argument + // + + static BPatch_Vector callee_args( 1 ); + + // function index + // + BPatch_constExpr const_expr_findex( instFunc.index ); + callee_args[0] = &const_expr_findex; + + // create instrumentation snippet + BPatch_snippet snippet = BPatch_funcCallExpr( *m_vtEndFunc, callee_args ); + + // insert instrumentation snippet + // + if( !m_appAddrSpace->insertSnippet( snippet, *(instFunc.exit_points), + BPatch_callAfter, BPatch_lastSnippet ) ) + { + std::cerr << ExeName << ": [" << ExePid << "]: " + << "Error: Could not instrument exit points of " + << "function '" << instFunc.name << "'. Aborting." 
+ << std::endl; + error = true; + } return !error; } @@ -807,7 +913,7 @@ MutatorC::readFilter() do { - VPRINT( 1, "Reading filter file" ); + vPrint( 1, "Reading filter file\n" ); // get RFG filter object m_filter = RFG_Filter_init(); @@ -882,6 +988,10 @@ MutatorC::constraintFunction( const std::string & name ) const return true; // don't instrument MPI functions // (already done by function wrapper) } + else if( name.compare( 0, 7, "UNIMCI_" ) == 0 ) + { + return true; + } else if( m_filter ) { int32_t limit; diff --git a/ompi/contrib/vt/vt/tools/vtdyn/vt_dyn.h b/ompi/contrib/vt/vt/tools/vtdyn/vt_dyn.h index d09620853f..7cbee5ee3f 100644 --- a/ompi/contrib/vt/vt/tools/vtdyn/vt_dyn.h +++ b/ompi/contrib/vt/vt/tools/vtdyn/vt_dyn.h @@ -15,6 +15,7 @@ #include "config.h" +#include "vt_defs.h" #include "vt_inttypes.h" #include "rfg_filter.h" @@ -53,7 +54,8 @@ struct ParamsS { ParamsS() : mode(MODE_CREATE), mutatee_pid(-1), verbose_level(1), - ignore_no_dbg(false), show_usage(false), show_version(false) {} + detach(true), ignore_no_dbg(false), show_usage(false), + show_version(false) {} MutationT mode; // mutation mode std::string mutatee; // mutatee executable name @@ -63,6 +65,7 @@ struct ParamsS std::string filtfile; // pathname of filter file std::string outfile; // file name of binary to rewrite uint32_t verbose_level; // verbose level + bool detach; // flag: detach from mutatee? bool ignore_no_dbg; // flag: ignore funcs. without debug? bool show_usage; // flag: show usage text? bool show_version; // flag: show VampirTrace version? 
@@ -93,16 +96,28 @@ private: // struct InstFuncS { - InstFuncS() : func(0), addr(0), lno(0) {} - InstFuncS(BPatch_function * _func, unsigned long _addr, - std::string _name, std::string _file, uint32_t _lno ) - : func(_func), addr(_addr), name(_name), file(_file), lno(_lno) {} + InstFuncS( const uint32_t & _index, const std::string & _name, + const std::string & _file, const uint32_t & _lno, + const BPatch_Vector *& _entry_points, + const BPatch_Vector *& _exit_points ) + : index( _index ), name( _name ), file( _file ), lno( _lno ), + entry_points( _entry_points ), exit_points( _exit_points ) {} - BPatch_function * func; // BPatch function object - unsigned long addr; // function address - std::string name; // function name - std::string file; // source file name of function definition - uint32_t lno; // line number of function definition + // function index within region id table + uint32_t index; + + // function name + std::string name; + + // source file name and line number of function definition + // + std::string file; + uint32_t lno; + + // function entry and exit points to be instrumented + // + const BPatch_Vector * entry_points; + const BPatch_Vector * exit_points; }; @@ -115,8 +130,11 @@ private: // get functions to be instrumented bool getFunctions( std::vector & instFuncs ); - // instrument a function - bool instrumentFunction( const InstFuncS & instFunc ); + // instrument a function entry + bool instrumentFunctionEntry( const InstFuncS & instFunc ); + + // instrument a function exit + bool instrumentFunctionExit( const InstFuncS & instFunc ); // read input filter file bool readFilter(); diff --git a/ompi/contrib/vt/vt/tools/vtfilter/Makefile.am b/ompi/contrib/vt/vt/tools/vtfilter/Makefile.am index 2a6e7532a3..4c132cefa3 100644 --- a/ompi/contrib/vt/vt/tools/vtfilter/Makefile.am +++ b/ompi/contrib/vt/vt/tools/vtfilter/Makefile.am @@ -9,6 +9,12 @@ endif VTFILTERSRCDIR = $(srcdir) include $(srcdir)/Makefile.common +install-exec-hook: + (cd 
$(DESTDIR)$(bindir); rm -f vtfiltergen$(EXEEXT); \ + $(LN_S) vtfilter$(EXEEXT) vtfiltergen$(EXEEXT)) +uninstall-local: + rm -f $(DESTDIR)$(bindir)/vtfiltergen$(EXEEXT) + vtfilter_CXXFLAGS = $(COMMONCXXFLAGS) vtfilter_LDFLAGS = $(COMMONLDFLAGS) vtfilter_SOURCES = $(COMMONSOURCES) diff --git a/ompi/contrib/vt/vt/tools/vtfilter/mpi/Makefile.am b/ompi/contrib/vt/vt/tools/vtfilter/mpi/Makefile.am index 4b77a51fbe..50cc72f5cd 100644 --- a/ompi/contrib/vt/vt/tools/vtfilter/mpi/Makefile.am +++ b/ompi/contrib/vt/vt/tools/vtfilter/mpi/Makefile.am @@ -5,6 +5,12 @@ endif VTFILTERSRCDIR = $(srcdir)/.. include $(srcdir)/../Makefile.common +install-exec-hook: + (cd $(DESTDIR)$(bindir); rm -f vtfiltergen-mpi$(EXEEXT); \ + $(LN_S) vtfilter-mpi$(EXEEXT) vtfiltergen-mpi$(EXEEXT)) +uninstall-local: + rm -f $(DESTDIR)$(bindir)/vtfiltergen-mpi$(EXEEXT) + CXX = $(MPICXX) vtfilter_mpi_CXXFLAGS = $(COMMONCXXFLAGS) -DVT_MPI $(MPICXXFLAGS) $(MPIINCDIR) diff --git a/ompi/contrib/vt/vt/tools/vtfilter/vt_filter.cc b/ompi/contrib/vt/vt/tools/vtfilter/vt_filter.cc index bed6db871e..3baa31abe3 100644 --- a/ompi/contrib/vt/vt/tools/vtfilter/vt_filter.cc +++ b/ompi/contrib/vt/vt/tools/vtfilter/vt_filter.cc @@ -46,20 +46,27 @@ inline static bool stringList2Vector( const std::string& str, std::vector& vec, const std::string& delim = ";" ); +// local variables +// + +// enforce gen. 
mode if called as vtfiltergen[-mpi] +static bool enforceGenMode = false; + // global variables // -#ifdef VT_MPI - const std::string ExeName = "vtfilter-mpi"; -#else // VT_MPI - const std::string ExeName = "vtfilter"; -#endif // VT_MPI +// name of program's executable +std::string ExeName; -ParamsS Params; +// program parameters +ParamsS Params; #ifdef VT_MPI - VT_MPI_INT NumRanks; - VT_MPI_INT MyRank; + // number of MPI-ranks + VT_MPI_INT NumRanks; + + // MPI-rank of calling process + VT_MPI_INT MyRank; #endif // VT_MPI int @@ -75,6 +82,21 @@ main( int argc, char ** argv ) MPI_Comm_rank( MPI_COMM_WORLD, &MyRank ); #endif // VT_MPI + // get program's executable name + // + ExeName = argv[0]; + std::string::size_type si = ExeName.rfind('/'); + if( si != std::string::npos ) + ExeName.erase( 0, si+1 ); + + // enforce gen. mode if called as vtfiltergen[-mpi] + // + if( ExeName.substr( 0, 11 ) == "vtfiltergen" ) + { + enforceGenMode = true; + Params.mode = MODE_GEN; + } + do { // get program parameters @@ -123,7 +145,7 @@ main( int argc, char ** argv ) // either generate a filter file ... 
// - if( Params.mode == MODE_GENFILT ) + if( Params.mode == MODE_GEN ) { // create instance of class FilterGenerator FilterGeneratorC* gen = new FilterGeneratorC(); @@ -306,17 +328,19 @@ getParams( int argc, char** argv ) { // --gen, -gen* // - if( args[i].compare( "--gen" ) == 0 || - args[i].compare( "-gen" ) == 0 ) + if( !enforceGenMode && + ( args[i].compare( "--gen" ) == 0 || + args[i].compare( "-gen" ) == 0 ) ) { - Params.mode = MODE_GENFILT; + Params.mode = MODE_GEN; } // --filt, -filt* // - else if( args[i].compare( "--filt" ) == 0 || - args[i].compare( "-filt" ) == 0 ) + else if( !enforceGenMode && + ( args[i].compare( "--filt" ) == 0 || + args[i].compare( "-filt" ) == 0 ) ) { - Params.mode = MODE_FILTTRC; + Params.mode = MODE_FILT; } // -h, --help // @@ -364,10 +388,11 @@ getParams( int argc, char** argv ) { // already handled options // - if( args[i].compare( "--gen" ) == 0 || - args[i].compare( "-gen" ) == 0 || - args[i].compare( "--filt" ) == 0 || - args[i].compare( "-filt" ) == 0 || + if( ( !enforceGenMode && + ( args[i].compare( "--gen" ) == 0 || + args[i].compare( "-gen" ) == 0 || + args[i].compare( "--filt" ) == 0 || + args[i].compare( "-filt" ) == 0 ) ) || args[i].compare( "-v" ) == 0 || args[i].compare( "--verbose" ) == 0 || // args[i].compare( "-q" ) == 0 || @@ -381,7 +406,7 @@ getParams( int argc, char** argv ) { // gen-options // - if( Params.mode == MODE_GENFILT ) + if( Params.mode == MODE_GEN ) { // -o, --output, -fo* // @@ -691,19 +716,19 @@ getParams( int argc, char** argv ) opt_error = OPT_ERR_OTHER; opt_error_other = ExeName + ": no input trace file specified"; } - else if( Params.mode == MODE_GENFILT && + else if( Params.mode == MODE_GEN && Params.g_output_filtfile.length() == 0 ) { opt_error = OPT_ERR_OTHER; opt_error_other = ExeName + ": no output filter file specified"; } - else if( Params.mode == MODE_FILTTRC && + else if( Params.mode == MODE_FILT && Params.f_input_filtfile.length() == 0 ) { opt_error = OPT_ERR_OTHER; opt_error_other 
= ExeName + ": no input filter file specified"; } - else if( Params.mode == MODE_FILTTRC && + else if( Params.mode == MODE_FILT && Params.f_output_trcfile.length() == 0 ) { opt_error = OPT_ERR_OTHER; @@ -759,36 +784,65 @@ getParams( int argc, char** argv ) static void showUsage() { - std::cout << std::endl - << " " << ExeName << " - filter generator for VampirTrace." << std::endl - << std::endl - << " Syntax: " << std::endl - << " Generate a filter file:" << std::endl - << " " << ExeName << " [gen-options] " << std::endl - << std::endl - << " Filter a trace using an already existing filter file:" << std::endl - << " " << ExeName << " --filt [filt-options] " << std::endl - << std::endl - << " options:" << std::endl - << " --gen Generate a filter file. (default)" << std::endl - << " See 'gen-options' below for valid options." << std::endl - << std::endl - << " --filt Filter a trace using an already existing filter file." << std::endl - << " See 'filt-options' below for valid options." << std::endl + if( enforceGenMode ) + { + std::cout + << std::endl + << " " << ExeName << " - filter generator for VampirTrace." << std::endl + << std::endl + << " Syntax: " << ExeName << " [options] " << std::endl; + } + else + { + std::cout + << std::endl + << " " << ExeName << " - filter tool for VampirTrace." << std::endl + << std::endl + << " Syntax: " << std::endl + << " Generate a filter file:" << std::endl + << " " << ExeName << " --gen [gen-options] " << std::endl + << std::endl + << " Filter a trace using an already existing filter file:" << std::endl + << " " << ExeName << " [--filt] [filt-options]" << std::endl + << " --filter= " << std::endl; + } + + std::cout << std::endl + << " options:" << std::endl; + + if( !enforceGenMode ) + { + std::cout + << " --gen Generate a filter file." << std::endl + << " See 'gen-options' below for valid options." << std::endl + << std::endl + << " --filt Filter a trace using an already existing" << std::endl + << " filter file. 
(default)" << std::endl + << " See 'filt-options' below for valid options." << std::endl + << std::endl; + } + + std::cout << " -h, --help Show this help message." << std::endl << std::endl << " -V, --version Show VampirTrace version." << std::endl << std::endl - << " -v, --verbose Increase output verbosity." << std::endl - << " (can be used more than once)" << std::endl - << std::endl -// << " -q, --quiet Enable quiet mode." << std::endl -// << " (only emergency output)" << std::endl -// << std::endl << " -p, --progress Show progress." << std::endl << std::endl - << " gen-options:" << std::endl + << " -v, --verbose Increase output verbosity." << std::endl + << " (can be used more than once)" << std::endl + << std::endl; +// << " -q, --quiet Enable quiet mode." << std::endl +// << " (only emergency output)" << std::endl +// << std::endl; + + if( !enforceGenMode ) + { + std::cout << " gen-options:" << std::endl; + } + + std::cout << " -o, --output=FILE Pathname of output filter file." << std::endl << std::endl << " -r, --reduce=N Reduce the trace size to N percent of the original size." << std::endl @@ -820,35 +874,49 @@ showUsage() << std::endl << " --include-callees Automatically include callees of included functions" << std::endl << " as well into the filter." << std::endl - << std::endl - << " filt-options:" << std::endl - << " -o, --output=FILE Pathname of output trace file." << std::endl - << std::endl - << " -f, --filter=FILE Pathname of input filter file." << std::endl - << std::endl - << " -s, --max-streams=N Maximum number of output streams." << std::endl - << " Set this to 0 to get the same number of output streams" << std::endl + << std::endl; + + if( !enforceGenMode ) + { + std::cout + << " filt-options:" << std::endl + << " -o, --output=FILE Pathname of output trace file." << std::endl + << std::endl + << " -f, --filter=FILE Pathname of input filter file." << std::endl + << std::endl + << " -s, --max-streams=N Maximum number of output streams." 
<< std::endl + << " Set this to 0 to get the same number of output streams" << std::endl #ifndef VT_MPI - << " as input streams." << std::endl + << " as input streams." << std::endl #else // VT_MPI - << " as MPI processes used, but at least the number of" << std::endl - << " input streams." << std::endl + << " as MPI processes used, but at least the number of" << std::endl + << " input streams." << std::endl #endif // VT_MPI - << " (default: " << ParamsS::f_default_max_output_streams << ")" << std::endl - << std::endl - << " --max-file-handles=N" << std::endl - << " Maximum number of files that are allowed to be open" << std::endl - << " simultaneously." << std::endl - << " (default: " << ParamsS::f_default_max_file_handles << ")" << std::endl - << std::endl - << " --nocompress Don't compress output trace files." << std::endl - << std::endl + << " (default: " << ParamsS::f_default_max_output_streams << ")" << std::endl + << std::endl + << " --max-file-handles=N" << std::endl + << " Maximum number of files that are allowed to be open" << std::endl + << " simultaneously." << std::endl + << " (default: " << ParamsS::f_default_max_file_handles << ")" << std::endl + << std::endl + << " --nocompress Don't compress output trace files." 
<< std::endl + << std::endl; + } + + std::cout << " obsolete options and environment variables:" << std::endl - << " (still available for backward-compatibility)" << std::endl - << " -gen equivalent to '--gen'" << std::endl - << " -filt equivalent to '--filt'" << std::endl - << std::endl - << " gen-options:" << std::endl + << " (still available for backward-compatibility)" << std::endl; + + if( !enforceGenMode ) + { + std::cout + << " -gen equivalent to '--gen'" << std::endl + << " -filt equivalent to '--filt'" << std::endl + << std::endl + << " gen-options:" << std::endl; + } + + std::cout << " -fo equivalent to '-o' or '--output'" << std::endl << " -stats equivalent to '-s' or '--stats'" << std::endl << " -ex equivalent to '-e' or '--exclude'" << std::endl @@ -860,15 +928,20 @@ showUsage() << " equivalent to '--exclude-file'" << std::endl << " TRACEFILTER_INCLUDEFILE" << std::endl << " equivalent to '--include-file'" << std::endl - << std::endl - << " filt-options:" << std::endl - << " -to equivalent to '-o' or '--output'" << std::endl - << " -fi equivalent to '-f' or '--filter'" << std::endl - << " -z LEVEL Set the compression level. Level reaches from 0 to 9" << std::endl - << " where 0 is no compression (--nocompress) and 9 is the" << std::endl - << " highest level." << std::endl - << " (default: " << Params.f_default_compress_level << ")" << std::endl << std::endl; + + if( !enforceGenMode ) + { + std::cout + << " filt-options:" << std::endl + << " -to equivalent to '-o' or '--output'" << std::endl + << " -fi equivalent to '-f' or '--filter'" << std::endl + << " -z LEVEL Set the compression level. Level reaches from 0 to 9" << std::endl + << " where 0 is no compression (--nocompress) and 9 is the" << std::endl + << " highest level." 
<< std::endl + << " (default: " << Params.f_default_compress_level << ")" << std::endl + << std::endl; + } } static bool diff --git a/ompi/contrib/vt/vt/tools/vtfilter/vt_filter.h b/ompi/contrib/vt/vt/tools/vtfilter/vt_filter.h index ec133c7e04..30c320ac9a 100644 --- a/ompi/contrib/vt/vt/tools/vtfilter/vt_filter.h +++ b/ompi/contrib/vt/vt/tools/vtfilter/vt_filter.h @@ -38,8 +38,8 @@ // typedef enum { - MODE_GENFILT, // generate a filter file - MODE_FILTTRC // filter a trace using an already existing filter file + MODE_GEN, // generate a filter file + MODE_FILT // filter a trace using an already existing filter file } FilterModeT; // data structure for program parameters @@ -47,7 +47,7 @@ typedef enum struct ParamsS { ParamsS() - : mode(MODE_GENFILT), input_trcfile(""), verbose_level(0), + : mode(default_mode), input_trcfile(""), verbose_level(0), show_progress(false), show_usage(false), show_version(false), g_output_filtfile(""), g_incl_file(""), g_excl_file(""), g_call_limit(g_default_call_limit), g_reduce_ratio(0), @@ -59,6 +59,7 @@ struct ParamsS // defaults // + static const FilterModeT default_mode = MODE_FILT; static const uint32_t g_default_call_limit = 0; static const uint32_t f_default_max_output_streams = 0; static const uint32_t f_default_max_file_handles = 256; @@ -108,7 +109,7 @@ extern void PVPrint( uint8_t level, const char * fmt, ... ); // // name of program's executable -extern const std::string ExeName; +extern std::string ExeName; // program parameters extern ParamsS Params; diff --git a/ompi/contrib/vt/vt/tools/vtrun/vtrun.in b/ompi/contrib/vt/vt/tools/vtrun/vtrun.in index efe542397a..6213dd840b 100644 --- a/ompi/contrib/vt/vt/tools/vtrun/vtrun.in +++ b/ompi/contrib/vt/vt/tools/vtrun/vtrun.in @@ -46,7 +46,11 @@ show_helptext() echo "" echo " -V, --version Show VampirTrace version." echo "" - echo " -v, --verbose Enable verbose mode." + echo " -v, --verbose Increase output verbosity." 
+ echo " (can be used more than once)" + echo "" + echo " -q, --quiet Enable quiet mode." + echo " (only emergency output)" echo "" echo " - Set application's parallelization type." if test x"$NM" != x; then @@ -76,8 +80,6 @@ show_helptext() fi echo "" echo " --dyninst Instrument user functions by Dyninst." - echo " This kind of instrumentation will be enforced for" - echo " non-MPI applications." echo "" echo " --extra-libs=LIBS Extra libraries to preload." echo "" @@ -106,11 +108,11 @@ show_version() # verbose_echo() { - if test $verbose -eq 1; then + if test $verbose -ge $1; then if test $mpi -eq 1; then - echo "$EXENAME: [$PID]: $1" + echo "$EXENAME: [$PID]: $2" else - echo "$EXENAME: $1" + echo "$EXENAME: $2" fi fi } @@ -132,7 +134,7 @@ libdir=@libdir@ # parse command line options # -verbose=0 +verbose=1 dyninst=0 fortran=0 mpi=$HAVE_MPI @@ -168,7 +170,11 @@ else shift ;; -v | --verbose) - verbose=1 + verbose=$(($verbose+1)) + shift + ;; + -q | --quiet) + verbose=0 shift ;; --dyninst) @@ -179,22 +185,22 @@ else fortran=1 shift ;; - -seq) + -seq | --seq) mpi=0; mt=0 par_type_enforced=1 shift ;; - -mt) + -mt | --mt) mpi=0; mt=1 par_type_enforced=1 shift ;; - -mpi) + -mpi | --mpi) mpi=1; mt=0 par_type_enforced=1 shift ;; - -hyb) + -hyb | --hyb) mpi=1; mt=1 par_type_enforced=1 shift @@ -276,7 +282,7 @@ if test $par_type_enforced -eq 0 -a x"$NM" != x; then fi fi - verbose_echo "Detected parallelization type: $par_type" + verbose_echo 2 "Detected parallelization type: $par_type" fi # set suitable VampirTrace library @@ -296,13 +302,6 @@ else fi fi -# enforce instrumentation by Dyninst, if a non-MPI application is going to execute -# -if test $mpi -eq 0 -a $dyninst -eq 0; then - echo "$EXENAME: Warning: Enforcing instrumentation by Dyninst due to execution of a non-MPI application."
- dyninst=1 -fi - # catch bad configuration # @@ -350,22 +349,28 @@ fi # set environment variables # if test $MACOS -eq 1; then - verbose_echo "Prepending $libdir to DYLD_LIBRARY_PATH" + verbose_echo 2 "Prepending $libdir to DYLD_LIBRARY_PATH" export DYLD_LIBRARY_PATH=$libdir:$DYLD_LIBRARY_PATH - verbose_echo "Setting DYLD_INSERT_LIBRARIES to $ld_preload" + verbose_echo 2 "Setting DYLD_INSERT_LIBRARIES to $ld_preload" export DYLD_INSERT_LIBRARIES=$ld_preload - verbose_echo "Setting DYLD_FORCE_FLAT_NAMESPACE" + verbose_echo 2 "Setting DYLD_FORCE_FLAT_NAMESPACE" export DYLD_FORCE_FLAT_NAMESPACE="" else - verbose_echo "Prepending $libdir to LD_LIBRARY_PATH" + verbose_echo 2 "Prepending $libdir to LD_LIBRARY_PATH" export LD_LIBRARY_PATH=$libdir:$LD_LIBRARY_PATH - verbose_echo "Setting LD_PRELOAD to $ld_preload" + verbose_echo 2 "Setting LD_PRELOAD to $ld_preload" export LD_PRELOAD=$ld_preload fi +# forward verbosity level to VT_VERBOSE +# +if test x"$VT_VERBOSE" = x; then + export VT_VERBOSE=$(($verbose)) +fi + # run application # -verbose_echo "Executing: $exe $exe_args" +verbose_echo 2 "Executing: $exe $exe_args" $exe $exe_args exit $? 
diff --git a/ompi/contrib/vt/vt/tools/vtsetup/ChangeLog b/ompi/contrib/vt/vt/tools/vtsetup/ChangeLog index 939e5a9346..c448f7f4fe 100644 --- a/ompi/contrib/vt/vt/tools/vtsetup/ChangeLog +++ b/ompi/contrib/vt/vt/tools/vtsetup/ChangeLog @@ -1,2 +1,7 @@ +1.0.1 + - removed environment variable in tooltip box for executable + - fixed a bug that caused the version number to be shown incorrectly in the about dialog + 1.0.0 - initial version + diff --git a/ompi/contrib/vt/vt/tools/vtsetup/Makefile.am b/ompi/contrib/vt/vt/tools/vtsetup/Makefile.am index c7ae56e402..e3b191618a 100644 --- a/ompi/contrib/vt/vt/tools/vtsetup/Makefile.am +++ b/ompi/contrib/vt/vt/tools/vtsetup/Makefile.am @@ -16,5 +16,5 @@ EXTRA_DIST = \ vtsetup-data.dtd \ ChangeLog \ README \ - src/vtsetup-1.0.src.tar.gz + src/vtsetup-1.0.1.src.tar.gz diff --git a/ompi/contrib/vt/vt/tools/vtsetup/doc/ChangeLog b/ompi/contrib/vt/vt/tools/vtsetup/doc/ChangeLog index 939e5a9346..c448f7f4fe 100644 --- a/ompi/contrib/vt/vt/tools/vtsetup/doc/ChangeLog +++ b/ompi/contrib/vt/vt/tools/vtsetup/doc/ChangeLog @@ -1,2 +1,7 @@ +1.0.1 + - removed environment variable in tooltip box for executable + - fixed a bug that caused the version number to be shown incorrectly in the about dialog + 1.0.0 - initial version + diff --git a/ompi/contrib/vt/vt/tools/vtsetup/src/vtsetup-1.0.1.src.tar.gz b/ompi/contrib/vt/vt/tools/vtsetup/src/vtsetup-1.0.1.src.tar.gz new file mode 100644 index 0000000000..9ca3fac95d Binary files /dev/null and b/ompi/contrib/vt/vt/tools/vtsetup/src/vtsetup-1.0.1.src.tar.gz differ diff --git a/ompi/contrib/vt/vt/tools/vtsetup/src/vtsetup-1.0.src.tar.gz b/ompi/contrib/vt/vt/tools/vtsetup/src/vtsetup-1.0.src.tar.gz deleted file mode 100644 index 13fa306260..0000000000 Binary files a/ompi/contrib/vt/vt/tools/vtsetup/src/vtsetup-1.0.src.tar.gz and /dev/null differ diff --git a/ompi/contrib/vt/vt/tools/vtsetup/vtsetup-data.xml.in b/ompi/contrib/vt/vt/tools/vtsetup/vtsetup-data.xml.in index dd167bb8e2..38a5ff4b7c
100644 --- a/ompi/contrib/vt/vt/tools/vtsetup/vtsetup-data.xml.in +++ b/ompi/contrib/vt/vt/tools/vtsetup/vtsetup-data.xml.in @@ -45,10 +45,14 @@ FiltGrp - + Output Files FiltGrp + + Miscellaneous + FiltGrp + Enhanced Time Synchronization optFeat @@ -518,10 +522,21 @@ 0 - @VT_SETUP_DYNINST@ + @VT_SETUP_DYNINST@ VT_SETUP_VT_DYN_IGNORE_NODBG - + Whether to detach the Dyninst mutator-program (vtdyn) from the application process. + + + 1 + + + @VT_SETUP_DYNINST@ + VT_SETUP_VT_DYN_DETACH + + Name of function/region filter file. @@ -532,9 +547,9 @@ 1 VT_SETUP_VT_FILTER_SPEC - - Name of function grouping file. + Name of function/region group file. vtsetup_groups.spec0 @@ -544,9 +559,9 @@ 1 VT_SETUP_VT_GROUPS_SPEC - - Name of Java specific filter file. + Name of Java specific input filter file. @@ -567,7 +582,18 @@ @VT_SETUP_JAVA@ VT_SETUP_VT_GROUP_CLASSES - + When restarting the recording of events by VT_ON check whether the call stack has the same depth as when the recording was stopped by VT_OFF. + + + 1 + + + 1 + VT_SETUP_VT_ONOFF_CHECK_STACK_BALANCE + + Maximum number of stack level to be traced (0 = unlimited). @@ -590,27 +616,6 @@ 1 VT_SETUP_VT_GNU_NMFILE - - Maximum number of MPI communicators used in a MPI program. - - - 100 - - - @VT_SETUP_MPI@ - VT_SETUP_VT_MAX_MPI_COMMS - - - Maximum number of MPI windows used in a MPI program. - - - 100 - - - @VT_SETUP_MPI@VT_SETUP_VT_MAX_MPI_WINS - Enable / disable tracing of OpenMP events instrumented by OPARI. 
diff --git a/ompi/contrib/vt/vt/tools/vtsetup/vtsetup.jar b/ompi/contrib/vt/vt/tools/vtsetup/vtsetup.jar index a287739e1b..23849e840c 100644 Binary files a/ompi/contrib/vt/vt/tools/vtsetup/vtsetup.jar and b/ompi/contrib/vt/vt/tools/vtsetup/vtsetup.jar differ diff --git a/ompi/contrib/vt/vt/tools/vtunify/Makefile.common b/ompi/contrib/vt/vt/tools/vtunify/Makefile.common index c43fb9b313..4244a05674 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/Makefile.common +++ b/ompi/contrib/vt/vt/tools/vtunify/Makefile.common @@ -15,44 +15,67 @@ HOOKS2LIBRARY = HOOKS2DEPENDENCIES = HOOKS3SOURCES = \ - $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_msgmatch.h \ - $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_msgmatch.cc -HOOKS3CXXFLAGS = -DVT_UNIFY_HOOKS_MSGMATCH -if AMEXTERNOTF + $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_margins.h \ + $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_margins.cc +HOOKS3CXXFLAGS = -DVT_UNIFY_HOOKS_MARGINS HOOKS3INCLUDE = -HOOKS3LIBRARY = $(OTFLIBDIR) -lotfaux +HOOKS3LIBRARY = HOOKS3DEPENDENCIES = -else -HOOKS3INCLUDE = -I$(top_srcdir)/extlib/otf/otfauxlib -HOOKS3LIBRARY = $(top_builddir)/extlib/otf/otfauxlib/libotfaux.la -HOOKS3DEPENDENCIES = $(HOOKS2LIBRARY) -endif HOOKS4SOURCES = \ - $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_prof.h \ - $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_prof.cc -HOOKS4CXXFLAGS = -DVT_UNIFY_HOOKS_PROF + $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_msgmatch.h \ + $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_msgmatch.cc +HOOKS4CXXFLAGS = -DVT_UNIFY_HOOKS_MSGMATCH +if AMEXTERNOTF HOOKS4INCLUDE = -HOOKS4LIBRARY = +HOOKS4LIBRARY = $(OTFLIBDIR) -lotfaux HOOKS4DEPENDENCIES = +else +HOOKS4INCLUDE = -I$(top_srcdir)/extlib/otf/otfauxlib +HOOKS4LIBRARY = $(top_builddir)/extlib/otf/otfauxlib/libotfaux.la +HOOKS4DEPENDENCIES = $(HOOKS2LIBRARY) +endif HOOKS5SOURCES = \ - $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_tdb.h \ - $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_tdb.cc -HOOKS5CXXFLAGS = -DVT_UNIFY_HOOKS_TDB + $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_prof.h \ + 
$(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_prof.cc +HOOKS5CXXFLAGS = -DVT_UNIFY_HOOKS_PROF HOOKS5INCLUDE = HOOKS5LIBRARY = HOOKS5DEPENDENCIES = +HOOKS6SOURCES = \ + $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_tdb.h \ + $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_tdb.cc +HOOKS6CXXFLAGS = -DVT_UNIFY_HOOKS_TDB +HOOKS6INCLUDE = +HOOKS6LIBRARY = +HOOKS6DEPENDENCIES = + +#HOOKS7SOURCES = \ +# $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_thumb.h \ +# $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_thumb.cc +#HOOKS7CXXFLAGS = -DVT_UNIFY_HOOKS_THUMB +## the dependency on libotfaux is already set by the message matching hook above +##if AMEXTERNOTF +##HOOKS7INCLUDE = +##HOOKS7LIBRARY = $(OTFLIBDIR) -lotfaux +##HOOKS7DEPENDENCIES = +##else +##HOOKS7INCLUDE = -I$(top_srcdir)/extlib/otf/otfauxlib +##HOOKS7LIBRARY = $(top_builddir)/extlib/otf/otfauxlib/libotfaux.la +##HOOKS7DEPENDENCIES = $(HOOKS2LIBRARY) +##endif + HOOKSSOURCES = \ $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_base.h \ $(VTUNIFYSRCDIR)/hooks/vt_unify_hooks_base.cc \ - $(HOOKS1SOURCES) $(HOOKS2SOURCES) $(HOOKS3SOURCES) $(HOOKS4SOURCES) $(HOOKS5SOURCES) + $(HOOKS1SOURCES) $(HOOKS2SOURCES) $(HOOKS3SOURCES) $(HOOKS4SOURCES) $(HOOKS5SOURCES) $(HOOKS6SOURCES) -HOOKSCXXFLAGS = $(HOOKS1CXXFLAGS) $(HOOKS2CXXFLAGS) $(HOOKS3CXXFLAGS) $(HOOKS4CXXFLAGS) $(HOOKS5CXXFLAGS) -HOOKSINCLUDE = $(HOOKS1INCLUDE) $(HOOKS2INCLUDE) $(HOOKS3INCLUDE) $(HOOKS4INCLUDE) $(HOOKS5INCLUDE) -HOOKSLIBRARY = $(HOOKS1LIBRARY) $(HOOKS2LIBRARY) $(HOOKS3LIBRARY) $(HOOKS4LIBRARY) $(HOOKS5LIBRARY) -HOOKSDEPENDENCIES = $(HOOKS1DEPENDENCIES) $(HOOKS2DEPENDENCIES) $(HOOKS3DEPENDENCIES) $(HOOKS4DEPENDENCIES) $(HOOKS5DEPENDENCIES) +HOOKSCXXFLAGS = $(HOOKS1CXXFLAGS) $(HOOKS2CXXFLAGS) $(HOOKS3CXXFLAGS) $(HOOKS4CXXFLAGS) $(HOOKS5CXXFLAGS) $(HOOKS6CXXFLAGS) +HOOKSINCLUDE = $(HOOKS1INCLUDE) $(HOOKS2INCLUDE) $(HOOKS3INCLUDE) $(HOOKS4INCLUDE) $(HOOKS5INCLUDE) $(HOOKS6INCLUDE) $(HOOKS7INCLUDE) +HOOKSLIBRARY = $(HOOKS1LIBRARY) $(HOOKS2LIBRARY) $(HOOKS3LIBRARY) $(HOOKS4LIBRARY) $(HOOKS5LIBRARY)
$(HOOKS6LIBRARY) $(HOOKS7LIBRARY) +HOOKSDEPENDENCIES = $(HOOKS1DEPENDENCIES) $(HOOKS2DEPENDENCIES) $(HOOKS3DEPENDENCIES) $(HOOKS4DEPENDENCIES) $(HOOKS5DEPENDENCIES) $(HOOKS6DEPENDENCIES) $(HOOKS7DEPENDENCIES) if AMBUILDETIMESYNC ETIMESYNCSOURCES = $(VTUNIFYSRCDIR)/vt_unify_esync.cc diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_aevents.cc b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_aevents.cc index 611991df9b..394f9078cf 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_aevents.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_aevents.cc @@ -11,6 +11,7 @@ **/ #include "vt_unify.h" +#include "vt_unify_defs.h" #include "vt_unify_handlers.h" #include "vt_unify_hooks_aevents.h" #include "vt_unify_sync.h" @@ -55,7 +56,7 @@ HooksAsyncEventsC::HandleAsyncEventPre( bool ret = false; // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); do { @@ -147,10 +148,27 @@ HooksAsyncEventsC::HandleAsyncCounter( AsyncSourceManagerS::SourceS * source, // pre-handle event: get actual time of async. 
event from key-value list if( HandleAsyncEventPre( *source, proc, time, kvs ) ) { + // get global token factory for DefProcessGroup + static const TokenFactoryScopeI * tkfac_defprocgrp = + theTokenFactory->getScope( DEF_REC_TYPE__DefProcessGroup ); + // get global token factory for DefCounter static const TokenFactoryScopeI * tkfac_defcntr = theTokenFactory->getScope( DEF_REC_TYPE__DefCounter ); + // try to get local process group token (!=0 if it's a group counter) + uint32_t procgrp = + theDefinitions->groupCounters()->getGroup( proc, counter ); + + // translate local process group token, if necessary + // + uint32_t global_procgrp = procgrp; + if( procgrp != 0 ) + { + global_procgrp = tkfac_defprocgrp->translate( proc, procgrp ); + assert( global_procgrp != 0 ); + } + // translate local counter token // uint32_t global_counter = tkfac_defcntr->translate( proc, counter ); @@ -159,7 +177,8 @@ HooksAsyncEventsC::HandleAsyncCounter( AsyncSourceManagerS::SourceS * source, // create new async. event // AsyncEventBaseS * new_async_event = - new AsyncEventCounterS( time, kvs, global_counter, value ); + new AsyncEventCounterS( time, kvs, global_procgrp, global_counter, + value ); assert( new_async_event ); // post-handle event: enqueue new async. 
event @@ -276,9 +295,8 @@ HooksAsyncEventsC::writeRecHook_DefKeyValue( HooksC::VaArgsT & args ) } void -HooksAsyncEventsC::writeRecHook_Event( HooksC::VaArgsT & args, - const uint32_t & timeArgIdx, const uint32_t & streamIdArgIdx, - const uint32_t & kvsArgIdx, const uint32_t & doWriteArgIdx ) +HooksAsyncEventsC::writeRecHook_Event( uint64_t * time, uint32_t * streamid, + OTF_KeyValueList ** kvs, bool * dowrite ) { bool error = false; @@ -286,14 +304,6 @@ HooksAsyncEventsC::writeRecHook_Event( HooksC::VaArgsT & args, if( m_sourceKeys.empty() ) return; - // get hook arguments - // - - uint64_t * time = (uint64_t*)args[timeArgIdx]; - uint32_t * streamid = (uint32_t*)args[streamIdArgIdx]; - OTF_KeyValueList ** kvs = (OTF_KeyValueList**)args[kvsArgIdx]; - bool * do_write = (bool*)args[doWriteArgIdx]; - // get async. source manager by stream id // AsyncSourceManagerS * manager = getSourceManagerByStreamId( *streamid ); @@ -306,7 +316,7 @@ HooksAsyncEventsC::writeRecHook_Event( HooksC::VaArgsT & args, error = !writeAsyncEvents( *manager, *time ); // drop this event record, if it's asynchronous - *do_write = !isAsyncEvent( *kvs ); + *dowrite = !isAsyncEvent( *kvs ); } //return !error; @@ -323,13 +333,13 @@ HooksAsyncEventsC::genericHook( const uint32_t & id, HooksC::VaArgsT & args ) if( m_sourceKeys.empty() ) return; - if( id == VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_OPEN ) + if( ( id & VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_OPEN ) != 0 ) { // get hook arguments // - uint32_t * stream_id = (uint32_t*)args[0]; - std::string * stream_prefix = (std::string*)args[1]; - OTF_WStream ** wstream = (OTF_WStream**)args[2]; + OTF_WStream ** wstream = (OTF_WStream**)args[0]; + uint32_t * stream_id = (uint32_t*)args[1]; + std::string * stream_prefix = (std::string*)args[2]; // get async. source manager by stream id // @@ -339,7 +349,7 @@ HooksAsyncEventsC::genericHook( const uint32_t & id, HooksC::VaArgsT & args ) // open reader streams of async. 
sources error = !openSources( *manager, *stream_id, *stream_prefix, *wstream ); } - else if( id == VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_CLOSE ) + else if( ( id & VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_CLOSE ) != 0 ) { // get stream id from hook arguments uint32_t * stream_id = (uint32_t*)args[0]; @@ -612,15 +622,17 @@ HooksAsyncEventsC::writeAsyncEvents( AsyncSourceManagerS & manager, static_cast( top ); // trigger write record hook - theHooks->triggerWriteRecordHook( HooksC::Record_Counter, 7, + theHooks->triggerWriteRecordHook( HooksC::Record_Counter, 8, &(manager.wstream), &(record->time), &(manager.stream_id), - &(record->counter), &(record->value), &(record->kvs), - &do_write ); + &(record->procgrp), &(record->counter), &(record->value), + &(record->kvs), &do_write ); // write record if( do_write ) error = ( OTF_WStream_writeCounterKV( manager.wstream, - record->time, manager.stream_id, + record->time, + record->procgrp ? + record->procgrp : manager.stream_id, record->counter, record->value, record->kvs ) == 0 ); diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_aevents.h b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_aevents.h index 6fd835ebc3..ec88a418e6 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_aevents.h +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_aevents.h @@ -80,10 +80,12 @@ private: { // constructor AsyncEventCounterS( const uint64_t & _time, OTF_KeyValueList *& _kvs, - const uint32_t & _counter, const uint64_t & _value ) + const uint32_t & _procgrp, const uint32_t & _counter, + const uint64_t & _value ) : AsyncEventBaseS(ASYNC_EVENT_TYPE_COUNTER, _time, _kvs), - counter(_counter), value(_value) {} + procgrp(_procgrp), counter(_counter), value(_value) {} + uint32_t procgrp; // global process group token (if it's a group counter) uint32_t counter; // global counter token uint64_t value; // counter value @@ -183,38 +185,91 @@ private: // event records // common stuff for write 
event record hooks - void writeRecHook_Event( HooksC::VaArgsT & args, - const uint32_t & timeArgIdx, const uint32_t & streamIdArgIdx, - const uint32_t & kvsArgIdx, const uint32_t & doWriteArgIdx ); + void writeRecHook_Event( uint64_t * time, uint32_t * streamid, + OTF_KeyValueList ** kvs, bool * dowrite ); void writeRecHook_EventComment( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 4, 5 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[4], (bool*)args[5] ); + } + void writeRecHook_Enter( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 3, 5, 6 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[3], + (OTF_KeyValueList**)args[5], (bool*)args[6] ); + } + void writeRecHook_Leave( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 3, 5, 6 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[3], + (OTF_KeyValueList**)args[5], (bool*)args[6] ); + } + void writeRecHook_Counter( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 5, 6 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[6], (bool*)args[7] ); + } + void writeRecHook_BeginFileOp( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 5, 6 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[5], (bool*)args[6] ); + } + void writeRecHook_EndFileOp( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 9, 10 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[9], (bool*)args[10] ); + } + void writeRecHook_SendMsg( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 8, 9 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[8], (bool*)args[9] ); + } + void writeRecHook_RecvMsg( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 8, 9 ); } + { + writeRecHook_Event( (uint64_t*)args[1], 
(uint32_t*)args[2], + (OTF_KeyValueList**)args[8], (bool*)args[9] ); + } + void writeRecHook_BeginCollOp( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 10, 11 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[10], (bool*)args[11] ); + } + void writeRecHook_EndCollOp( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 4, 5 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[4], (bool*)args[5] ); + } + void writeRecHook_RMAPut( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 9, 10 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[9], (bool*)args[10] ); + } + void writeRecHook_RMAPutRemoteEnd( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 9, 10 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[9], (bool*)args[10] ); + } void writeRecHook_RMAGet( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 9, 10 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[9], (bool*)args[10] ); + } + void writeRecHook_RMAEnd( HooksC::VaArgsT & args ) - { writeRecHook_Event( args, 1, 2, 7, 8 ); } + { + writeRecHook_Event( (uint64_t*)args[1], (uint32_t*)args[2], + (OTF_KeyValueList**)args[7], (bool*)args[8] ); + } // generic hook void genericHook( const uint32_t & id, HooksC::VaArgsT & args ); diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_base.cc b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_base.cc index 119823559d..f6a00280da 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_base.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_base.cc @@ -99,6 +99,11 @@ HooksBaseC::HooksBaseC() m_writeRecHookMethods[HooksC::Record_DefProcessGroup] = &HooksBaseC::writeRecHook_DefProcessGroup; + 
m_readRecHookMethods[HooksC::Record_DefProcessGroupAttributes] = + &HooksBaseC::readRecHook_DefProcessGroupAttributes; + m_writeRecHookMethods[HooksC::Record_DefProcessGroupAttributes] = + &HooksBaseC::writeRecHook_DefProcessGroupAttributes; + m_readRecHookMethods[HooksC::Record_DefProcess] = &HooksBaseC::readRecHook_DefProcess; m_writeRecHookMethods[HooksC::Record_DefProcess] = @@ -149,6 +154,11 @@ HooksBaseC::HooksBaseC() m_writeRecHookMethods[HooksC::Record_DefCounter] = &HooksBaseC::writeRecHook_DefCounter; + m_readRecHookMethods[HooksC::Record_DefCounterAssignments] = + &HooksBaseC::readRecHook_DefCounterAssignments; + m_writeRecHookMethods[HooksC::Record_DefCounterAssignments] = + &HooksBaseC::writeRecHook_DefCounterAssignments; + m_readRecHookMethods[HooksC::Record_DefKeyValue] = &HooksBaseC::readRecHook_DefKeyValue; m_writeRecHookMethods[HooksC::Record_DefKeyValue] = diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_base.h b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_base.h index ffeeaa2fb0..4cf4b345c4 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_base.h +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_base.h @@ -114,6 +114,9 @@ private: virtual void readRecHook_DefProcessGroup( HooksC::VaArgsT & args ) { (void)args; } virtual void writeRecHook_DefProcessGroup( HooksC::VaArgsT & args ) { (void)args; } + virtual void readRecHook_DefProcessGroupAttributes( HooksC::VaArgsT & args ) { (void)args; } + virtual void writeRecHook_DefProcessGroupAttributes( HooksC::VaArgsT & args ) { (void)args; } + virtual void readRecHook_DefProcess( HooksC::VaArgsT & args ) { (void)args; } virtual void writeRecHook_DefProcess( HooksC::VaArgsT & args ) { (void)args; } @@ -144,6 +147,9 @@ private: virtual void readRecHook_DefCounter( HooksC::VaArgsT & args ) { (void)args; } virtual void writeRecHook_DefCounter( HooksC::VaArgsT & args ) { (void)args; } + virtual void readRecHook_DefCounterAssignments( HooksC::VaArgsT & args 
) { (void)args; } + virtual void writeRecHook_DefCounterAssignments( HooksC::VaArgsT & args ) { (void)args; } + virtual void readRecHook_DefKeyValue( HooksC::VaArgsT & args ) { (void)args; } virtual void writeRecHook_DefKeyValue( HooksC::VaArgsT & args ) { (void)args; } diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_margins.cc b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_margins.cc new file mode 100644 index 0000000000..c3731ddb70 --- /dev/null +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_margins.cc @@ -0,0 +1,161 @@ +/** + * VampirTrace + * http://www.tu-dresden.de/zih/vampirtrace + * + * Copyright (c) 2005-2011, ZIH, TU Dresden, Federal Republic of Germany + * + * Copyright (c) 1998-2005, Forschungszentrum Juelich, Juelich Supercomputing + * Centre, Federal Republic of Germany + * + * See the file COPYING in the package base directory for details + **/ + +#include "vt_unify.h" +#include "vt_unify_hooks_margins.h" + +#include + +//////////////////// class HooksProcessMarginsC //////////////////// + +// public methods +// + +HooksProcessMarginsC::HooksProcessMarginsC() : HooksBaseC(), + m_maxThreads( 1 ) +{ + // Empty +} + +HooksProcessMarginsC::~HooksProcessMarginsC() +{ + // Empty +} + +// private methods +// + +// vvvvvvvvvvvvvvvvvvvv HOOK METHODS vvvvvvvvvvvvvvvvvvvv + +// initialization/finalization hooks +// + +void +HooksProcessMarginsC::initHook() +{ + // Empty +} + +void +HooksProcessMarginsC::finalizeHook( const bool & error ) +{ + // Empty +} + +// phase hooks +// + +void +HooksProcessMarginsC::phaseHook_UnifyEvents_pre() +{ +#if defined(HAVE_OMP) && HAVE_OMP + // update maximum number of threads to use for unifying events + m_maxThreads = omp_get_max_threads(); +#endif // HAVE_OMP + + // create array of thread contexts + // + m_threadContexts = new ThreadContextS[m_maxThreads]; + assert( m_threadContexts ); +} + +void +HooksProcessMarginsC::phaseHook_UnifyEvents_post() +{ + // delete array of thread contexts 
+ delete [] m_threadContexts; +} + +// record hooks +// + +void +HooksProcessMarginsC::writeRecHook_Event( OTF_WStream ** wstream, + uint64_t * time, uint32_t * streamid, bool * dowrite ) +{ + bool error = false; + +#if defined(HAVE_OMP) && HAVE_OMP + ThreadContextS & context = m_threadContexts[omp_get_thread_num()]; +#else // HAVE_OMP + ThreadContextS & context = m_threadContexts[0]; +#endif // HAVE_OMP + + if( *dowrite ) + { + // update last written timestamp + context.last_time = *time; + + // first event record to write? + if( context.first_event ) + { + context.first_event = false; + + // write begin process record + error = ( OTF_WStream_writeBeginProcess( *wstream, *time, + *streamid ) == 0 ); + } + } + + //return !error; + assert( !error ); +} + +// generic hook +void +HooksProcessMarginsC::genericHook( const uint32_t & id, HooksC::VaArgsT & args ) +{ + bool error = false; + + if( ( id & VT_UNIFY_HOOKS_MARGINS_GENID__EVENT_STREAM_OPEN ) != 0 ) + { +#if defined(HAVE_OMP) && HAVE_OMP + ThreadContextS & context = m_threadContexts[omp_get_thread_num()]; +#else // HAVE_OMP + ThreadContextS & context = m_threadContexts[0]; +#endif // HAVE_OMP + + // get hook arguments + // + OTF_WStream ** wstream = (OTF_WStream**)args[0]; + uint32_t * stream_id = (uint32_t*)args[1]; + + // [re-]initialize thread context + // + context.wstream = *wstream; + context.streamid = *stream_id; + context.first_event = true; + context.last_time = 0; + } + else if( ( id & VT_UNIFY_HOOKS_MARGINS_GENID__EVENT_STREAM_CLOSE ) != 0 ) + { +#if defined(HAVE_OMP) && HAVE_OMP + ThreadContextS & context = m_threadContexts[omp_get_thread_num()]; +#else // HAVE_OMP + ThreadContextS & context = m_threadContexts[0]; +#endif // HAVE_OMP + + // get stream id from hook arguments + uint32_t * stream_id = (uint32_t*)args[0]; + + assert( context.streamid == *stream_id ); + + // write process end record + error = ( OTF_WStream_writeEndProcess( context.wstream, context.last_time, + context.streamid ) == 0 ); + 
} + + //return !error; + assert( !error ); +} + +// ^^^^^^^^^^^^^^^^^^^^ HOOK METHODS ^^^^^^^^^^^^^^^^^^^^ diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_margins.h b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_margins.h new file mode 100644 index 0000000000..d7b5f02de7 --- /dev/null +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_margins.h @@ -0,0 +1,173 @@ +/** + * VampirTrace + * http://www.tu-dresden.de/zih/vampirtrace + * + * Copyright (c) 2005-2011, ZIH, TU Dresden, Federal Republic of Germany + * + * Copyright (c) 1998-2005, Forschungszentrum Juelich, Juelich Supercomputing + * Centre, Federal Republic of Germany + * + * See the file COPYING in the package base directory for details + **/ + +#ifndef _VT_UNIFY_HOOKS_MARGINS_H_ +#define _VT_UNIFY_HOOKS_MARGINS_H_ + +#include "vt_unify.h" +#include "vt_unify_hooks_base.h" + +#include "otf.h" + +// +// HooksProcessMarginsC class +// +class HooksProcessMarginsC : public HooksBaseC +{ +public: + + // constructor + HooksProcessMarginsC(); + + // destructor + ~HooksProcessMarginsC(); + + // is this hook enabled? + static bool isEnabled() { return true; } + +private: + + // + // thread context structure + // + struct ThreadContextS + { + ThreadContextS() + : wstream( 0 ), streamid( 0 ), first_event( true ), last_time( 0 ) {} + + OTF_WStream * wstream; // OTF writer stream + uint32_t streamid; // stream id + bool first_event; // flag: first event record to write? 
+ uint64_t last_time; // last written timestamp + + }; + + // vvvvvvvvvvvvvvvvvvvv HOOK METHODS vvvvvvvvvvvvvvvvvvvv + + // initialization/finalization hooks + // + + void initHook(); + void finalizeHook( const bool & error ); + + // phase hooks + // + + void phaseHook_UnifyEvents_pre(); + void phaseHook_UnifyEvents_post(); + + // record hooks + // + + // event records + + // common stuff for write event record hooks + void writeRecHook_Event( OTF_WStream ** wstream, uint64_t * time, + uint32_t * streamid, bool * dowrite ); + + void writeRecHook_EventComment( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[5] ); + } + + void writeRecHook_Enter( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[3], (bool*)args[6] ); + } + + void writeRecHook_Leave( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[3], (bool*)args[6] ); + } + + void writeRecHook_Counter( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[7] ); + } + + void writeRecHook_BeginFileOp( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[6] ); + } + + void writeRecHook_EndFileOp( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[10] ); + } + + void writeRecHook_SendMsg( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[9] ); + } + + void writeRecHook_RecvMsg( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[9] ); + } + + void writeRecHook_BeginCollOp( HooksC::VaArgsT & args ) + { + writeRecHook_Event( 
(OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[11] ); + } + + void writeRecHook_EndCollOp( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[5] ); + } + + void writeRecHook_RMAPut( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[10] ); + } + + void writeRecHook_RMAPutRemoteEnd( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[10] ); + } + void writeRecHook_RMAGet( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[10] ); + } + + void writeRecHook_RMAEnd( HooksC::VaArgsT & args ) + { + writeRecHook_Event( (OTF_WStream**)args[0], (uint64_t*)args[1], + (uint32_t*)args[2], (bool*)args[8] ); + } + + // generic hook + void genericHook( const uint32_t & id, HooksC::VaArgsT & args ); + + // ^^^^^^^^^^^^^^^^^^^^ HOOK METHODS ^^^^^^^^^^^^^^^^^^^^ + + // maximum number of threads to use for unifying events + int m_maxThreads; + + // array of thread contexts + ThreadContextS * m_threadContexts; + +}; + +#endif // _VT_UNIFY_HOOKS_MARGINS_H_ diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_msgmatch.cc b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_msgmatch.cc index 7c3c2a3d0f..3f1afd7543 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_msgmatch.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_msgmatch.cc @@ -50,7 +50,7 @@ HooksMsgMatchC::~HooksMsgMatchC() // int -HooksMsgMatchC::Handle_EventComment( void * userData, +HooksMsgMatchC::HandleEventComment( void * userData, uint64_t time, uint32_t proc, const char * comment ) { #ifdef VT_ETIMESYNC @@ -67,7 +67,7 @@ HooksMsgMatchC::Handle_EventComment( void * userData, } int -HooksMsgMatchC::Handle_RecvMsg( LargeVectorC * recvMsgs, 
+HooksMsgMatchC::HandleRecvMsg( LargeVectorC * recvMsgs, uint64_t time, uint32_t receiver, uint32_t sender, uint32_t comm, uint32_t tag, uint32_t length, uint32_t scl ) { @@ -383,12 +383,12 @@ HooksMsgMatchC::getRecvMsgs( LargeVectorC & recvMsgs ) // ... OTF_EVENTCOMMENT_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)HooksMsgMatchC::Handle_EventComment, + (OTF_FunctionPointer*)HooksMsgMatchC::HandleEventComment, OTF_EVENTCOMMENT_RECORD ); // ... OTF_RECEIVE_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)HooksMsgMatchC::Handle_RecvMsg, + (OTF_FunctionPointer*)HooksMsgMatchC::HandleRecvMsg, OTF_RECEIVE_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, recv_msgs[threadid], OTF_RECEIVE_RECORD ); diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_msgmatch.h b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_msgmatch.h index 7d92f156dd..24b76972f8 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_msgmatch.h +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_msgmatch.h @@ -103,10 +103,10 @@ private: // event record handlers // - static int Handle_EventComment( void * userData, + static int HandleEventComment( void * userData, uint64_t time, uint32_t proc, const char * comment ); - static int Handle_RecvMsg( LargeVectorC * recvMsgs, + static int HandleRecvMsg( LargeVectorC * recvMsgs, uint64_t time, uint32_t receiver, uint32_t sender, uint32_t comm, uint32_t tag, uint32_t length, uint32_t scl ); diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_raw.cc b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_raw.cc index 90cfcd6c3e..77d777bf94 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_raw.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_raw.cc @@ -91,6 +91,9 @@ void HooksRawC::writeRecHook_DefTimeRange( HooksC::VaArgsT & args ) { DOSOMETHIN void HooksRawC::readRecHook_DefProcessGroup( HooksC::VaArgsT & args 
) { DOSOMETHING; } void HooksRawC::writeRecHook_DefProcessGroup( HooksC::VaArgsT & args ) { DOSOMETHING; } +void HooksRawC::readRecHook_DefProcessGroupAttributes( HooksC::VaArgsT & args ) { DOSOMETHING; } +void HooksRawC::writeRecHook_DefProcessGroupAttributes( HooksC::VaArgsT & args ) { DOSOMETHING; } + void HooksRawC::readRecHook_DefProcess( HooksC::VaArgsT & args ) { DOSOMETHING; } void HooksRawC::writeRecHook_DefProcess( HooksC::VaArgsT & args ) { DOSOMETHING; } @@ -121,6 +124,9 @@ void HooksRawC::writeRecHook_DefCounterGroup( HooksC::VaArgsT & args ) { DOSOMET void HooksRawC::readRecHook_DefCounter( HooksC::VaArgsT & args ) { DOSOMETHING; } void HooksRawC::writeRecHook_DefCounter( HooksC::VaArgsT & args ) { DOSOMETHING; } +void HooksRawC::readRecHook_DefCounterAssignments( HooksC::VaArgsT & args ) { DOSOMETHING; } +void HooksRawC::writeRecHook_DefCounterAssignments( HooksC::VaArgsT & args ) { DOSOMETHING; } + void HooksRawC::readRecHook_DefKeyValue( HooksC::VaArgsT & args ) { DOSOMETHING; } void HooksRawC::writeRecHook_DefKeyValue( HooksC::VaArgsT & args ) { DOSOMETHING; } diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_raw.h b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_raw.h index 47021742cc..85782eae1a 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_raw.h +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_raw.h @@ -85,6 +85,9 @@ private: void readRecHook_DefProcessGroup( HooksC::VaArgsT & args ); void writeRecHook_DefProcessGroup( HooksC::VaArgsT & args ); + void readRecHook_DefProcessGroupAttributes( HooksC::VaArgsT & args ); + void writeRecHook_DefProcessGroupAttributes( HooksC::VaArgsT & args ); + void readRecHook_DefProcess( HooksC::VaArgsT & args ); void writeRecHook_DefProcess( HooksC::VaArgsT & args ); @@ -115,6 +118,9 @@ private: void readRecHook_DefCounter( HooksC::VaArgsT & args ); void writeRecHook_DefCounter( HooksC::VaArgsT & args ); + void readRecHook_DefCounterAssignments( 
HooksC::VaArgsT & args ); + void writeRecHook_DefCounterAssignments( HooksC::VaArgsT & args ); + void readRecHook_DefKeyValue( HooksC::VaArgsT & args ); void writeRecHook_DefKeyValue( HooksC::VaArgsT & args ); diff --git a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_tdb.cc b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_tdb.cc index c131f6d925..ccc738b333 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_tdb.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/hooks/vt_unify_hooks_tdb.cc @@ -894,7 +894,7 @@ void HooksTdbC::writeRecHook_Counter( HooksC::VaArgsT & args ) { GET_THREAD_ID( thread_id ); /* get hook arguments */ - GET_PARAM( bool*, do_write, 6 ); + GET_PARAM( bool*, do_write, 7 ); if( *do_write ) { @@ -922,7 +922,7 @@ void HooksTdbC::writeRecHook_EventComment( HooksC::VaArgsT & args ) { // generic hook void HooksTdbC::genericHook( const uint32_t & id, HooksC::VaArgsT & args ) { - if( id == VT_UNIFY_HOOKS_TDB_GENID__STARTSTOPTIME_EPOCH ) { + if( ( id & VT_UNIFY_HOOKS_TDB_GENID__STARTSTOPTIME_EPOCH ) != 0 ) { MinStartTimeEpoch = *((uint64_t*)args[0]); MaxStopTimeEpoch = *((uint64_t*)args[1]); diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify.cc b/ompi/contrib/vt/vt/tools/vtunify/vt_unify.cc index 27bcaf5152..3f1be123d5 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify.cc @@ -379,12 +379,13 @@ getUnifyControls() // flag which indicates whether any stream is available bool any_stream_avail = false; + char buffer[STRBUFSIZE]; + uint32_t line_no = 1; + uint32_t i; + do { - char buffer[STRBUFSIZE]; - uint32_t line_no = 1; uint32_t line_no_sec = 1; - uint32_t i; std::vector streamids; std::vector streamavs; @@ -406,10 +407,15 @@ getUnifyControls() { // if that's the first section, continue reading if( line_no == 1 ) + { continue; + } // otherwise, leave read loop to finalize previous section else + { + line_no++; break; + } } // increment line number and remove new-line @@ 
-424,7 +430,7 @@ getUnifyControls() switch( line_no_sec ) { - // line_no = 1: ids of input streams + // line_no_sec = 1: ids of input streams // case 1: { @@ -456,8 +462,8 @@ getUnifyControls() break; } - // line_no = 2: read chronological offsets to global time - // and local times + // line_no_sec = 2: read chronological offsets to global time + // and local times // case 2: { @@ -473,7 +479,12 @@ getUnifyControls() } case 2: { - error = !( iss >> std::hex >> offset[0] ); + // std::stringstream expects unsigned values after + // switching format to std::hex; read unsigned and + // convert to signed afterwards + uint64_t tmp; + if( !( error = !(iss >> std::hex >> tmp ) ) ) + offset[0] = tmp; break; } case 3: @@ -483,7 +494,12 @@ getUnifyControls() } case 4: { - error = !( iss >> std::hex >> offset[1] ); + // std::stringstream expects unsigned values after + // switching format to std::hex; read unsigned and + // convert to signed afterwards + uint64_t tmp; + if( !( error = !(iss >> std::hex >> tmp ) ) ) + offset[1] = tmp; break; } default: @@ -495,8 +511,8 @@ getUnifyControls() break; } #ifdef VT_ETIMESYNC - // line_no = 3: read synchronization mapping - // information + // line_no_sec = 3: read synchronization mapping information + // case 3: { static ETimeSyncC::SyncPhaseS sync_phase; @@ -537,8 +553,9 @@ getUnifyControls() } break; } - // line_no = 4-n: read synchronization timestamps of each - // synchronization phase (each per line) + // line_no_sec = 4-n: read synchronization timestamps of each + // synchronization phase (each per line) + // default: { static std::pair sync_pair; @@ -606,7 +623,7 @@ getUnifyControls() break; } #else // VT_ETIMESYNC - // line_no = 3-n: stuff for enhanced time sync. + // line_no_sec = 3-n: stuff for enhanced time sync. 
// default: { @@ -718,20 +735,42 @@ getUnifyControls() { UnifyControlS * uctl = UnifyCtls[i]; + // set stream id/unify control mapping StreamId2UnifyCtl[uctl->streamid] = uctl; + if( uctl->stream_avail ) { #ifdef VT_MPI - // assign stream id to rank - // - static VT_MPI_INT rank = 0; - StreamId2Rank[uctl->streamid] = rank; - if( rank == MyRank ) - MyStreamIds.push_back( uctl->streamid ); - rank = ( rank + 1 < NumRanks ) ? rank + 1 : 0; -#else // VT_MPI - MyStreamIds.push_back( uctl->streamid ); + if( NumRanks > 1 ) + { + // assign stream id to rank, whereas childs will not be + // separated from its parent stream id + // + + static VT_MPI_INT rank = 0; + + // assign stream id to rank + if( rank == MyRank ) + MyStreamIds.push_back( uctl->streamid ); + + // set stream id/rank mapping + StreamId2Rank[uctl->streamid] = rank; + + // get rank for the next stream id + // + if( i < UnifyCtls.size() - 1 && UnifyCtls[i+1]->pstreamid == 0 ) + { + if( rank + 1 < NumRanks ) + rank++; + else + rank = 0; + } + } + else #endif // VT_MPI + { + MyStreamIds.push_back( uctl->streamid ); + } } } @@ -822,6 +861,12 @@ parseCommandLine( int argc, char ** argv ) Params.droprecvs = true; } #endif // VT_UNIFY_HOOKS_MSGMATCH +#ifdef VT_UNIFY_HOOKS_THUMB + else if( strcmp( argv[i], "--nothumb" ) == 0 ) + { + Params.createthumb = false; + } +#endif // VT_UNIFY_HOOKS_THUMB #if defined(HAVE_ZLIB) && HAVE_ZLIB else if( strcmp( argv[i], "--nocompress" ) == 0 ) { @@ -896,9 +941,9 @@ writeMasterControl() OTF_MasterControl * mc = OTF_MasterControl_new( manager ); assert( mc ); - // add stream/process mapping to master control + // add stream/process[group] mappings to master control // - for( uint32_t i = 0; i < UnifyCtls.size(); i++ ) + for( uint32_t i = 0; i < UnifyCtls.size() && !error; i++ ) { // add only available streams/processes // @@ -906,13 +951,41 @@ writeMasterControl() { const uint32_t & streamid = UnifyCtls[i]->streamid; - if( OTF_MasterControl_append( mc, streamid, streamid ) == 0 ) + 
// get additional process group tokens of stream + const std::set * procgrps = + theDefinitions->groupCounters()->getGroupsOfStream( streamid ); + + // add mappings + // + std::set::const_iterator procgrp_it; + if( procgrps ) procgrp_it = procgrps->begin(); + uint32_t proc_or_group = streamid; + while( proc_or_group != 0 ) { - std::cerr << ExeName << ": Error: " - << "Could not append " << streamid << ":" << streamid - << " to OTF master control" << std::endl; - error = true; - break; + if( OTF_MasterControl_append( mc, streamid, proc_or_group ) == 0 ) + { + std::cerr << ExeName << ": Error: " + << "Could not append mapping " << std::hex + << streamid << ":" << proc_or_group << std::dec + << " to OTF master control" << std::endl; + error = true; + break; + } + + VPrint( 3, " Added mapping %x:%x to OTF master control\n", + streamid, proc_or_group ); + + // get next process group token to add + // + if( procgrps && procgrp_it != procgrps->end() ) + { + proc_or_group = *procgrp_it; + procgrp_it++; + } + else + { + proc_or_group = 0; + } } } } @@ -1061,12 +1134,12 @@ showUsage() << std::endl << " -p, --progress Show progress." << std::endl << std::endl - << " -q, --quiet Enable quiet mode." << std::endl - << " (only emergency output)" << std::endl - << std::endl << " -v, --verbose Increase output verbosity." << std::endl << " (can be used more than once)" << std::endl << std::endl + << " -q, --quiet Enable quiet mode." << std::endl + << " (only emergency output)" << std::endl + << std::endl #if defined(HAVE_ZLIB) && HAVE_ZLIB << " --nocompress Don't compress output trace files." << std::endl << std::endl @@ -1076,8 +1149,13 @@ showUsage() << std::endl << " --droprecvs Drop message receive events, if msg. matching" << std::endl << " is enabled." << std::endl + << std::endl #endif // VT_UNIFY_HOOKS_MSGMATCH - << std::endl; +#ifdef VT_UNIFY_HOOKS_THUMB + << " --nothumb Don't create Vampir thumbnail." 
<< std::endl + << std::endl +#endif // VT_UNIFY_HOOKS_THUMB + ; } #ifdef VT_MPI diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify.h b/ompi/contrib/vt/vt/tools/vtunify/vt_unify.h index 698cb9956a..8b24cf5b98 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify.h +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify.h @@ -66,7 +66,7 @@ struct ParamsS : verbose_level( 0 ), docompress( false ), doclean( true ), showusage( false ), showversion( false ), showprogress( false ), bequiet( false ), domsgmatch( false ), droprecvs( false ), - prof_sort_flags( 0x22 ) + prof_sort_flags( 0x22 ), createthumb( false ) { #if defined(HAVE_ZLIB) && HAVE_ZLIB docompress = true; @@ -75,6 +75,10 @@ struct ParamsS #ifdef VT_UNIFY_HOOKS_MSGMATCH domsgmatch = true; #endif // VT_UNIFY_HOOKS_MSGMATCH + +#ifdef VT_UNIFY_HOOKS_THUMB + createthumb = true; +#endif // VT_UNIFY_HOOKS_THUMB } std::string in_file_prefix; // input trace file prefix @@ -97,6 +101,10 @@ struct ParamsS std::string prof_out_file; // profile output file int prof_sort_flags; // profile sort flags + // HooksThumbC's parameters + // + bool createthumb; // flag: create Vampir thumbnail? 
+ }; // diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.cc b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.cc index aa2b429084..873343108d 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.cc @@ -27,6 +27,18 @@ #include #include +// function for resorting global definitions based on T::SortS +template +static void resortGlobDefs( const std::set & in, + std::set & out ) +{ + for( typename std::set::const_iterator it = + in.begin(); it != in.end(); it++ ) + { + out.insert( &(*it) ); + } +} + DefinitionsC * theDefinitions = 0; // instance of class DefinitionsC //////////////////// class DefinitionsC //////////////////// @@ -36,16 +48,6 @@ DefinitionsC * theDefinitions = 0; // instance of class DefinitionsC DefinitionsC::DefinitionsC() { - // create instance of sub-class CommentsC - // - m_comments = new CommentsC( *this ); - assert( m_comments ); - - // create instance of sub-class ProcessGroupsC - // - m_procGrps = new ProcessGroupsC( *this ); - assert( m_procGrps ); - // create token factory scopes for def. record type ... 
// @@ -114,10 +116,28 @@ DefinitionsC::DefinitionsC() DEF_REC_TYPE__DefKeyValue, new TokenFactoryScopeC ( &(m_globDefs.keyVals) ) ); + + // create instance of sub-class GroupCountersC + // + m_groupCntrs = new GroupCountersC( *this ); + assert( m_groupCntrs ); + + // create instance of sub-class CommentsC + // + m_comments = new CommentsC( *this ); + assert( m_comments ); + + // create instance of sub-class ProcessGroupsC + // + m_procGrps = new ProcessGroupsC( *this ); + assert( m_procGrps ); } DefinitionsC::~DefinitionsC() { + // delete instance of sub-class GroupCountersC + delete m_groupCntrs; + // delete instance of sub-class CommentsC delete m_comments; @@ -181,14 +201,9 @@ DefinitionsC::run() do { - // get stream ids to read - // - std::vector streamids; - getStreamIds( streamids ); - // read local definitions // - error = !readLocal( streamids ); + error = !readLocal(); if( SyncError( &error ) ) break; @@ -255,8 +270,53 @@ DefinitionsC::cleanUp() char filename1[STRBUFSIZE]; char filename2[STRBUFSIZE]; + // remove local definition files, if necessary + // + if( Params.doclean ) + { + int streams_num = (int)MyStreamIds.size(); + int i; + +#if defined(HAVE_OMP) && HAVE_OMP +# pragma omp parallel for private(i, filename1) +#endif // HAVE_OMP + for( i = 0; i < streams_num; i++ ) + { + const uint32_t & streamid = MyStreamIds[i]; + + // try to remove file without compression suffix + OTF_getFilename( Params.in_file_prefix.c_str(), streamid, + OTF_FILETYPE_DEF, STRBUFSIZE, filename1 ); + if( remove( filename1 ) == 0 ) + PVPrint( 3, " Removed %s\n", filename1 ); + + // try to remove file with compression suffix + OTF_getFilename( Params.in_file_prefix.c_str(), streamid, + OTF_FILETYPE_DEF | OTF_FILECOMPRESSION_COMPRESSED, + STRBUFSIZE, filename1 ); + if( remove( filename1 ) == 0 ) + PVPrint( 3, " Removed %s\n", filename1 ); + } + } + MASTER { + // remove previous created definition output file + // + + // try to remove file without compression suffix + 
OTF_getFilename( Params.out_file_prefix.c_str(), 0, + OTF_FILETYPE_DEF, STRBUFSIZE, filename1 ); + if( remove( filename1 ) == 0 ) + VPrint( 3, " Removed %s\n", filename1 ); + + // try to remove file with compression suffix + OTF_getFilename( Params.out_file_prefix.c_str(), 0, + OTF_FILETYPE_DEF | OTF_FILECOMPRESSION_COMPRESSED, + STRBUFSIZE, filename1 ); + if( remove( filename1 ) == 0 ) + VPrint( 3, " Removed %s\n", filename1 ); + // rename temporary definition output file // @@ -293,95 +353,14 @@ DefinitionsC::cleanUp() SyncError( &error ); #endif // VT_MPI - // remove local definition files, if necessary - // - if( !error && Params.doclean ) - { - int streams_num = (int)MyStreamIds.size(); - int i; - -#if defined(HAVE_OMP) && HAVE_OMP -# pragma omp parallel for private(i, filename1) -#endif // HAVE_OMP - for( i = 0; i < streams_num; i++ ) - { - const uint32_t & streamid = MyStreamIds[i]; - - bool removed = false; - - // get file name without compression suffix - OTF_getFilename( Params.in_file_prefix.c_str(), streamid, - OTF_FILETYPE_DEF, STRBUFSIZE, filename1 ); - - // try to remove file - if( !( removed = ( remove( filename1 ) == 0 ) ) ) - { - // if failed, get file name with compression suffix - OTF_getFilename( Params.in_file_prefix.c_str(), streamid, - OTF_FILETYPE_DEF | OTF_FILECOMPRESSION_COMPRESSED, STRBUFSIZE, - filename1 ); - - // try to remove file again - removed = ( remove( filename1 ) == 0 ); - } - - if( removed ) - PVPrint( 3, " Removed %s\n", filename1 ); - } - } - return !error; } // private methods // -void -DefinitionsC::getStreamIds( std::vector & streamIds ) -{ -#ifdef VT_MPI - if( NumRanks > 1 ) - { - // distribute stream ids to ranks, whereas childs will not be - // separated from its parent stream id - // - - VT_MPI_INT rank = 0; - for( uint32_t i = 0; i < UnifyCtls.size(); i++ ) - { - // ignore stream, if it isn't available - if( !UnifyCtls[i]->stream_avail ) - continue; - - // add stream id to vector, if it's for my rank - if( rank == 
MyRank ) - streamIds.push_back( UnifyCtls[i]->streamid ); - - // get rank for the next stream id - // - if( i < UnifyCtls.size() - 1 && UnifyCtls[i+1]->pstreamid == 0 ) - { - if( rank + 1 < NumRanks ) - { - rank++; - } - else - { - rank = 0; - } - } - } - } - else -#endif // VT_MPI - { - // add all available stream ids into vector, if serial - streamIds.assign( MyStreamIds.begin(), MyStreamIds.end() ); - } -} - bool -DefinitionsC::readLocal( const std::vector & streamIds ) +DefinitionsC::readLocal() { bool error = false; @@ -404,29 +383,23 @@ DefinitionsC::readLocal( const std::vector & streamIds ) // vector of local definitions LargeVectorC loc_defs; - for( uint32_t i = 0; i < streamIds.size(); i++ ) + for( uint32_t i = 0; i < MyStreamIds.size(); i++ ) { uint32_t defs_read = loc_defs.size(); // N defs. read in this iteration - bool presort = false; // flag: pre-sort subset of local definitions? // put local definitions of streams which belonging together into // one vector // - for( ; i < streamIds.size(); i++ ) + for( ; i < MyStreamIds.size(); i++ ) { // read local definitions of stream - if( (error = !readLocal( streamIds[i], loc_defs )) ) + if( (error = !readLocal( MyStreamIds[i], loc_defs )) ) break; - if( i < streamIds.size() - 1 ) - { - // abort loop, if next stream isn't a child - if( StreamId2UnifyCtl[streamIds[i+1]]->pstreamid == 0 ) - break; - // otherwise, set flag to pre-sort subset of local definitions - else - presort = true; - } + // abort loop, if next stream isn't a child + if( i < MyStreamIds.size() - 1 && + StreamId2UnifyCtl[MyStreamIds[i+1]]->pstreamid == 0 ) + break; } if( error ) break; @@ -435,23 +408,21 @@ DefinitionsC::readLocal( const std::vector & streamIds ) defs_read = loc_defs.size() - defs_read; // continue, if nothing is read - if( ( i >= streamIds.size() - 1 && loc_defs.empty() ) || - ( i < streamIds.size() - 1 && defs_read == 0 ) ) + if( ( i >= MyStreamIds.size() - 1 && loc_defs.empty() ) || + ( i < MyStreamIds.size() - 1 && 
defs_read == 0 ) ) continue; - // pre-sort subset of local definitions, if necessary + // pre-sort subset of local definitions // - if( presort ) - { - // get begin iterator of subset - // - std::vector::iterator sort_begin_it = loc_defs.begin(); - if( loc_defs.size() != defs_read ) - sort_begin_it += ( loc_defs.size() - defs_read - 1 ); - // pre-sort - std::sort( sort_begin_it, loc_defs.end(), DefRec_LocCmp ); - } + // get begin iterator of subset + // + LargeVectorC::iterator sort_begin_it = loc_defs.begin(); + if( loc_defs.size() != defs_read ) + sort_begin_it += ( loc_defs.size() - defs_read - 1 ); + + // pre-sort + std::sort( sort_begin_it, loc_defs.end(), DefRec_LocCmp ); MASTER { @@ -515,7 +486,7 @@ DefinitionsC::readLocal( const std::vector & streamIds ) } // continue reading, if minimum buffer size isn't reached - if( i < streamIds.size() && buffer_size < min_msg_size ) + if( i < MyStreamIds.size() && buffer_size < min_msg_size ) continue; // allocate memory for the send buffer @@ -680,6 +651,11 @@ DefinitionsC::readLocal( const std::vector & streamIds ) new_loc_def = new DefRec_DefProcessGroupS(); break; } + case DEF_REC_TYPE__DefProcessGroupAttributes: + { + new_loc_def = new DefRec_DefProcessGroupAttributesS(); + break; + } case DEF_REC_TYPE__DefSclFile: { new_loc_def = new DefRec_DefSclFileS(); @@ -725,6 +701,11 @@ DefinitionsC::readLocal( const std::vector & streamIds ) new_loc_def = new DefRec_DefCounterS(); break; } + case DEF_REC_TYPE__DefCounterAssignments: + { + new_loc_def = new DefRec_DefCounterAssignmentsS(); + break; + } case DEF_REC_TYPE__DefKeyValue: { new_loc_def = new DefRec_DefKeyValueS(); @@ -857,140 +838,142 @@ DefinitionsC::readLocal( const uint32_t & streamId, // close definitions buffer OTF_RStream_closeDefBuffer( rstream ); - // create record handler and set the local definition - // vector as first handler argument for ... 
+ // create record handler array // - OTF_HandlerArray * handler_array = OTF_HandlerArray_open(); assert( handler_array ); + // create first handler argument + FirstHandlerArg_DefsS fha( locDefs ); + + // set record handler and its first argument for ... + // + // ... OTF_DEFINITIONCOMMENT_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefComment, + (OTF_FunctionPointer*)HandleDefComment, OTF_DEFINITIONCOMMENT_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFINITIONCOMMENT_RECORD ); + &fha, OTF_DEFINITIONCOMMENT_RECORD ); // ... OTF_DEFCREATOR_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefCreator, + (OTF_FunctionPointer*)HandleDefCreator, OTF_DEFCREATOR_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFCREATOR_RECORD ); + &fha, OTF_DEFCREATOR_RECORD ); // ... OTF_DEFTIMERRESOLUTION_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefTimerResolution, + (OTF_FunctionPointer*)HandleDefTimerResolution, OTF_DEFTIMERRESOLUTION_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFTIMERRESOLUTION_RECORD ); + &fha, OTF_DEFTIMERRESOLUTION_RECORD ); // ... OTF_DEFTIMERANGE_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefTimeRange, + (OTF_FunctionPointer*)HandleDefTimeRange, OTF_DEFTIMERANGE_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFTIMERANGE_RECORD ); + &fha, OTF_DEFTIMERANGE_RECORD ); // ... OTF_DEFPROCESSGROUP_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefProcessGroup, + (OTF_FunctionPointer*)HandleDefProcessGroup, OTF_DEFPROCESSGROUP_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFPROCESSGROUP_RECORD ); + &fha, OTF_DEFPROCESSGROUP_RECORD ); + + // ... 
OTF_DEFPROCESSORGROUPATTR_RECORD + OTF_HandlerArray_setHandler( handler_array, + (OTF_FunctionPointer*)HandleDefProcessGroupAttributes, + OTF_DEFPROCESSORGROUPATTR_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handler_array, + &fha, OTF_DEFPROCESSORGROUPATTR_RECORD ); // ... OTF_DEFPROCESS_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefProcess, + (OTF_FunctionPointer*)HandleDefProcess, OTF_DEFPROCESS_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFPROCESS_RECORD ); + &fha, OTF_DEFPROCESS_RECORD ); // ... OTF_DEFSCLFILE_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefSclFile, + (OTF_FunctionPointer*)HandleDefSclFile, OTF_DEFSCLFILE_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFSCLFILE_RECORD ); + &fha, OTF_DEFSCLFILE_RECORD ); // ... OTF_DEFSCL_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefScl, + (OTF_FunctionPointer*)HandleDefScl, OTF_DEFSCL_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFSCL_RECORD ); + &fha, OTF_DEFSCL_RECORD ); // ... OTF_DEFFILEGROUP_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefFileGroup, + (OTF_FunctionPointer*)HandleDefFileGroup, OTF_DEFFILEGROUP_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFFILEGROUP_RECORD ); + &fha, OTF_DEFFILEGROUP_RECORD ); // ... OTF_DEFFILE_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefFile, + (OTF_FunctionPointer*)HandleDefFile, OTF_DEFFILE_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFFILE_RECORD ); + &fha, OTF_DEFFILE_RECORD ); // ... 
OTF_DEFFUNCTIONGROUP_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefFunctionGroup, + (OTF_FunctionPointer*)HandleDefFunctionGroup, OTF_DEFFUNCTIONGROUP_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFFUNCTIONGROUP_RECORD ); + &fha, OTF_DEFFUNCTIONGROUP_RECORD ); // ... OTF_DEFFUNCTION_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefFunction, + (OTF_FunctionPointer*)HandleDefFunction, OTF_DEFFUNCTION_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFFUNCTION_RECORD ); + &fha, OTF_DEFFUNCTION_RECORD ); // ... OTF_DEFCOLLOP_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefCollOp, + (OTF_FunctionPointer*)HandleDefCollOp, OTF_DEFCOLLOP_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFCOLLOP_RECORD ); + &fha, OTF_DEFCOLLOP_RECORD ); // ... OTF_DEFCOUNTERGROUP_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefCounterGroup, + (OTF_FunctionPointer*)HandleDefCounterGroup, OTF_DEFCOUNTERGROUP_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFCOUNTERGROUP_RECORD ); + &fha, OTF_DEFCOUNTERGROUP_RECORD ); // ... OTF_DEFCOUNTER_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefCounter, + (OTF_FunctionPointer*)HandleDefCounter, OTF_DEFCOUNTER_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFCOUNTER_RECORD ); + &fha, OTF_DEFCOUNTER_RECORD ); + + // ... OTF_DEFCOUNTERASSIGNMENTS_RECORD + OTF_HandlerArray_setHandler( handler_array, + (OTF_FunctionPointer*)HandleDefCounterAssignments, + OTF_DEFCOUNTERASSIGNMENTS_RECORD ); + OTF_HandlerArray_setFirstHandlerArg( handler_array, + &fha, OTF_DEFCOUNTERASSIGNMENTS_RECORD ); // ... 
OTF_DEFKEYVALUE_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefKeyValue, + (OTF_FunctionPointer*)HandleDefKeyValue, OTF_DEFKEYVALUE_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFKEYVALUE_RECORD ); + &fha, OTF_DEFKEYVALUE_RECORD ); // read local definitions // @@ -1112,6 +1095,45 @@ DefinitionsC::processLocal( const LargeVectorC & locDefs ) break; } + case DEF_REC_TYPE__DefProcessGroupAttributes: + { + // get local definition entry + DefRec_DefProcessGroupAttributesS * loc_def_entry = + static_cast( locDefs[i] ); + + // get global token factory for DefProcessGroup + static TokenFactoryScopeI * tkfac_defprocgrp = + theTokenFactory->getScope( DEF_REC_TYPE__DefProcessGroup ); + + // get global token for DefProcessGroup + // + uint32_t global_procgrp = + tkfac_defprocgrp->translate( loc_def_entry->loccpuid, + loc_def_entry->deftoken ); + assert( global_procgrp != 0 ); + + // search for global attributes by process group token + std::map::iterator + procgrp_attrs_it = + m_globDefs.procGrpAttrs.find( global_procgrp ); + + // add global process group attributes, if not found + // + if( procgrp_attrs_it == m_globDefs.procGrpAttrs.end() ) + { + procgrp_attrs_it = + m_globDefs.procGrpAttrs.insert( + std::make_pair( global_procgrp, + DefRec_DefProcessGroupAttributesS() ) ).first; + + procgrp_attrs_it->second.deftoken = global_procgrp; + } + + // add attributes to global process group attributes + procgrp_attrs_it->second.attributes |= loc_def_entry->attributes; + + break; + } case DEF_REC_TYPE__DefSclFile: { // get local definition entry @@ -1310,6 +1332,59 @@ DefinitionsC::processLocal( const LargeVectorC & locDefs ) break; } + case DEF_REC_TYPE__DefCounterAssignments: + { + // get local definition entry + DefRec_DefCounterAssignmentsS * loc_def_entry = + static_cast( locDefs[i] ); + + // get global token factory for DefCounter + static TokenFactoryScopeI * tkfac_defcntr = + theTokenFactory->getScope( 
DEF_REC_TYPE__DefCounter ); + + // get global token factory for DefProcessGroup + static TokenFactoryScopeI * tkfac_defprocgrp = + theTokenFactory->getScope( DEF_REC_TYPE__DefProcessGroup ); + + // get global token for DefCounter + // + uint32_t global_cntr = + tkfac_defcntr->translate( + loc_def_entry->loccpuid, loc_def_entry->deftoken ); + assert( global_cntr != 0 ); + + // search for global counter assignments by counter token + std::map::iterator + cntr_assigns_it = + m_globDefs.cntrAssigns.find( global_cntr ); + + // add global counter assignments, if not found + // + if( cntr_assigns_it == m_globDefs.cntrAssigns.end() ) + { + cntr_assigns_it = + m_globDefs.cntrAssigns.insert( std::make_pair( global_cntr, + DefRec_DefCounterAssignmentsS() ) ).first; + + cntr_assigns_it->second.deftoken = global_cntr; + } + + // get global token for DefProcessGroup + // + uint32_t global_procgrp = + tkfac_defprocgrp->translate( loc_def_entry->loccpuid, + *(loc_def_entry->groups.begin()) ); + assert( global_procgrp != 0 ); + + // add process group token to global counter assignments + cntr_assigns_it->second.groups.insert( global_procgrp ); + + // add process group token to stream + m_groupCntrs->addGroupToStream( loc_def_entry->loccpuid, + global_procgrp ); + + break; + } case DEF_REC_TYPE__DefKeyValue: { // get local definition entry @@ -1335,18 +1410,6 @@ DefinitionsC::processLocal( const LargeVectorC & locDefs ) return !error; } -// function for resorting global definitions based on T::SortS -template -static void resort_glob_defs( const std::set & in, - std::set & out ) -{ - for( typename std::set::const_iterator it = - in.begin(); it != in.end(); it++ ) - { - out.insert( &(*it) ); - } -} - bool DefinitionsC::writeGlobal() { @@ -1393,18 +1456,21 @@ DefinitionsC::writeGlobal() { bool do_write = true; - // get copy of definition record in order that hook(s) can - // modify it - DefRec_DefCreatorS record = m_globDefs.creator; + // get reference to definition record for more 
convenient access + DefRec_DefCreatorS & record = m_globDefs.creator; // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_DefCreator, 3, &wstream, &(record.creator), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefCreator( wstream, - record.creator.c_str() ) == 0 ); + { + error = + ( OTF_WStream_writeDefCreator( wstream, + record.creator.c_str() ) == 0 ); + } break; } @@ -1412,9 +1478,8 @@ DefinitionsC::writeGlobal() { bool do_write = true; - // get copy of definition record in order that hook(s) can - // modify it - DefRec_DefTimerResolutionS record = m_globDefs.timeres; + // get reference to definition record for more convenient access + DefRec_DefTimerResolutionS & record = m_globDefs.timeres; // trigger write record hook theHooks->triggerWriteRecordHook( @@ -1422,9 +1487,13 @@ DefinitionsC::writeGlobal() &(record.ticksPerSecond), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefTimerResolution( wstream, - record.ticksPerSecond ) == 0 ); + { + error = + ( OTF_WStream_writeDefTimerResolution( wstream, + record.ticksPerSecond ) == 0 ); + } break; } @@ -1432,9 +1501,8 @@ DefinitionsC::writeGlobal() { bool do_write = true; - // get copy of definition record in order that hook(s) can - // modify it - DefRec_DefTimeRangeS record = m_globDefs.timerange; + // get reference to definition record for more convenient access + DefRec_DefTimeRangeS & record = m_globDefs.timerange; // trigger write record hook theHooks->triggerWriteRecordHook( @@ -1442,9 +1510,13 @@ DefinitionsC::writeGlobal() &(record.minTime), &(record.maxTime), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefTimeRange( wstream, - record.minTime, record.maxTime, 0 ) == 0 ); + { + error = + ( OTF_WStream_writeDefTimeRange( wstream, record.minTime, + record.maxTime, 0 ) == 0 ); + } break; } @@ -1454,12 +1526,12 @@ DefinitionsC::writeGlobal() // typedef - std::set - resorted_comments_t; + 
std::set resorted_comments_t; resorted_comments_t resorted_comments; - resort_glob_defs( + resortGlobDefs( m_globDefs.comments, resorted_comments ); // iterate over all definition comments @@ -1479,9 +1551,13 @@ DefinitionsC::writeGlobal() &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefinitionComment( - wstream, record.comment.c_str() ) == 0 ); + { + error = + ( OTF_WStream_writeDefinitionComment( wstream, + record.comment.c_str() ) == 0 ); + } } break; @@ -1492,12 +1568,12 @@ DefinitionsC::writeGlobal() // typedef - std::set - resorted_procs_t; + std::set resorted_procs_t; resorted_procs_t resorted_procs; - resort_glob_defs( + resortGlobDefs( m_globDefs.procs, resorted_procs ); // iterate over all process definitions @@ -1516,10 +1592,13 @@ DefinitionsC::writeGlobal() &(record.parent), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefProcess( wstream, - record.deftoken, record.name.c_str(), - record.parent ) == 0 ); + { + error = + ( OTF_WStream_writeDefProcess( wstream, record.deftoken, + record.name.c_str(), record.parent ) == 0 ); + } } break; @@ -1530,12 +1609,12 @@ DefinitionsC::writeGlobal() // typedef - std::set - resorted_proc_grps_t; + std::set resorted_proc_grps_t; resorted_proc_grps_t resorted_proc_grps; - resort_glob_defs( + resortGlobDefs( m_globDefs.procGrps, resorted_proc_grps ); // iterate over all process group definitions @@ -1550,31 +1629,136 @@ DefinitionsC::writeGlobal() DefRec_DefProcessGroupS record = **it; // inflate group members - m_procGrps->inflateMembers( record.members ); + m_procGrps->inflateMembers( record ); // trigger write record hook theHooks->triggerWriteRecordHook( - HooksC::Record_DefProcessGroup, 5, &wstream, - &(record.deftoken), &(record.name), &(record.members), - &do_write ); + HooksC::Record_DefProcessGroup, 6, &wstream, + &(record.deftoken), &(record.name), &(record.nmembers), + &(record.members), &do_write ); + // write record + // if( do_write ) { - // 
convert std::vector to C-array - // - uint32_t n = record.members.size(); - uint32_t * array = new uint32_t[n]; - assert( array ); - for( uint32_t i = 0; i < n; i++ ) - array[i] = record.members[i]; - - // write record error = ( OTF_WStream_writeDefProcessGroup( wstream, - record.deftoken, record.name.c_str(), n, - array ) == 0 ); + record.deftoken, record.name.c_str(), + record.nmembers, record.members ) == 0 ); + } + } - delete[] array; + break; + } + case DEF_REC_TYPE__DefProcessGroupAttributes: + { + if( !m_globDefs.procGrpAttrs.empty() ) + { + typedef DefRec_DefProcessGroupAttributesS + attrs_list_t; + + // storage of global attributes list definitions + std::set global_attrs_lists; + + // create global attributes list definitions + // + { + // create global token factory scope for attributes list + // definitions + // + TokenFactoryScopeC * tkfac_attrs_list = + new TokenFactoryScopeC + ( &global_attrs_lists ); + assert( tkfac_attrs_list ); + + // iterate over all process group attr. 
definitions + for( std::map::iterator + procgrp_attrs_it = m_globDefs.procGrpAttrs.begin(); + procgrp_attrs_it != m_globDefs.procGrpAttrs.end(); + procgrp_attrs_it++ ) + { + bool do_write = true; + + DefRec_DefProcessGroupAttributesS & procgrp_attrs = + procgrp_attrs_it->second; + + // trigger write record hook + theHooks->triggerWriteRecordHook( + HooksC::Record_DefProcessGroupAttributes, 4, + &wstream, &(procgrp_attrs.deftoken), + &(procgrp_attrs.attributes), &do_write ); + + if( do_write ) + { + // create global attributes list definition + // + + attrs_list_t attrs_list = + attrs_list_t( 0, 0, procgrp_attrs.attributes ); + + procgrp_attrs.attributes = + tkfac_attrs_list->create( &attrs_list ); + } + else + { + procgrp_attrs.attributes = 0; + } + } + + // delete global token factory scope for attributes list + // definitions + delete tkfac_attrs_list; + } + + // write global attributes list definitions + // + { + // iterate over all attributes list definitions + for( std::set::const_iterator attrs_list_it = + global_attrs_lists.begin(); attrs_list_it != + global_attrs_lists.end() && !error; attrs_list_it++ ) + { + // convert bitmask to array + // + uint32_t n = 0; + OTF_ATTR_TYPE array[32]; + for( uint32_t i = 0; i < 32; i++ ) + { + if( attrs_list_it->attributes & (1<( i ); + } + + // write record + error = + ( OTF_WStream_writeDefAttributeList( wstream, + attrs_list_it->deftoken, n, array ) == 0 ); + } + } + + // write global process group attr. definitions + // + { + // iterate over all process group attr. 
definitions + for( std::map::const_iterator + procgrp_attrs_it = m_globDefs.procGrpAttrs.begin(); + procgrp_attrs_it != m_globDefs.procGrpAttrs.end() && + !error; procgrp_attrs_it++ ) + { + const DefRec_DefProcessGroupAttributesS & procgrp_attrs + = procgrp_attrs_it->second; + + // write record, if global attributes token is present + // + if( procgrp_attrs.attributes != 0 ) + { + error = + ( OTF_WStream_writeDefProcessOrGroupAttributes( + wstream, procgrp_attrs.deftoken, + procgrp_attrs.attributes ) == 0 ); + } + } } } @@ -1586,12 +1770,12 @@ DefinitionsC::writeGlobal() // typedef - std::set - resorted_scl_files_t; + std::set resorted_scl_files_t; resorted_scl_files_t resorted_scl_files; - resort_glob_defs( + resortGlobDefs( m_globDefs.sclFiles, resorted_scl_files ); // iterate over all scl file definitions @@ -1611,10 +1795,13 @@ DefinitionsC::writeGlobal() &do_write ); // write record + // if( do_write ) + { error = - ( OTF_WStream_writeDefSclFile( wstream, - record.deftoken, record.filename.c_str() ) == 0 ); + ( OTF_WStream_writeDefSclFile( wstream, record.deftoken, + record.filename.c_str() ) == 0 ); + } } break; @@ -1630,7 +1817,7 @@ DefinitionsC::writeGlobal() resorted_scls_t resorted_scls; - resort_glob_defs( + resortGlobDefs( m_globDefs.scls, resorted_scls ); // iterate over all scl definitions @@ -1650,10 +1837,13 @@ DefinitionsC::writeGlobal() &(record.sclline), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefScl( wstream, - record.deftoken, record.sclfile, - record.sclline ) == 0 ); + { + error = + ( OTF_WStream_writeDefScl( wstream, record.deftoken, + record.sclfile, record.sclline ) == 0 ); + } } break; @@ -1664,12 +1854,12 @@ DefinitionsC::writeGlobal() // typedef - std::set - resorted_file_grps_t; + std::set resorted_file_grps_t; resorted_file_grps_t resorted_file_grps; - resort_glob_defs( + resortGlobDefs( m_globDefs.fileGrps, resorted_file_grps ); // iterate over all file group definitions @@ -1689,9 +1879,12 @@ 
DefinitionsC::writeGlobal() &do_write ); // write record + // if( do_write ) + { error = ( OTF_WStream_writeDefFileGroup( wstream, record.deftoken, record.name.c_str() ) == 0 ); + } } break; @@ -1707,7 +1900,7 @@ DefinitionsC::writeGlobal() resorted_files_t resorted_files; - resort_glob_defs( + resortGlobDefs( m_globDefs.files, resorted_files ); // iterate over all file definitions @@ -1726,10 +1919,13 @@ DefinitionsC::writeGlobal() &(record.group), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefFile( wstream, - record.deftoken, record.name.c_str(), - record.group ) == 0 ); + { + error = + ( OTF_WStream_writeDefFile( wstream, record.deftoken, + record.name.c_str(), record.group ) == 0 ); + } } break; @@ -1740,12 +1936,12 @@ DefinitionsC::writeGlobal() // typedef - std::set - resorted_func_grps_t; + std::set resorted_func_grps_t; resorted_func_grps_t resorted_func_grps; - resort_glob_defs( + resortGlobDefs( m_globDefs.funcGrps, resorted_func_grps ); // iterate over all function group definitions @@ -1765,9 +1961,13 @@ DefinitionsC::writeGlobal() &(record.deftoken), &(record.name), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefFunctionGroup( wstream, - record.deftoken, record.name.c_str() ) == 0 ); + { + error = + ( OTF_WStream_writeDefFunctionGroup( wstream, + record.deftoken, record.name.c_str() ) == 0 ); + } } break; @@ -1778,12 +1978,12 @@ DefinitionsC::writeGlobal() // typedef - std::set - resorted_funcs_t; + std::set resorted_funcs_t; resorted_funcs_t resorted_funcs; - resort_glob_defs( + resortGlobDefs( m_globDefs.funcs, resorted_funcs ); // iterate over all function definitions @@ -1803,10 +2003,14 @@ DefinitionsC::writeGlobal() &(record.group), &(record.scltoken), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefFunction( wstream, - record.deftoken, record.name.c_str(), - record.group, record.scltoken ) == 0 ); + { + error = + ( OTF_WStream_writeDefFunction( 
wstream, + record.deftoken, record.name.c_str(), + record.group, record.scltoken ) == 0 ); + } } break; @@ -1818,11 +2022,11 @@ DefinitionsC::writeGlobal() typedef std::set - resorted_collops_t; + resorted_collops_t; resorted_collops_t resorted_collops; - resort_glob_defs( + resortGlobDefs( m_globDefs.collops, resorted_collops ); // iterate over all collop. definitions @@ -1842,10 +2046,14 @@ DefinitionsC::writeGlobal() &(record.type), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefCollectiveOperation( wstream, - record.deftoken, record.name.c_str(), - record.type ) == 0 ); + { + error = + ( OTF_WStream_writeDefCollectiveOperation( wstream, + record.deftoken, record.name.c_str(), + record.type ) == 0 ); + } } break; @@ -1856,12 +2064,12 @@ DefinitionsC::writeGlobal() // typedef - std::set - resorted_cntr_grps_t; + std::set resorted_cntr_grps_t; resorted_cntr_grps_t resorted_cntr_grps; - resort_glob_defs( + resortGlobDefs( m_globDefs.cntrGrps, resorted_cntr_grps ); // iterate over all counter group definitions @@ -1881,9 +2089,13 @@ DefinitionsC::writeGlobal() &(record.deftoken), &(record.name), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefCounterGroup( wstream, - record.deftoken, record.name.c_str() ) == 0 ); + { + error = + ( OTF_WStream_writeDefCounterGroup( wstream, + record.deftoken, record.name.c_str() ) == 0 ); + } } break; @@ -1899,7 +2111,7 @@ DefinitionsC::writeGlobal() resorted_cntrs_t resorted_cntrs; - resort_glob_defs( + resortGlobDefs( m_globDefs.cntrs, resorted_cntrs ); // iterate over all counter definitions @@ -1920,27 +2132,77 @@ DefinitionsC::writeGlobal() &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefCounter( wstream, - record.deftoken, record.name.c_str(), - record.properties, record.group, - record.unit.c_str() ) == 0 ); + { + error = + ( OTF_WStream_writeDefCounter( wstream, + record.deftoken, record.name.c_str(), + record.properties, 
record.group, + record.unit.c_str() ) == 0 ); + } } break; } + case DEF_REC_TYPE__DefCounterAssignments: + { + // iterate over all counter assignment definitions + for( std::map::iterator + cntr_assigns_it = m_globDefs.cntrAssigns.begin(); + cntr_assigns_it != m_globDefs.cntrAssigns.end(); + cntr_assigns_it++ ) + { + bool do_write = true; + + DefRec_DefCounterAssignmentsS & record = + cntr_assigns_it->second; + + // trigger write record hook + theHooks->triggerWriteRecordHook( + HooksC::Record_DefCounterAssignments, 4, + &wstream, &(record.deftoken), &(record.groups), &do_write ); + + // write record + // + if( do_write ) + { + // convert std::set to C-array + // + uint32_t n = record.groups.size(); + uint32_t * array = new uint32_t[n]; + assert( array ); + uint32_t i = 0; + for( std::set::const_iterator it = + record.groups.begin(); it != record.groups.end(); + it++, i++ ) + { + array[i] = *it; + } + + // write record + error = + ( OTF_WStream_writeDefCounterAssignments( wstream, + record.deftoken, n, array, 0 ) == 0 ); + + delete [] array; + } + } + + break; + } case DEF_REC_TYPE__DefKeyValue: { // resort key-value definitions // typedef - std::set - resorted_keyvals_t; + std::set resorted_keyvals_t; resorted_keyvals_t resorted_keyvals; - resort_glob_defs( + resortGlobDefs( m_globDefs.keyVals, resorted_keyvals ); // iterate over all key-value definitions @@ -1960,11 +2222,15 @@ DefinitionsC::writeGlobal() &(record.name), &do_write ); // write record + // if( do_write ) - error = ( OTF_WStream_writeDefKeyValue( wstream, - record.deftoken, record.type, - record.name.c_str(), - "" /* description */ ) == 0 ); + { + error = + ( OTF_WStream_writeDefKeyValue( wstream, + record.deftoken, record.type, + record.name.c_str(), + "" /* description */ ) == 0 ); + } } break; @@ -2000,20 +2266,9 @@ DefinitionsC::writeGlobal() //////////////////// sub-class DefinitionsC::CommentsC //////////////////// -// private methods +// public methods // -DefinitionsC::CommentsC::CommentsC( 
DefinitionsC & _defs ) - : m_defs( _defs ), m_seqOrderIdx( 0 ) -{ - // Empty -} - -DefinitionsC::CommentsC::~CommentsC() -{ - // Empty -} - bool DefinitionsC::CommentsC::processLocal( const DefRec_DefCommentS & locComment ) { @@ -2033,8 +2288,8 @@ DefinitionsC::CommentsC::processLocal( const DefRec_DefCommentS & locComment ) assert( iss ); // update minimum start time, if necessary - if( starttime < m_traceTimes.minStartTimeEpoch ) - m_traceTimes.minStartTimeEpoch = starttime; + if( starttime < m_minStartTimeEpoch ) + m_minStartTimeEpoch = starttime; break; } @@ -2047,8 +2302,8 @@ DefinitionsC::CommentsC::processLocal( const DefRec_DefCommentS & locComment ) assert( iss ); // update maximum stop time, if necessary - if( stoptime > m_traceTimes.maxStopTimeEpoch ) - m_traceTimes.maxStopTimeEpoch = stoptime; + if( stoptime > m_maxStopTimeEpoch ) + m_maxStopTimeEpoch = stoptime; break; } @@ -2097,16 +2352,33 @@ DefinitionsC::CommentsC::processLocal( const DefRec_DefCommentS & locComment ) } } - // temporary store user communication id and peer - m_userCom.comIdsAndPeers.push_back( - UserComS::ComIdPeerS( UserComC::ComIdS( comm, tag ), peer, - ( locComment.type == DefRec_DefCommentS::TYPE_USRCOM_SEND ) ) ); + // get global token factory for DefProcessGroup + TokenFactoryScopeI * tkfac_defprocgrp = + theTokenFactory->getScope( DEF_REC_TYPE__DefProcessGroup ); - // add process id to certain user communicator - m_defs.m_procGrps->m_userCom.addCommMember( - locComment.loccpuid & VT_TRACEID_BITMASK, comm, + // translate local comm. 
token + // + uint32_t global_comm = + tkfac_defprocgrp->translate( locComment.loccpuid, comm ); + assert( global_comm != 0 ); + + // add process id to members of user communicator + m_defs.m_procGrps->m_userCom.addCommMember( global_comm, locComment.loccpuid ); + // register communication id and its peer + // + if( locComment.type == DefRec_DefCommentS::TYPE_USRCOM_SEND ) + { + theUserCom->addSender( UserComC::ComIdS( global_comm, tag ), + peer ); + } + else // locComment.type == DefRec_DefCommentS::TYPE_USRCOM_RECV + { + theUserCom->addReceiver( UserComC::ComIdS( global_comm, tag ), + peer ); + } + break; } case DefRec_DefCommentS::TYPE_VT: @@ -2166,13 +2438,13 @@ DefinitionsC::CommentsC::finish( void ) // add time comments to global definitions, if present // - if( m_traceTimes.minStartTimeEpoch != (uint64_t)-1 && - m_traceTimes.maxStopTimeEpoch != 0 ) + if( m_minStartTimeEpoch != (uint64_t)-1 && m_maxStopTimeEpoch != 0 ) { #ifdef VT_UNIFY_HOOKS_TDB // trigger HooksTdbC's generic hook to set trace times - theHooks->triggerGenericHook( VT_UNIFY_HOOKS_TDB_GENID__STARTSTOPTIME_EPOCH, - 2, &m_traceTimes.minStartTimeEpoch, &m_traceTimes.maxStopTimeEpoch ); + theHooks->triggerGenericHook( + VT_UNIFY_HOOKS_TDB_GENID__STARTSTOPTIME_EPOCH, + 2, &m_minStartTimeEpoch, &m_maxStopTimeEpoch ); #endif // VT_UNIFY_HOOKS_TDB // get reference to global definition comments @@ -2208,15 +2480,15 @@ DefinitionsC::CommentsC::finish( void ) if( i == 1 ) { - tt = (time_t)(m_traceTimes.minStartTimeEpoch / 1e6); + tt = (time_t)(m_minStartTimeEpoch / 1e6); ss << " Start: " << asctime(localtime(&tt)) << "(" - << m_traceTimes.minStartTimeEpoch << ")"; + << m_minStartTimeEpoch << ")"; } else // i == 2 { - tt = (time_t)(m_traceTimes.maxStopTimeEpoch / 1e6); + tt = (time_t)(m_maxStopTimeEpoch / 1e6); ss << " Stop: " << asctime(localtime(&tt)) << "(" - << m_traceTimes.maxStopTimeEpoch << ")"; + << m_maxStopTimeEpoch << ")"; } new_comment.comment = ss.str(); @@ -2231,8 +2503,7 @@ 
DefinitionsC::CommentsC::finish( void ) std::ostringstream ss; tt = - (time_t)((m_traceTimes.maxStopTimeEpoch - - m_traceTimes.minStartTimeEpoch) / 1e6); + (time_t)((m_maxStopTimeEpoch - m_minStartTimeEpoch) / 1e6); gmtime_r(&tt, &elapsed_tm); ss << " Elapsed: " << (elapsed_tm.tm_hour < 10 ? "0" : "") @@ -2241,8 +2512,7 @@ DefinitionsC::CommentsC::finish( void ) << elapsed_tm.tm_min << ":" << (elapsed_tm.tm_sec < 10 ? "0" : "") << elapsed_tm.tm_sec - << " (" << m_traceTimes.maxStopTimeEpoch - - m_traceTimes.minStartTimeEpoch << ")"; + << " (" << m_maxStopTimeEpoch - m_minStartTimeEpoch << ")"; new_comment.comment = ss.str(); ss.str(""); ss.clear(); @@ -2256,57 +2526,14 @@ DefinitionsC::CommentsC::finish( void ) } } - // register user communication ids and peers - // - if( !m_userCom.comIdsAndPeers.empty() ) - { - // get global token factory for DefProcessGroup - TokenFactoryScopeI * tkfac_defprocgrp = - theTokenFactory->getScope( DEF_REC_TYPE__DefProcessGroup ); - - while( !m_userCom.comIdsAndPeers.empty() ) - { - // get first list element - std::list::iterator it = - m_userCom.comIdsAndPeers.begin(); - - // translate local comm. 
token - // - it->comid.comm = - tkfac_defprocgrp->translate( it->peer, it->comid.comm ); - assert( it->comid.comm != 0 ); - - // register communication id and its peer - // - if( it->is_sender ) - theUserCom->addSender( it->comid, it->peer ); - else - theUserCom->addReceiver( it->comid, it->peer ); - - // erase first list element - m_userCom.comIdsAndPeers.pop_front(); - } - } - return !error; } //////////////////// sub-class DefinitionsC::ProcessGroupsC //////////////////// -// private methods +// public methods // -DefinitionsC::ProcessGroupsC::ProcessGroupsC( DefinitionsC & _defs ) - : m_defs( _defs ) -{ - // Empty -} - -DefinitionsC::ProcessGroupsC::~ProcessGroupsC() -{ - // Empty -} - bool DefinitionsC::ProcessGroupsC::processLocal( DefRec_DefProcessGroupS & locProcGrp ) @@ -2321,138 +2548,102 @@ DefinitionsC::ProcessGroupsC::processLocal( // switch( locProcGrp.type ) { - case DefRec_DefProcessGroupS::TYPE_NODE: + case DefRec_DefProcessGroupS::TYPE_MPI_COMM_WORLD: { - // add process id to node group members - m_node.name2Procs[locProcGrp.name].insert( locProcGrp.members[0] ); + // create global process group token, if not already done + if( m_mpi.worldComm.global_token == 0 ) + m_mpi.worldComm.global_token = tkfac_defprocgrp->getNextToken(); - break; - } - case DefRec_DefProcessGroupS::TYPE_GPU_GROUP: - { - // add process ids to GPU group members - m_gpu.procs.insert( locProcGrp.members.begin(), - locProcGrp.members.end() ); - - break; - } - case DefRec_DefProcessGroupS::TYPE_GPU_COMM: - { - // add process ids to comm. 
group members - m_gpu.commMembers.insert( locProcGrp.members.begin(), - locProcGrp.members.end() ); - - // add local token for translation - m_gpu.proc2LocCommTk[locProcGrp.loccpuid] = locProcGrp.deftoken; - - break; - } - case DefRec_DefProcessGroupS::TYPE_OMP_TEAM: - { - static uint32_t omp_thread_team_no = 0; - - // get reference to global process group definitions - std::set & glob_proc_grps = - m_defs.m_globDefs.procGrps; - - // deflate comm. group members - deflateMembers( locProcGrp.members ); - - std::set::const_iterator it = - glob_proc_grps.end(); - - // not the first OMP thread team comm.? - if( omp_thread_team_no > 0 ) - { - // search global definition by comm. group members - // (content m_globDefs is sorted by type; abort searching - // after last TYPE_OMP_TEAM) - // - for( it = glob_proc_grps.begin(); it != glob_proc_grps.end(); it++ ) - { - static bool abort_search = false; - if( it->type == DefRec_DefProcessGroupS::TYPE_OMP_TEAM ) - { - abort_search = true; - - if( it->members == locProcGrp.members ) - break; - } - else if( abort_search ) - { - break; - } - } - } - - // create global definition, if not found + // create global process group definition, only if members are present + // (only the first available (not disabled) process contains a + // filled process group for MPI_COMM_WORLD) // - if( it == glob_proc_grps.end() ) + if( locProcGrp.nmembers > 0 ) { - // compose comm. 
group name - // - std::ostringstream new_name; - new_name << m_omp.commName() << " " << omp_thread_team_no++; - locProcGrp.name = new_name.str(); + // deflate process group members + deflateMembers( locProcGrp ); - // create global definition - tkfac_defprocgrp->create( &locProcGrp ); + // set process group name + locProcGrp.name = MpiS::WorldCommS::NAME(); + + // create global process group definition with previous created + // global token + tkfac_defprocgrp->create( &locProcGrp, + m_mpi.worldComm.global_token ); } // otherwise, set token translation for process // else { tkfac_defprocgrp->setTranslation( locProcGrp.loccpuid, - locProcGrp.deftoken, it->deftoken ); + locProcGrp.deftoken, m_mpi.worldComm.global_token ); } break; } - case DefRec_DefProcessGroupS::TYPE_MPI_COMM_WORLD: + case DefRec_DefProcessGroupS::TYPE_MPI_COMM_SELF: { - // global token of process group definition - static uint32_t global_token = 0; + // process group member = id of defining process + const uint32_t member = locProcGrp.loccpuid & VT_TRACEID_BITMASK; - // temporary storage of local tokens to translate - static std::vector< - std::pair > local_tokens; + // compose and set process group name + // + std::ostringstream name; + name << MpiS::SelfCommsS::NAME() << " " << member - 1; + locProcGrp.name = name.str(); - // create global token, if necessary + // set process group member + locProcGrp.assignMembers( 1, &member, &member + 1 ); + + // create global process group definition + tkfac_defprocgrp->create( &locProcGrp ); + + break; + } + case DefRec_DefProcessGroupS::TYPE_MPI_COMM_OTHER: + case DefRec_DefProcessGroupS::TYPE_MPI_GROUP: + { + const bool is_mpi_group = + ( locProcGrp.type == DefRec_DefProcessGroupS::TYPE_MPI_GROUP ); + + // process group must have members + assert( locProcGrp.nmembers > 0 ); + + // deflate process group members and get its unique id + // + deflateMembers( locProcGrp ); + const uint32_t membersid = locProcGrp.members[1]; + + // get count by process and 
members (0 if it's a group) + const uint32_t count = + is_mpi_group ? 0 : ++(m_mpi.commsAndGroups.counts + [ std::make_pair( locProcGrp.loccpuid, membersid ) ]); + + // get global token by members and count + uint32_t & global_token = m_mpi.commsAndGroups.global_tokens + [ std::make_pair( membersid, count ) ]; + + // create global process group definition, if not already done // if( global_token == 0 ) { - // create global process group definition, if members are present - // (only the first available (not disabled) process contains a - // filled process group for MPI_COMM_WORLD) + // compose and set process group name // - if( !locProcGrp.members.empty() ) - { - // deflate comm. group members - deflateMembers( locProcGrp.members ); - - // set group name - locProcGrp.name = m_mpi.worldCommName(); - - // create global definition and get its token - global_token = tkfac_defprocgrp->create( &locProcGrp ); - - // set translations for temporary stored local tokens - // - for( uint32_t i = 0; i < local_tokens.size(); i++ ) - { - tkfac_defprocgrp->setTranslation( local_tokens[i].first, - local_tokens[i].second, global_token ); - } - local_tokens.clear(); - } - // otherwise, temporary store local token - // - else - { - local_tokens.push_back( - std::make_pair( locProcGrp.loccpuid, locProcGrp.deftoken ) ); + std::ostringstream name; + if( is_mpi_group ) + { + name << MpiS::CommsAndGroupsS::GROUP_NAME() << " " + << m_mpi.commsAndGroups.group_seqno++; } + else + { + name << MpiS::CommsAndGroupsS::COMM_NAME() << " " + << m_mpi.commsAndGroups.comm_seqno++; + } + locProcGrp.name = name.str(); + + // create global process group definition and store its token + global_token = tkfac_defprocgrp->create( &locProcGrp ); } // otherwise, set token translation for process // @@ -2464,75 +2655,83 @@ DefinitionsC::ProcessGroupsC::processLocal( break; } - case DefRec_DefProcessGroupS::TYPE_MPI_COMM_SELF: - { - // add communicator - m_mpi.selfComms.insert( - MpiS::SelfCommS( 
locProcGrp.deftoken, locProcGrp.loccpuid ) ); - - break; - } - case DefRec_DefProcessGroupS::TYPE_MPI_COMM_OTHER: - { - // comm. group must have members - assert( !locProcGrp.members.empty() ); - - // deflate comm. group members - // - deflateMembers( locProcGrp.members ); - uint32_t membersid = locProcGrp.members[1]; - - // search for communicator by its defining process and members - // (search reversed in order to get an increasing index) - std::list::reverse_iterator it = - std::find( m_mpi.proc2OtherComms[locProcGrp.loccpuid].rbegin(), - m_mpi.proc2OtherComms[locProcGrp.loccpuid].rend(), - MpiS::OtherCommS( membersid ) ); - - // get new local communicator index - // - uint32_t index = 0; - if( it != m_mpi.proc2OtherComms[locProcGrp.loccpuid].rend() ) - index = it->index + 1; - - // add communicator - m_mpi.proc2OtherComms[locProcGrp.loccpuid].push_back( - MpiS::OtherCommS( locProcGrp.deftoken, membersid, index ) ); - - break; - } case DefRec_DefProcessGroupS::TYPE_USER_COMM: { // search for communicator by its name - std::list::iterator it = - std::find( m_userCom.comms.begin(), m_userCom.comms.end(), - UserComS::CommS( locProcGrp.name ) ); + std::map::iterator it = + m_userCom.name2Comm.find( locProcGrp.name ); // add communicator, if not found // - if( it == m_userCom.comms.end() ) + if( it == m_userCom.name2Comm.end() ) { - m_userCom.comms.push_back( UserComS::CommS( locProcGrp.name ) ); - it = m_userCom.comms.end(); it--; + // create global communicator token + uint32_t global_token = tkfac_defprocgrp->getNextToken(); + + it = + m_userCom.name2Comm.insert( std::make_pair( locProcGrp.name, + new UserComS::CommS() ) ).first; + assert( it->second ); + + it->second->global_token = global_token; + + m_userCom.globTk2Comm[global_token] = it->second; + + // register global communicator token + theUserCom->addUserComm( global_token ); } - // add local token for translation - it->proc2LocCommTk[locProcGrp.loccpuid] = locProcGrp.deftoken; - - // add iterator of user 
comm. for fast access - m_userCom.proc2LocCommTk2CommIt - [locProcGrp.loccpuid & VT_TRACEID_BITMASK][locProcGrp.deftoken] = - it; + // set token translation for process + tkfac_defprocgrp->setTranslation( locProcGrp.loccpuid, + locProcGrp.deftoken, it->second->global_token ); break; } + case DefRec_DefProcessGroupS::TYPE_ALL: + case DefRec_DefProcessGroupS::TYPE_NODE: case DefRec_DefProcessGroupS::TYPE_OTHER: { - // deflate group members - deflateMembers( locProcGrp.members ); + if( locProcGrp.type == DefRec_DefProcessGroupS::TYPE_ALL ) + { + // set process group name + locProcGrp.name = OtherS::ALL_NAME(); + } + else if( locProcGrp.type == DefRec_DefProcessGroupS::TYPE_NODE ) + { + // add member process ids to the "All" process group + m_other.name2Group[OtherS::ALL_NAME()].members.insert( + locProcGrp.members, locProcGrp.members + locProcGrp.nmembers ); + } - // create global definition - tkfac_defprocgrp->create( &locProcGrp ); + // get reference to process group by name + OtherS::GroupS & group = m_other.name2Group[locProcGrp.name]; + + // create global process group token, if not already done + if( group.global_token == 0 ) + group.global_token = tkfac_defprocgrp->getNextToken(); + + // set token translation for process + tkfac_defprocgrp->setTranslation( locProcGrp.loccpuid, + locProcGrp.deftoken, group.global_token ); + + // add member process ids + // (the members of the "All" process group will be collected from the + // node process groups) + // + if( locProcGrp.type != DefRec_DefProcessGroupS::TYPE_ALL ) + { + // add id of defining process, if no members are present + // + if( locProcGrp.nmembers == 0 ) + { + group.members.insert( locProcGrp.loccpuid ); + } + else + { + group.members.insert( locProcGrp.members, + locProcGrp.members + locProcGrp.nmembers ); + } + } break; } @@ -2554,290 +2753,145 @@ DefinitionsC::ProcessGroupsC::finish( void ) TokenFactoryScopeI * tkfac_defprocgrp = theTokenFactory->getScope( DEF_REC_TYPE__DefProcessGroup ); - // get 
reference to global process group definitions - std::set & glob_proc_grps = - m_defs.m_globDefs.procGrps; - - // add node process groups to global definitions - // - if( !m_node.name2Procs.empty() ) - { - // initialize common stuff of new process groups - // - DefRec_DefProcessGroupS new_proc_grp; - new_proc_grp.loccpuid = 0; - new_proc_grp.type = DefRec_DefProcessGroupS::TYPE_NODE; - - // iterate over all node groups - for( std::map >::iterator - it = m_node.name2Procs.begin(); it != m_node.name2Procs.end(); it++ ) - { - // set global token, name, and members - // - new_proc_grp.deftoken = tkfac_defprocgrp->getNextToken(); - new_proc_grp.name = it->first; - new_proc_grp.members.assign( it->second.begin(), it->second.end() ); - it->second.clear(); - - // add node process group to global definitions - glob_proc_grps.insert( new_proc_grp ); - } - - // not needed anymore; free some memory - m_node.name2Procs.clear(); - } - - // add GPU process group to global definitions - // - if( !m_gpu.procs.empty() ) - { - // initialize new process group - // - DefRec_DefProcessGroupS new_proc_grp; - new_proc_grp.loccpuid = 0; - new_proc_grp.deftoken = tkfac_defprocgrp->getNextToken(); - new_proc_grp.type = DefRec_DefProcessGroupS::TYPE_GPU_GROUP; - new_proc_grp.name = m_gpu.groupName(); - new_proc_grp.members.assign( m_gpu.procs.begin(), m_gpu.procs.end() ); - m_gpu.procs.clear(); - - // add GPU process group to global definitions - glob_proc_grps.insert( new_proc_grp ); - } - - // add GPU communicator group to global definitions - // - if( !m_gpu.commMembers.empty() && !m_gpu.proc2LocCommTk.empty() ) - { - // initialize new process group - // - DefRec_DefProcessGroupS new_proc_grp; - new_proc_grp.type = DefRec_DefProcessGroupS::TYPE_GPU_COMM; - new_proc_grp.name = m_gpu.commName(); - new_proc_grp.members.assign( m_gpu.commMembers.begin(), - m_gpu.commMembers.end() ); - m_gpu.commMembers.clear(); - - // iterate over all local tokens - for( std::map::const_iterator it = - 
m_gpu.proc2LocCommTk.begin(); it != m_gpu.proc2LocCommTk.end(); - it++ ) - { - static uint32_t global_token = 0; - - // create global definition, if necessary - // - if( global_token == 0 ) - { - new_proc_grp.loccpuid = it->first; - new_proc_grp.deftoken = it->second; - global_token = tkfac_defprocgrp->create( &new_proc_grp ); - } - // otherwise, set token translation for process - // - else - { - tkfac_defprocgrp->setTranslation( it->first, it->second, - global_token ); - } - } - - // not needed anymore; free some memory - m_gpu.proc2LocCommTk.clear(); - } - - // add MPI_COMM_SELF groups to global definitions - // - if( !m_mpi.selfComms.empty() ) - { - // initialize new process group - // - DefRec_DefProcessGroupS new_proc_grp; - new_proc_grp.type = DefRec_DefProcessGroupS::TYPE_MPI_COMM_SELF; - new_proc_grp.members.resize( 1 ); - - uint32_t i = 0; - for( std::set::const_iterator it = - m_mpi.selfComms.begin(); it != m_mpi.selfComms.end(); it++, i++ ) - { - // set defining process, members, and local token - // - new_proc_grp.loccpuid = new_proc_grp.members[0] = it->member; - new_proc_grp.deftoken = it->loctk; - - // compose and set name - // - std::ostringstream name; - name << m_mpi.selfCommName() << " " << i; - new_proc_grp.name = name.str(); - - // create global definition - tkfac_defprocgrp->create( &new_proc_grp ); - } - - // not needed anymore; free some memory - m_mpi.selfComms.clear(); - } - - // add user created MPI communicator groups to global definitions - // - if( !m_mpi.proc2OtherComms.empty() ) - { - // map comm. members/index <-> global token - std::map, uint32_t> global_tokens; - - // global comm. 
index (=name suffix) - uint32_t global_index = 0; - - for( std::map >::const_iterator - proc_it = m_mpi.proc2OtherComms.begin(); - proc_it != m_mpi.proc2OtherComms.end(); proc_it++ ) - { - while( !m_mpi.proc2OtherComms[proc_it->first].empty() ) - { - // get first list element - std::list::const_iterator comm_it = - m_mpi.proc2OtherComms[proc_it->first].begin(); - - // get global token of comm. members/index - uint32_t & global_token = - global_tokens[std::make_pair(comm_it->membersid, - comm_it->index)]; - - // create global definition, if necessary - // - if( global_token == 0 ) - { - // initialize new process group - // - DefRec_DefProcessGroupS new_proc_grp; - new_proc_grp.type = DefRec_DefProcessGroupS::TYPE_MPI_COMM_OTHER; - new_proc_grp.loccpuid = proc_it->first; - new_proc_grp.deftoken = comm_it->deftoken; - - // set members - // - new_proc_grp.members.resize( 2 ); - new_proc_grp.members[0] = DEFLATED_MEMBERS_TAG; - new_proc_grp.members[1] = comm_it->membersid; - - // compose and set name - // - std::ostringstream name; - name << m_mpi.otherCommName() << " " << global_index++; - new_proc_grp.name = name.str(); - - // create global definition - global_token = tkfac_defprocgrp->create( &new_proc_grp ); - } - // otherwise, set token translation for process - // - else - { - tkfac_defprocgrp->setTranslation( proc_it->first, - comm_it->deftoken, global_token ); - } - - // erase first list element - m_mpi.proc2OtherComms[proc_it->first].pop_front(); - } - } - } - // add user communicator groups to global definitions // - while( !m_userCom.comms.empty() ) + if( !m_userCom.name2Comm.empty() ) { - // get first list element - std::list::const_iterator comm_it = - m_userCom.comms.begin(); - - // initialize new process group + // initialize new process group definition // DefRec_DefProcessGroupS new_proc_grp; new_proc_grp.type = DefRec_DefProcessGroupS::TYPE_USER_COMM; - new_proc_grp.name = comm_it->name; - new_proc_grp.members.assign( comm_it->members.begin(), - 
comm_it->members.end() ); - // create global definition - uint32_t global_token = tkfac_defprocgrp->create( &new_proc_grp ); - - // set token translations for processes - // - for( std::map::const_iterator it = - comm_it->proc2LocCommTk.begin(); - it != comm_it->proc2LocCommTk.end(); it++ ) + for( std::map::iterator comm_it = + m_userCom.name2Comm.begin(); comm_it != m_userCom.name2Comm.end(); + comm_it++ ) { - tkfac_defprocgrp->setTranslation( it->first, it->second, - global_token ); + assert( comm_it->second->global_token != 0 ); + + new_proc_grp.name = comm_it->first; + new_proc_grp.assignMembers( comm_it->second->members.size(), + comm_it->second->members.begin(), comm_it->second->members.end() ); + + // create global definition with previous created global token + tkfac_defprocgrp->create( &new_proc_grp, + comm_it->second->global_token ); + + delete comm_it->second; } - // register global user communicator token - theUserCom->addUserComm( global_token ); + m_userCom.name2Comm.clear(); + m_userCom.globTk2Comm.clear(); + } - // erase first list element - m_userCom.comms.pop_front(); + // add other process groups to global definitions + // + if( !m_other.name2Group.empty() ) + { + // initialize new process group definition + // + DefRec_DefProcessGroupS new_proc_grp; + new_proc_grp.type = DefRec_DefProcessGroupS::TYPE_OTHER; + + for( std::map::const_iterator group_it = + m_other.name2Group.begin(); + group_it != m_other.name2Group.end(); group_it++ ) + { + assert( group_it->second.global_token != 0 ); + + new_proc_grp.name = group_it->first; + new_proc_grp.assignMembers( group_it->second.members.size(), + group_it->second.members.begin(), group_it->second.members.end() ); + + // create global definition with previous created global token + tkfac_defprocgrp->create( &new_proc_grp, + group_it->second.global_token ); + } + + m_other.name2Group.clear(); } return !error; } void -DefinitionsC::ProcessGroupsC::deflateMembers( std::vector & members ) 
+DefinitionsC::ProcessGroupsC::deflateMembers( + DefRec_DefProcessGroupS & procGrp ) { - // return, if vector is empty or already deflated - if( members.empty() || members[0] == DEFLATED_MEMBERS_TAG ) + // return, if input group member array is empty or already deflated + if( procGrp.nmembers == 0 || procGrp.members[0] == DEFLATED_MEMBERS_TAG ) return; - // search for already known members + // search for already deflated group member array and get its unique id // - std::map >::const_iterator it; - for( it = m_id2Members.begin(); it != m_id2Members.end(); it++ ) + + std::pair::const_iterator, + std::multimap::const_iterator> range = + m_hash2UniqueMembers.equal_range( procGrp.members_hash ); + + uint32_t id = (uint32_t)-1; + + for( std::multimap::const_iterator it = + range.first; it != range.second; it++ ) { - if( it->second == members ) + if( it->second->nmembers != procGrp.nmembers ) + continue; + + if( memcmp( it->second->members, procGrp.members, + procGrp.nmembers * sizeof( uint32_t ) ) == 0 ) + { + id = it->second->id; break; + } } - uint32_t id; - - // get its unique id, if found + // if not found, create new unique id and assign input group member + // array to it // - if( it != m_id2Members.end() ) + if( id == (uint32_t)-1 ) { - id = it->first; - } - // otherwise, create new unique id and assign members to it - // - else - { - id = m_id2Members.size(); - m_id2Members[id] = members; + id = m_uniqueMembers.size(); + + UniqueMembersS * new_unique_members = + new UniqueMembersS( id, procGrp.nmembers, procGrp.members ); + assert( new_unique_members ); + + m_uniqueMembers.push_back( new_unique_members ); + m_hash2UniqueMembers.insert( + std::make_pair( procGrp.members_hash, new_unique_members ) ); } - // do actual deflating + // deflate input group member array // - members.resize( 2 ); - members[0] = DEFLATED_MEMBERS_TAG; // deflated-identifier - members[1] = id; // unique id + delete [] procGrp.members; + procGrp.nmembers = 2; + procGrp.members = new 
uint32_t[2]; + assert( procGrp.members ); + procGrp.members[0] = DEFLATED_MEMBERS_TAG; + procGrp.members[1] = id; } void -DefinitionsC::ProcessGroupsC::inflateMembers( std::vector & members ) +DefinitionsC::ProcessGroupsC::inflateMembers( + DefRec_DefProcessGroupS & procGrp ) { - // return, if vector is empty or not deflated - if( members.empty() || members[0] != DEFLATED_MEMBERS_TAG ) + // return, if input group member array is empty or not deflated + if( procGrp.nmembers == 0 || procGrp.members[0] != DEFLATED_MEMBERS_TAG ) return; - assert( members.size() == 2 ); + assert( procGrp.nmembers == 2 ); - // search for members by unique id + // get unique id of deflated input group member array // - std::map >::const_iterator it = - m_id2Members.find( members[1] ); - assert( it != m_id2Members.end() ); + uint32_t id = procGrp.members[1]; + assert( id < m_uniqueMembers.size() ); - // set vector - members = it->second; + // inflate input group member array + // + delete [] procGrp.members; + procGrp.nmembers = m_uniqueMembers[id]->nmembers; + procGrp.members = new uint32_t[procGrp.nmembers]; + assert( procGrp.members ); + memcpy( procGrp.members, m_uniqueMembers[id]->members, + procGrp.nmembers * sizeof( uint32_t ) ); } diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.h b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.h index 32a24736a0..31da714373 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.h +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.h @@ -32,6 +32,9 @@ class DefinitionsC { public: + // forward declaration of sub-class GroupCountersC + class GroupCountersC; + // constructor DefinitionsC(); @@ -44,301 +47,19 @@ public: // rename temporary output files bool cleanUp(); + // get instance of sub-class GroupCountersC + GroupCountersC * groupCounters() const { return m_groupCntrs; } + private: - // - // CommentsC sub-class - // (pre-processes comments before adding these to global definitions) - // - class CommentsC - { - friend class 
DefinitionsC; + // forward declaration of sub-class CommentsC + class CommentsC; - //private: + // forward declaration of sub-class ProcessGroupsC + class ProcessGroupsC; - // constructor - CommentsC( DefinitionsC & _defs ); - - // destructor - ~CommentsC(); - - // process local definition comment - bool processLocal( const DefRec_DefCommentS & locComment ); - - // finish global definition comments - // (i.e. add trace time comments to global definitions) - bool finish(); - - // - // trace time comments - // - struct TraceTimeS - { - TraceTimeS() - : minStartTimeEpoch( (uint64_t)-1 ), maxStopTimeEpoch( 0 ) {} - - uint64_t minStartTimeEpoch; // minimum start time since epoch - uint64_t maxStopTimeEpoch; // maximum stop time since epoch - - } m_traceTimes; - - // - // user communication ids and peers - // - struct UserComS - { - // - // structure for user communication ids and peers - // - struct ComIdPeerS - { - ComIdPeerS( const UserComC::ComIdS & _comid, const uint32_t _peer, - const bool & _is_sender ) - : comid( _comid ), peer( _peer ), is_sender( _is_sender ) {} - - UserComC::ComIdS comid; // user communication id (comm./tag) - uint32_t peer; // peer process id - bool is_sender; // peer type indicator - - }; - - // list of user communication ids and peers - std::list comIdsAndPeers; - - } m_userCom; - - // reference to parent class instance - DefinitionsC & m_defs; - - // sequential order index - uint32_t m_seqOrderIdx; - - }; - - // - // ProcessGroupsC sub-class - // (pre-processes process groups before adding these to global definitions) - // - class ProcessGroupsC - { - friend class DefinitionsC; - friend class DefinitionsC::CommentsC; - - //private: - - // identifier for deflated vector of group members - // (will be putted at the first vector element) - static const uint32_t DEFLATED_MEMBERS_TAG = (uint32_t)-1; - - // constructor - ProcessGroupsC( DefinitionsC & _defs ); - - // destructor - ~ProcessGroupsC(); - - // process local process group definition - 
bool processLocal( DefRec_DefProcessGroupS & locProcGrp ); - - // finish global process group definitions - // (i.e. add process groups for nodes and MPI-comms. to global defs.) - bool finish(); - - // deflate vector of group members - // (replaces vector elements by an unique id) - inline void deflateMembers( std::vector & members ); - - // inflate vector of group members - // (replaces unique id by the actual vector elements) - inline void inflateMembers( std::vector & members ); - - // - // compare structure for sorting process ids - // - struct ProcCmpS - { - bool operator()( const uint32_t & a, const uint32_t & b ) const - { - if( ( a & VT_TRACEID_BITMASK ) == ( b & VT_TRACEID_BITMASK ) ) - return a < b; - else - return ( a & VT_TRACEID_BITMASK ) < ( b & VT_TRACEID_BITMASK ); - } - - }; - - // - // Node process groups - // - struct NodeS - { - // map node name <-> process ids - std::map > name2Procs; - - } m_node; - - // - // GPU process groups and communicators - // - struct GpuS - { - // names of final process groups - // - static const char * groupName() { return "GPU_GROUP"; } - static const char * commName() { return "GPU_COMM_GLOBAL"; } - - // set of GPU process ids - std::set procs; - - // set of related process and GPU ids - std::set commMembers; - - // map process id <-> local GPU comm. 
token - // (storage of local tokens to translate afterwards) - std::map proc2LocCommTk; - - } m_gpu; - - // - // OpenMP thread team communicators - // - struct OmpS - { - // name (prefix) of final process groups - static const char * commName() { return "OMP Thread Team"; } - - } m_omp; - - // - // MPI communicators - // - struct MpiS - { - // names of final process groups - // - static const char * worldCommName() { return "MPI_COMM_WORLD"; } - static const char * selfCommName() { return "MPI_COMM_SELF"; } - static const char * otherCommName() { return "MPI Communicator"; } - - // - // structure for MPI_COMM_SELFs - // - struct SelfCommS - { - SelfCommS( const uint32_t & _loctk, const uint32_t & _member ) - : loctk( _loctk ), member( _member ) {} - - bool operator<( const SelfCommS & a ) const - { - return member < a.member; - } - - uint32_t loctk; // local comm. token - uint32_t member; // comm. member process id - - }; - - // - // structure for user created (other) MPI communicators - // - struct OtherCommS - { - // constructor for searching a communicator by its members - OtherCommS( const uint32_t & _membersid ) - : deftoken ( 0 ), membersid( _membersid ), index( 0 ) {} - - // constructor for creating a new communicator entry - OtherCommS( const uint32_t & _deftoken, const uint32_t & _membersid, - const uint32_t & _index ) - : deftoken( _deftoken ), membersid( _membersid ), - index( _index ) {} - - // operator for searching - bool operator==( const OtherCommS & a ) const - { - return membersid == a.membersid; - } - - uint32_t deftoken; // local token on def. process - uint32_t membersid; // id of deflated members vector - uint32_t index; // local index - - }; - - // set of MPI_COMM_SELFs - std::set selfComms; - - // map process <-> list of user created MPI comms. 
- std::map > proc2OtherComms; - - } m_mpi; - - // - // communicators for user communication - // - struct UserComS - { - // - // structure for user communicators - // - struct CommS - { - // constructor for searching a communicator by its name - CommS( const std::string & _name ) - : name( _name ) {} - - // operator for searching - bool operator==( const CommS & a ) const - { - return name.compare( a.name ) == 0; - } - - std::string name; // comm. name - std::set members; // comm. member process ids - - // map process id <-> local comm. token - // (storage of local tokens to translate afterwards) - std::map proc2LocCommTk; - - }; - - // add member process id to certain user communicator - void addCommMember( const uint32_t & proc, const uint32_t & comm, - const uint32_t & member ) - { - std::map::iterator> >::iterator - proc_it = proc2LocCommTk2CommIt.find( proc ); - assert( proc_it != proc2LocCommTk2CommIt.end() ); - - std::map::iterator>::iterator comm_it = - proc_it->second.find( comm ); - assert( comm_it != proc_it->second.end() ); - - comm_it->second->members.insert( member ); - } - - // list of user communicators - std::list comms; - - // map process <-> local comm. token <-> iterator in comm. 
list - std::map::iterator> > - proc2LocCommTk2CommIt; - - } m_userCom; - - // reference to parent class instance - DefinitionsC & m_defs; - - // map id <-> vector of group members - std::map > m_id2Members; - - }; - - // get stream ids to read - void getStreamIds( std::vector & streamIds ); - - // read local definitions of certain streams - bool readLocal( const std::vector & streamIds ); + // read local definitions + bool readLocal(); // read local definitions of certain single stream bool readLocal( const uint32_t & streamId, @@ -351,38 +72,390 @@ private: // write global definitions bool writeGlobal(); - // instance of class CommentsC + // instance of sub-class GroupCountersC + GroupCountersC * m_groupCntrs; + + // instance of sub-class CommentsC CommentsC * m_comments; - // instance of class ProcessGroupsC + // instance of sub-class ProcessGroupsC ProcessGroupsC * m_procGrps; // - // container of global definitions + // storage of global definitions // struct { - DefRec_DefCreatorS creator; - DefRec_DefTimerResolutionS timeres; - DefRec_DefTimeRangeS timerange; - std::set comments; - std::set procs; - std::set procGrps; - std::set sclFiles; - std::set scls; - std::set fileGrps; - std::set files; - std::set funcGrps; - std::set funcs; - std::set collops; - std::set cntrGrps; - std::set cntrs; - std::set keyVals; + // global definitions created by TokenFactoryScopeC + // (concerns all definitions which define an identifier token + // that has to be unified) + // + + std::set procGrps; + std::set sclFiles; + std::set scls; + std::set fileGrps; + std::set files; + std::set funcGrps; + std::set funcs; + std::set collops; + std::set cntrGrps; + std::set cntrs; + std::set keyVals; + + // miscellaneous global definitions + // + + DefRec_DefCreatorS creator; + DefRec_DefTimerResolutionS timeres; + DefRec_DefTimeRangeS timerange; + std::set comments; + std::set procs; + // map global counter token <-> process group assignments + std::map + cntrAssigns; + // map global 
process group token <-> attributes + std::map + procGrpAttrs; } m_globDefs; }; +// +// DefinitionsC::GroupCountersC sub-class +// (manages process group assignments of group counters) +// +class DefinitionsC::GroupCountersC +{ +public: + + // constructor + GroupCountersC( DefinitionsC & _defs ) : m_defs( _defs ) {} + + // destructor + ~GroupCountersC() {} + + // set local process group token for local process/counter token + // (only one process group assignment per counter allowed) + void setGroup( const uint32_t & proc, const uint32_t & counter, + const uint32_t & procGrp ) + { + m_cntr2ProcGrp[std::make_pair( proc, counter )] = procGrp; + } + + // get local process group token of certain local process/counter token + uint32_t getGroup( const uint32_t & proc, const uint32_t & counter ) const + { + // search for process group assignment + std::map, uint32_t>::const_iterator it = + m_cntr2ProcGrp.find( std::make_pair( proc, counter ) ); + + // if found, return local process group token; otherwise, return 0 + // + if( it != m_cntr2ProcGrp.end() ) + return it->second; + else + return 0; + } + + // The following methods are significant for the final stream/process[group] + // mapping in the OTF master control file. 
+ // + + // add global process group token to stream + void addGroupToStream( const uint32_t & streamid, const uint32_t & procGrp ) + { + if( m_streamId2ProcGrps[streamid].insert( procGrp ).second ) + { + // catch multiple added process groups + // + bool added_once = m_procGrps.insert( procGrp ).second; + assert( added_once ); + } + } + + // get global process group tokens of certain stream + const std::set * getGroupsOfStream( + const uint32_t & streamid ) const + { + // search for stream id + std::map >::const_iterator it = + m_streamId2ProcGrps.find( streamid ); + + // if found, return pointer to set of global process group tokens; + // otherwise, return 0 + // + if( it != m_streamId2ProcGrps.end() ) + return &(it->second); + else + return 0; + } + +private: + + // reference to parent class instance + DefinitionsC & m_defs; + + // set of global process group tokens which have counters + std::set m_procGrps; + + // map stream id <-> global process group tokens + std::map > m_streamId2ProcGrps; + + // map local process/counter token <-> local process group token + std::map, uint32_t> m_cntr2ProcGrp; + +}; + +// +// DefinitionsC::CommentsC sub-class +// (pre-processes comments before adding these to global definitions) +// +class DefinitionsC::CommentsC +{ +public: + + // constructor + CommentsC( DefinitionsC & _defs ) + : m_defs( _defs ), m_minStartTimeEpoch( (uint64_t)-1 ), + m_maxStopTimeEpoch( 0 ), m_seqOrderIdx( 0 ) {} + + // destructor + ~CommentsC() {} + + // process local definition comment + bool processLocal( const DefRec_DefCommentS & locComment ); + + // finish global definition comments + // (i.e. 
add trace time comments to global definitions) + bool finish(); + +private: + + // reference to parent class instance + DefinitionsC & m_defs; + + // trace times + // + uint64_t m_minStartTimeEpoch; + uint64_t m_maxStopTimeEpoch; + + // sequential order index + uint32_t m_seqOrderIdx; + +}; + +// +// DefinitionsC::ProcessGroupsC sub-class +// (pre-processes process groups before adding these to global definitions) +// +class DefinitionsC::ProcessGroupsC +{ + // friend declaration for sub-class DefinitionsC::CommentsC; + // needs access to m_userCom to add member process ids to certain user + // communicators + friend class DefinitionsC::CommentsC; + +public: + + // constructor + ProcessGroupsC( DefinitionsC & _defs ) : m_defs( _defs ) {} + + // destructor + ~ProcessGroupsC() + { + for( uint32_t i = 0; i < m_uniqueMembers.size(); i++ ) + delete m_uniqueMembers[i]; + } + + // process local process group definition + bool processLocal( DefRec_DefProcessGroupS & locProcGrp ); + + // finish global process group definitions + // (i.e. add process groups for nodes and MPI-comms. to global defs.) 
+ bool finish(); + + // deflate group member array of certain process group definition + // (replaces array elements by an unique id) + inline void deflateMembers( DefRec_DefProcessGroupS & procGrp ); + + // inflate group members array of certain process group definition + // (replaces unique id by the actual array elements) + inline void inflateMembers( DefRec_DefProcessGroupS & procGrp ); + +private: + + // identifier for deflated group member arrays + // (will be putted at the first array element) + static const uint32_t DEFLATED_MEMBERS_TAG = (uint32_t)-1; + + // + // compare structure for sorting process ids + // + struct ProcCmpS + { + bool operator()( const uint32_t & a, const uint32_t & b ) const + { + if( ( a & VT_TRACEID_BITMASK ) == ( b & VT_TRACEID_BITMASK ) ) + return a < b; + else + return ( a & VT_TRACEID_BITMASK ) < ( b & VT_TRACEID_BITMASK ); + } + + }; + + // + // structure for storing unique (un-deflated) group member arrays + // + struct UniqueMembersS + { + UniqueMembersS( uint32_t _id, uint32_t _nmembers, + const uint32_t * _members ) + : id( _id ), nmembers( _nmembers ), members( 0 ) + { + assert( nmembers > 0 ); + + members = new uint32_t[nmembers]; + assert( members ); + + memcpy( members, _members, nmembers * sizeof( uint32_t ) ); + } + ~UniqueMembersS() + { + delete [] members; + } + + uint32_t id; // unique id representing this group member array + uint32_t nmembers; // number of group members + uint32_t * members; // array of group members + + }; + + // + // scope for MPI communicators and groups + // + struct MpiS + { + // + // sub-scope for MPI_COMM_WORLD + // + struct WorldCommS + { + WorldCommS() : global_token( 0 ) {} + + // name of final process group + static const char * NAME() { return "MPI_COMM_WORLD"; } + + // global process group token + uint32_t global_token; + + } worldComm; + + // + // sub-scope for MPI_COMM_SELFs + // + struct SelfCommsS + { + // name (prefix) of final process groups + static const char * NAME() { return 
"MPI_COMM_SELF"; } + + } selfComms; + + // + // sub-scope for user created MPI communicators and groups + // + struct CommsAndGroupsS + { + CommsAndGroupsS() : comm_seqno( 0 ), group_seqno( 0 ) {} + + // name (prefix) of final process groups + // + static const char * COMM_NAME() { return "MPI Communicator"; } + static const char * GROUP_NAME() { return "MPI Group"; } + + // communicator/group sequential number (=name suffix) + // + uint32_t comm_seqno; + uint32_t group_seqno; + + // map process/membersid <-> count + std::map, uint32_t> counts; + + // map membersid/count <-> global token + std::map, uint32_t> global_tokens; + + } commsAndGroups; + + } m_mpi; + + // + // scope for communicators of user communication + // + struct UserComS + { + // + // structure for user communicators + // + struct CommS + { + uint32_t global_token; // global comm. token + std::set members; // member process ids + + }; + + // add member process id to certain user communicator + void addCommMember( const uint32_t & comm, const uint32_t & member ) + { + std::map::iterator it = + globTk2Comm.find( comm ); + assert( it != globTk2Comm.end() ); + + it->second->members.insert( member ); + } + + // map name <-> communicator + std::map name2Comm; + + // map global token <-> communicator + std::map globTk2Comm; + + } m_userCom; + + // + // scope for other process groups (e.g. 
nodes, GPU comms./groups) + // + struct OtherS + { + // name of final process group containing all processes + static const char * ALL_NAME() { return "All"; } + + // + // structure for other process groups + // + struct GroupS + { + uint32_t global_token; // global process group token + std::set members; // member process ids + + }; + + // map name <-> process group + std::map name2Group; + + } m_other; + + // map hash <-> unique group members array(s) + std::multimap m_hash2UniqueMembers; + + // vector of unique group member arrays + std::vector m_uniqueMembers; + + // reference to parent class instance + DefinitionsC & m_defs; + +}; + // instance of class DefinitionsC extern DefinitionsC * theDefinitions; diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_recs.cc b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_recs.cc index 7056a262b4..8c310af90c 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_recs.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_recs.cc @@ -13,30 +13,17 @@ #include "vt_unify.h" #include "vt_unify_defs_recs.h" -#include - #ifdef VT_MPI -// -// DefRec_BaseS -// +//////////////////// struct DefRec_BaseS //////////////////// + VT_MPI_INT DefRec_BaseS::getPackSize() { - VT_MPI_INT buffer_size = 0; - VT_MPI_INT size; + VT_MPI_INT buffer_size; - // dtype - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // loccpuid - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // deftoken - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; + // dtype + loccpuid + deftoken + CALL_MPI( MPI_Pack_size( 3, MPI_UNSIGNED, MPI_COMM_WORLD, &buffer_size ) ); return buffer_size; } @@ -75,26 +62,22 @@ DefRec_BaseS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_UNSIGNED, MPI_COMM_WORLD ) ); } -// -// DefRec_DefCommentS -// +//////////////////// struct DefRec_DefCommentS //////////////////// + 
VT_MPI_INT DefRec_DefCommentS::getPackSize() { VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); VT_MPI_INT size; - // type - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // comment.length() - uint32_t comment_length = comment.length(); - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); + // type + comment.length() + // + CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // comment - CALL_MPI( MPI_Pack_size( comment_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( comment.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -112,12 +95,15 @@ DefRec_DefCommentS::pack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_COMM_WORLD ) ); // comment.length() + // uint32_t comment_length = comment.length(); CALL_MPI( MPI_Pack( &comment_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // comment + // char * c_comment = new char[comment_length+1]; + assert( c_comment ); strcpy( c_comment, comment.c_str() ); CALL_MPI( MPI_Pack( c_comment, comment_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -135,21 +121,23 @@ DefRec_DefCommentS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_COMM_WORLD ) ); // comment.length() + // uint32_t comment_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &comment_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // comment + // char * c_comment = new char[comment_length+1]; + assert( c_comment ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_comment, comment_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); comment = c_comment; delete [] c_comment; } -// -// DefRec_DefCreatorS -// +//////////////////// struct DefRec_DefCreatorS //////////////////// + VT_MPI_INT DefRec_DefCreatorS::getPackSize() { @@ -157,11 +145,13 @@ DefRec_DefCreatorS::getPackSize() VT_MPI_INT size; // creator.length() + // uint32_t 
creator_length = creator.length(); CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // creator + // CALL_MPI( MPI_Pack_size( creator_length + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -176,12 +166,15 @@ DefRec_DefCreatorS::pack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); // creator.length() + // uint32_t creator_length = creator.length(); CALL_MPI( MPI_Pack( &creator_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // creator + // char * c_creator = new char[creator_length+1]; + assert( c_creator ); strcpy( c_creator, creator.c_str() ); CALL_MPI( MPI_Pack( c_creator, creator_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -195,21 +188,23 @@ DefRec_DefCreatorS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); // creator.length() + // uint32_t creator_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &creator_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // creator + // char * c_creator = new char[creator_length+1]; + assert( c_creator ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_creator, creator_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); creator = c_creator; delete [] c_creator; } -// -// DefRec_DefTimerResolutionS -// +//////////////////// struct DefRec_DefTimerResolutionS //////////////////// + VT_MPI_INT DefRec_DefTimerResolutionS::getPackSize() { @@ -217,6 +212,7 @@ DefRec_DefTimerResolutionS::getPackSize() VT_MPI_INT size; // ticksPerSecond + // CALL_MPI( MPI_Pack_size( 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -235,7 +231,8 @@ DefRec_DefTimerResolutionS::pack( char *& buffer, const VT_MPI_INT & bufferSize, } void -DefRec_DefTimerResolutionS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, +DefRec_DefTimerResolutionS::unpack( char *& buffer, + const VT_MPI_INT & 
bufferSize, VT_MPI_INT & bufferPos ) { DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); @@ -245,9 +242,8 @@ DefRec_DefTimerResolutionS::unpack( char *& buffer, const VT_MPI_INT & bufferSiz MPI_LONG_LONG_INT, MPI_COMM_WORLD ) ); } -// -// DefRec_DefTimeRangeS -// +//////////////////// struct DefRec_DefTimeRangeS //////////////////// + VT_MPI_INT DefRec_DefTimeRangeS::getPackSize() { @@ -255,6 +251,7 @@ DefRec_DefTimeRangeS::getPackSize() VT_MPI_INT size; // minTime + maxTime + // CALL_MPI( MPI_Pack_size( 2, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -291,29 +288,25 @@ DefRec_DefTimeRangeS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_LONG_LONG_INT, MPI_COMM_WORLD ) ); } -// -// DefRec_DefProcessS -// +//////////////////// struct DefRec_DefProcessS //////////////////// + VT_MPI_INT DefRec_DefProcessS::getPackSize() { VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); VT_MPI_INT size; - // name.length() - uint32_t name_length = name.length(); - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); + // name.length() + parent + // + CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // name - CALL_MPI( MPI_Pack_size( name_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( name.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; - // parent - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - return buffer_size; } @@ -324,12 +317,15 @@ DefRec_DefProcessS::pack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, 
bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -347,12 +343,15 @@ DefRec_DefProcessS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; @@ -363,35 +362,23 @@ DefRec_DefProcessS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_UNSIGNED, MPI_COMM_WORLD ) ); } -// -// DefRec_DefProcessGroupS -// +//////////////////// struct DefRec_DefProcessGroupS //////////////////// + VT_MPI_INT DefRec_DefProcessGroupS::getPackSize() { VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); VT_MPI_INT size; - // type - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // name.length() - uint32_t name_length = name.length(); - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // name - CALL_MPI( MPI_Pack_size( name_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // type + name.length() + members_hash + nmembers + members + // + CALL_MPI( MPI_Pack_size( 4 + nmembers, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; - // members.size() - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // members - CALL_MPI( MPI_Pack_size( members.size(), MPI_UNSIGNED, MPI_COMM_WORLD, + // name + // + CALL_MPI( MPI_Pack_size( name.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -409,26 +396,33 @@ DefRec_DefProcessGroupS::pack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_COMM_WORLD ) ); // name.length() + // uint32_t name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, 
&bufferPos, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); delete [] c_name; - // members.size() - uint32_t members_size = members.size(); - CALL_MPI( MPI_Pack( &members_size, 1, MPI_UNSIGNED, buffer, bufferSize, + // members_hash + CALL_MPI( MPI_Pack( &members_hash, 1, MPI_UNSIGNED, buffer, bufferSize, + &bufferPos, MPI_COMM_WORLD ) ); + + // nmembers + CALL_MPI( MPI_Pack( &nmembers, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // members - for( uint32_t i = 0; i < members_size; i++ ) + // + if( nmembers > 0 ) { - CALL_MPI( MPI_Pack( &(members[i]), 1, MPI_UNSIGNED, buffer, bufferSize, + CALL_MPI( MPI_Pack( members, nmembers, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); } } @@ -444,34 +438,81 @@ DefRec_DefProcessGroupS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_COMM_WORLD ) ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; delete [] c_name; - // members.size() - uint32_t members_size; - CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &members_size, 1, + // members_hash + CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &members_hash, 1, + MPI_UNSIGNED, MPI_COMM_WORLD ) ); + + // nmembers + CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &nmembers, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // members - members.resize( members_size ); - for( uint32_t i = 0; i < members_size; i++ ) + // + if( nmembers > 0 ) { - CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &(members[i]), 1, - MPI_UNSIGNED, MPI_COMM_WORLD ) ); + 
members = new uint32_t[nmembers]; + assert( members ); + CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, members, + nmembers, MPI_UNSIGNED, MPI_COMM_WORLD ) ); } } -// -// DefRec_DefSclFileS -// +/////////////////// struct DefRec_DefProcessGroupAttributesS /////////////////// + +VT_MPI_INT +DefRec_DefProcessGroupAttributesS::getPackSize() +{ + VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); + VT_MPI_INT size; + + // attributes + // + CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); + buffer_size += size; + + return buffer_size; +} + +void +DefRec_DefProcessGroupAttributesS::pack( char *& buffer, + const VT_MPI_INT & bufferSize, + VT_MPI_INT & bufferPos ) +{ + DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); + + // attributes + CALL_MPI( MPI_Pack( &attributes, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, + MPI_COMM_WORLD ) ); +} + +void +DefRec_DefProcessGroupAttributesS::unpack( char *& buffer, + const VT_MPI_INT & bufferSize, + VT_MPI_INT & bufferPos ) +{ + DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); + + // attributes + CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &attributes, 1, + MPI_UNSIGNED, MPI_COMM_WORLD ) ); +} + +//////////////////// struct DefRec_DefSclFileS //////////////////// + VT_MPI_INT DefRec_DefSclFileS::getPackSize() { @@ -479,12 +520,13 @@ DefRec_DefSclFileS::getPackSize() VT_MPI_INT size; // filename.length() - uint32_t filename_length = filename.length(); + // CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // filename - CALL_MPI( MPI_Pack_size( filename_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( filename.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -498,12 +540,15 @@ DefRec_DefSclFileS::pack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); // filename.length() + // uint32_t filename_length = filename.length(); CALL_MPI( MPI_Pack( 
&filename_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // filename + // char * c_filename = new char[filename_length+1]; + assert( c_filename ); strcpy( c_filename, filename.c_str() ); CALL_MPI( MPI_Pack( c_filename, filename_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -517,33 +562,32 @@ DefRec_DefSclFileS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); // filename.length() + // uint32_t filename_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &filename_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // filename + // char * c_filename = new char[filename_length+1]; + assert( c_filename ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_filename, filename_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); filename = c_filename; delete [] c_filename; } -// -// DefRec_DefSclS -// +//////////////////// struct DefRec_DefSclS //////////////////// + VT_MPI_INT DefRec_DefSclS::getPackSize() { VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); VT_MPI_INT size; - // sclfile - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // sclline - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); + // sclfile + sclline + // + CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; return buffer_size; @@ -579,9 +623,8 @@ DefRec_DefSclS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_UNSIGNED, MPI_COMM_WORLD ) ); } -// -// DefRec_DefFileGroupS -// +//////////////////// struct DefRec_DefFileGroupS //////////////////// + VT_MPI_INT DefRec_DefFileGroupS::getPackSize() { @@ -589,12 +632,13 @@ DefRec_DefFileGroupS::getPackSize() VT_MPI_INT size; // name.length() - uint32_t name_length = name.length(); + // CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // name - CALL_MPI( MPI_Pack_size( name_length + 
1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( name.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -608,12 +652,15 @@ DefRec_DefFileGroupS::pack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -627,41 +674,40 @@ DefRec_DefFileGroupS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; delete [] c_name; } -// -// DefRec_DefFileS -// +//////////////////// struct DefRec_DefFileS //////////////////// + VT_MPI_INT DefRec_DefFileS::getPackSize() { VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); VT_MPI_INT size; - // name.length() - uint32_t name_length = name.length(); - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); + // name.length() + group + // + CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // name - CALL_MPI( MPI_Pack_size( name_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( name.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; - // group - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - return buffer_size; } @@ -672,12 +718,15 @@ 
DefRec_DefFileS::pack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -695,12 +744,15 @@ DefRec_DefFileS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; @@ -711,9 +763,8 @@ DefRec_DefFileS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_UNSIGNED, MPI_COMM_WORLD ) ); } -// -// DefRec_DefFunctionGroupS -// +//////////////////// struct DefRec_DefFunctionGroupS //////////////////// + VT_MPI_INT DefRec_DefFunctionGroupS::getPackSize() { @@ -721,12 +772,13 @@ DefRec_DefFunctionGroupS::getPackSize() VT_MPI_INT size; // name.length() - uint32_t name_length = name.length(); + // CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // name - CALL_MPI( MPI_Pack_size( name_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( name.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -740,12 +792,15 @@ DefRec_DefFunctionGroupS::pack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, 
&bufferPos, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -759,45 +814,40 @@ DefRec_DefFunctionGroupS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; delete [] c_name; } -// -// DefRec_DefFunctionS -// +//////////////////// struct DefRec_DefFunctionS //////////////////// + VT_MPI_INT DefRec_DefFunctionS::getPackSize() { VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); VT_MPI_INT size; - // name.length() - uint32_t name_length = name.length(); - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); + // name.length() + group + scltoken + // + CALL_MPI( MPI_Pack_size( 3, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // name - CALL_MPI( MPI_Pack_size( name_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( name.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; - // group - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // scltoken - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - return buffer_size; } @@ -808,12 +858,15 @@ DefRec_DefFunctionS::pack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // 
name + // char * c_name = new char[name_length+1]; + assert( c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -835,12 +888,15 @@ DefRec_DefFunctionS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; @@ -855,29 +911,25 @@ DefRec_DefFunctionS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_UNSIGNED, MPI_COMM_WORLD ) ); } -// -// DefRec_DefCollOpS -// +//////////////////// DefRec_DefCollOpS //////////////////// + VT_MPI_INT DefRec_DefCollOpS::getPackSize() { VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); VT_MPI_INT size; - // name.length() - uint32_t name_length = name.length(); - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); + // name.length() + type + // + CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // name - CALL_MPI( MPI_Pack_size( name_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( name.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; - // type - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - return buffer_size; } @@ -888,12 +940,15 @@ DefRec_DefCollOpS::pack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( 
c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -911,12 +966,15 @@ DefRec_DefCollOpS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; @@ -927,9 +985,8 @@ DefRec_DefCollOpS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_COMM_WORLD ) ); } -// -// DefRec_DefCounterGroupS -// +//////////////////// DefRec_DefCounterGroupS //////////////////// + VT_MPI_INT DefRec_DefCounterGroupS::getPackSize() { @@ -937,12 +994,13 @@ DefRec_DefCounterGroupS::getPackSize() VT_MPI_INT size; // name.length() - uint32_t name_length = name.length(); + // CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // name - CALL_MPI( MPI_Pack_size( name_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( name.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -956,12 +1014,15 @@ DefRec_DefCounterGroupS::pack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -975,53 +1036,38 @@ DefRec_DefCounterGroupS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::unpack( buffer, 
bufferSize, bufferPos ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; delete [] c_name; } -// -// DefRec_DefCounterS -// +//////////////////// DefRec_DefCounterS //////////////////// + VT_MPI_INT DefRec_DefCounterS::getPackSize() { VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); VT_MPI_INT size; - // name.length() - uint32_t name_length = name.length(); - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); + // name.length() + properties + group + unit.length() + // + CALL_MPI( MPI_Pack_size( 4, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; - // name - CALL_MPI( MPI_Pack_size( name_length + 1, MPI_CHAR, MPI_COMM_WORLD, - &size ) ); - buffer_size += size; - - // properties - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // group - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // unit.length() - uint32_t unit_length = unit.length(); - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // unit - CALL_MPI( MPI_Pack_size( unit_length + 1, MPI_CHAR, MPI_COMM_WORLD, - &size ) ); + // name + unit + // + CALL_MPI( MPI_Pack_size( name.length() + 1 + unit.length() + 1, MPI_CHAR, + MPI_COMM_WORLD, &size ) ); buffer_size += size; return buffer_size; @@ -1034,12 +1080,15 @@ DefRec_DefCounterS::pack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // name + // char * c_name = new 
char[name_length+1]; + assert( c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -1054,12 +1103,15 @@ DefRec_DefCounterS::pack( char *& buffer, const VT_MPI_INT & bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // unit.length() + // uint32_t unit_length = unit.length(); CALL_MPI( MPI_Pack( &unit_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // unit + // char * c_unit = new char[unit_length+1]; + assert( c_unit ); strcpy( c_unit, unit.c_str() ); CALL_MPI( MPI_Pack( c_unit, unit_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -1073,12 +1125,15 @@ DefRec_DefCounterS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; @@ -1098,33 +1153,96 @@ DefRec_DefCounterS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // unit + // char * c_unit = new char[unit_length+1]; + assert( c_unit ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_unit, unit_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); unit = c_unit; delete [] c_unit; } -// -// DefRec_DefKeyValueS -// +//////////////////// DefRec_DefCounterAssignmentsS //////////////////// + +VT_MPI_INT +DefRec_DefCounterAssignmentsS::getPackSize() +{ + VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); + VT_MPI_INT size; + + // groups.size() + groups + // + CALL_MPI( MPI_Pack_size( 1 + groups.size(), MPI_UNSIGNED, MPI_COMM_WORLD, + &size ) ); + buffer_size += size; + + return buffer_size; +} + +void +DefRec_DefCounterAssignmentsS::pack( 
char *& buffer, + const VT_MPI_INT & bufferSize, + VT_MPI_INT & bufferPos ) +{ + DefRec_BaseS::pack( buffer, bufferSize, bufferPos ); + + // groups.size() + // + uint32_t groups_size = groups.size(); + CALL_MPI( MPI_Pack( &groups_size, 1, MPI_UNSIGNED, buffer, bufferSize, + &bufferPos, MPI_COMM_WORLD ) ); + + // groups + // + for( std::set::const_iterator it = groups.begin(); + it != groups.end(); it++ ) + { + uint32_t group = *it; + CALL_MPI( MPI_Pack( &group, 1, MPI_UNSIGNED, buffer, bufferSize, + &bufferPos, MPI_COMM_WORLD ) ); + } +} + +void +DefRec_DefCounterAssignmentsS::unpack( char *& buffer, + const VT_MPI_INT & bufferSize, + VT_MPI_INT & bufferPos ) +{ + DefRec_BaseS::unpack( buffer, bufferSize, bufferPos ); + + // groups.size() + // + uint32_t groups_size; + CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &groups_size, 1, + MPI_UNSIGNED, MPI_COMM_WORLD ) ); + + // groups + // + for( uint32_t i = 0; i < groups_size; i++ ) + { + uint32_t group; + CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &group, 1, + MPI_UNSIGNED, MPI_COMM_WORLD ) ); + groups.insert( group ); + } +} + +//////////////////// DefRec_DefKeyValueS //////////////////// + VT_MPI_INT DefRec_DefKeyValueS::getPackSize() { VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); VT_MPI_INT size; - // type - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // name.length() - uint32_t name_length = name.length(); - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); + // type + name.length() + // + CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // name - CALL_MPI( MPI_Pack_size( name_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( name.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -1142,12 +1260,15 @@ DefRec_DefKeyValueS::pack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_COMM_WORLD ) ); // name.length() + // uint32_t 
name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -1165,38 +1286,37 @@ DefRec_DefKeyValueS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; delete [] c_name; } -// -// DefRec_DefMarkerS -// +//////////////////// DefRec_DefMarkerS //////////////////// + VT_MPI_INT DefRec_DefMarkerS::getPackSize() { VT_MPI_INT buffer_size = DefRec_BaseS::getPackSize(); VT_MPI_INT size; - // type - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); - buffer_size += size; - - // name.length() - uint32_t name_length = name.length(); - CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); + // type + name.length() + // + CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) ); buffer_size += size; // name - CALL_MPI( MPI_Pack_size( name_length + 1, MPI_CHAR, MPI_COMM_WORLD, + // + CALL_MPI( MPI_Pack_size( name.length() + 1, MPI_CHAR, MPI_COMM_WORLD, &size ) ); buffer_size += size; @@ -1214,12 +1334,15 @@ DefRec_DefMarkerS::pack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_COMM_WORLD ) ); // name.length() + // uint32_t name_length = name.length(); CALL_MPI( MPI_Pack( &name_length, 1, MPI_UNSIGNED, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); strcpy( c_name, name.c_str() ); CALL_MPI( 
MPI_Pack( c_name, name_length + 1, MPI_CHAR, buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) ); @@ -1237,12 +1360,15 @@ DefRec_DefMarkerS::unpack( char *& buffer, const VT_MPI_INT & bufferSize, MPI_COMM_WORLD ) ); // name.length() + // uint32_t name_length; CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &name_length, 1, MPI_UNSIGNED, MPI_COMM_WORLD ) ); // name + // char * c_name = new char[name_length+1]; + assert( c_name ); CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, c_name, name_length + 1, MPI_CHAR, MPI_COMM_WORLD ) ); name = c_name; diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_recs.h b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_recs.h index 63ef7654b1..88f0cc916d 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_recs.h +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_recs.h @@ -17,11 +17,17 @@ #include "vt_inttypes.h" +#include "util/hash.h" + #include "otf.h" +#include +#include #include #include +#include + // // definition record types // @@ -32,6 +38,7 @@ typedef enum DEF_REC_TYPE__DefTimeRange, DEF_REC_TYPE__DefProcess, DEF_REC_TYPE__DefProcessGroup, + DEF_REC_TYPE__DefProcessGroupAttributes, DEF_REC_TYPE__DefSclFile, DEF_REC_TYPE__DefScl, DEF_REC_TYPE__DefFileGroup, @@ -41,6 +48,7 @@ typedef enum DEF_REC_TYPE__DefCollOp, DEF_REC_TYPE__DefCounterGroup, DEF_REC_TYPE__DefCounter, + DEF_REC_TYPE__DefCounterAssignments, DEF_REC_TYPE__DefKeyValue, DEF_REC_TYPE__DefMarker, DEF_REC_TYPE__DefComment, @@ -272,50 +280,63 @@ struct DefRec_DefProcessS : DefRec_BaseS // struct DefRec_DefProcessGroupS : DefRec_BaseS { - // - // compare structure for final sort - // - struct SortS - { - bool operator()( const DefRec_DefProcessGroupS * a, - const DefRec_DefProcessGroupS * b ) const - { - if( a->type == b->type ) - return a->deftoken < b->deftoken; - else - return a->type < b->type; - } - - }; - typedef enum { - TYPE_NODE, TYPE_MPI_COMM_WORLD, TYPE_MPI_COMM_SELF, TYPE_MPI_COMM_OTHER, - TYPE_OMP_TEAM, TYPE_GPU_COMM, 
TYPE_GPU_GROUP, TYPE_USER_COMM, TYPE_OTHER, + TYPE_ALL, TYPE_NODE, TYPE_MPI_COMM_WORLD, TYPE_MPI_COMM_SELF, + TYPE_MPI_COMM_OTHER, TYPE_MPI_GROUP, TYPE_USER_COMM, TYPE_OTHER, TYPE_UNKNOWN } ProcessGroupTypeT; DefRec_DefProcessGroupS() - : DefRec_BaseS( DEF_REC_TYPE__DefProcessGroup ), type( TYPE_UNKNOWN ) {} + : DefRec_BaseS( DEF_REC_TYPE__DefProcessGroup ), type( TYPE_UNKNOWN ), + members_hash( 0 ), nmembers( 0 ), members( 0 ) {} DefRec_DefProcessGroupS( const uint32_t & _loccpuid, const uint32_t & _deftoken, const ProcessGroupTypeT & _type, const std::string & _name, const uint32_t & _nmembers, const uint32_t * _members ) : DefRec_BaseS( DEF_REC_TYPE__DefProcessGroup, _loccpuid, _deftoken ), - type( _type ), name( _name ) + type( _type ), name( _name ), members_hash( 0 ), nmembers( 0 ), + members( 0 ) { - if( _nmembers > 0 ) + assignMembers( _nmembers, _members, _members + _nmembers ); + + if( nmembers > 0 && + ( type == TYPE_MPI_COMM_WORLD || type == TYPE_MPI_COMM_OTHER || + type == TYPE_MPI_GROUP ) ) { - members.resize( _nmembers ); - members.assign( _members, _members + _nmembers ); + members_hash = + vt_hash( (unsigned char*)members, + nmembers * sizeof( uint32_t ), 0 ); } } + DefRec_DefProcessGroupS( const DefRec_DefProcessGroupS & a ) + : DefRec_BaseS( DEF_REC_TYPE__DefProcessGroup, a.loccpuid, a.deftoken ), + type( a.type ), name( a.name ), members_hash( a.members_hash ), + nmembers( 0 ), members( 0 ) + { + assignMembers( a.nmembers, a.members, a.members + a.nmembers ); + } + ~DefRec_DefProcessGroupS() + { + if( nmembers > 0 ) + delete [] members; + } - DefRec_DefProcessGroupS( const uint32_t & _loccpuid, - const uint32_t & _deftoken, const ProcessGroupTypeT & _type, - const std::string & _name, const std::vector & _members ) - : DefRec_BaseS( DEF_REC_TYPE__DefProcessGroup, _loccpuid, _deftoken ), - type( _type ), name( _name ), members( _members ) {} + template + void assignMembers( uint32_t n, InputIterator first, InputIterator last ) + { + if( nmembers 
> 0 ) + delete [] members; + + nmembers = n; + members = 0; + if( nmembers > 0 ) + { + members = new uint32_t[nmembers]; + assert( members ); + std::copy( first, last, members ); + } + } #ifdef VT_MPI VT_MPI_INT getPackSize(); @@ -330,13 +351,22 @@ struct DefRec_DefProcessGroupS : DefRec_BaseS { if( type == a.type ) { - if( members == a.members ) + if( nmembers == a.nmembers ) { - return name < a.name; + if( name == a.name ) + { + return + memcmp( members, a.members, + nmembers * sizeof( uint32_t ) ) < 0; + } + else + { + return name < a.name; + } } else { - return members < a.members; + return nmembers < a.nmembers; } } else @@ -345,9 +375,42 @@ struct DefRec_DefProcessGroupS : DefRec_BaseS } } - ProcessGroupTypeT type; - std::string name; - std::vector members; + ProcessGroupTypeT type; + std::string name; + uint32_t members_hash; + uint32_t nmembers; + uint32_t * members; + +}; + +// +// DefRec_DefProcessGroupAttributesS +// +struct DefRec_DefProcessGroupAttributesS : DefRec_BaseS +{ + DefRec_DefProcessGroupAttributesS() + : DefRec_BaseS( DEF_REC_TYPE__DefProcessGroupAttributes ), + attributes( 0 ) {} + DefRec_DefProcessGroupAttributesS( const uint32_t & _loccpuid, + const uint32_t & _deftoken, const uint32_t & _attributes ) + : DefRec_BaseS( DEF_REC_TYPE__DefProcessGroupAttributes, _loccpuid, + _deftoken ), attributes( _attributes ) {} + +#ifdef VT_MPI + VT_MPI_INT getPackSize(); + void pack( char *& buffer, const VT_MPI_INT & bufferSize, + VT_MPI_INT & bufferPos ); + void unpack( char *& buffer, const VT_MPI_INT & bufferSize, + VT_MPI_INT & bufferPos ); +#endif // VT_MPI + + // operator for searching + bool operator<( const DefRec_DefProcessGroupAttributesS & a ) const + { + return attributes < a.attributes; + } + + uint32_t attributes; }; @@ -681,6 +744,36 @@ struct DefRec_DefCounterS : DefRec_BaseS }; +// +// DefRec_DefCounterAssignmentsS +// +struct DefRec_DefCounterAssignmentsS : DefRec_BaseS +{ + DefRec_DefCounterAssignmentsS() + : DefRec_BaseS( 
DEF_REC_TYPE__DefCounterAssignments ) {} + DefRec_DefCounterAssignmentsS( const uint32_t & _loccpuid, + const uint32_t & _counter, const uint32_t & _group ) + : DefRec_BaseS( DEF_REC_TYPE__DefCounterAssignments, _loccpuid, _counter ) + { + groups.insert( _group ); + } + DefRec_DefCounterAssignmentsS( const uint32_t & _loccpuid, + const uint32_t & _counter, const std::set & _groups ) + : DefRec_BaseS( DEF_REC_TYPE__DefCounterAssignments, _loccpuid, + _counter ), groups( _groups ) {} + +#ifdef VT_MPI + VT_MPI_INT getPackSize(); + void pack( char *& buffer, const VT_MPI_INT & bufferSize, + VT_MPI_INT & bufferPos ); + void unpack( char *& buffer, const VT_MPI_INT & bufferSize, + VT_MPI_INT & bufferPos ); +#endif // VT_MPI + + std::set groups; + +}; + // // DefRec_DefKeyValueS // diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_events_stats.cc b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_events_stats.cc index 61961bafdb..b4660b6acd 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_events_stats.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_events_stats.cc @@ -111,6 +111,32 @@ EventsAndStatsC::cleanUp() const OTF_FileType common_file_type = m_scope == SCOPE_EVENTS ? OTF_FILETYPE_EVENT : OTF_FILETYPE_STATS; + // remove local event/stat. 
files, if necessary + // + if( Params.doclean ) + { +#if defined(HAVE_OMP) && HAVE_OMP +# pragma omp parallel for private(i, filename1) +#endif // HAVE_OMP + for( i = 0; i < streams_num; i++ ) + { + const uint32_t & streamid = MyStreamIds[i]; + + // try to remove file without compression suffix + OTF_getFilename( Params.in_file_prefix.c_str(), streamid, + common_file_type, STRBUFSIZE, filename1 ); + if( remove( filename1 ) == 0 ) + PVPrint( 3, " Removed %s\n", filename1 ); + + // try to remove file with compression suffix + OTF_getFilename( Params.in_file_prefix.c_str(), streamid, + common_file_type | OTF_FILECOMPRESSION_COMPRESSED, STRBUFSIZE, + filename1 ); + if( remove( filename1 ) == 0 ) + PVPrint( 3, " Removed %s\n", filename1 ); + } + } + // rename temporary event/stat. output files // @@ -141,41 +167,6 @@ EventsAndStatsC::cleanUp() PVPrint( 3, " Renamed %s to %s\n", filename1, filename2 ); } - // remove local event/stat. files, if necessary - // - if( Params.doclean && - Params.in_file_prefix.compare( Params.out_file_prefix ) != 0 ) - { -#if defined(HAVE_OMP) && HAVE_OMP -# pragma omp parallel for private(i, filename1) -#endif // HAVE_OMP - for( i = 0; i < streams_num; i++ ) - { - const uint32_t & streamid = MyStreamIds[i]; - - bool removed = false; - - // get file name without compression suffix - OTF_getFilename( Params.in_file_prefix.c_str(), streamid, - common_file_type, STRBUFSIZE, filename1 ); - - // try to remove file - if( !( removed = ( remove( filename1 ) == 0 ) ) ) - { - // if failed, get file name with compression suffix - OTF_getFilename( Params.in_file_prefix.c_str(), streamid, - common_file_type | OTF_FILECOMPRESSION_COMPRESSED, - STRBUFSIZE, filename1 ); - - // try to remove file again - removed = ( remove( filename1 ) == 0 ); - } - - if( removed ) - PVPrint( 3, " Removed %s\n", filename1 ); - } - } - return !error; } @@ -256,17 +247,17 @@ EventsAndStatsC::rewrite() PVPrint( 3, " Opened OTF writer stream [namestub %s id %x]\n", 
tmp_out_file_prefix.c_str(), streamid ); -#ifdef VT_UNIFY_HOOKS_AEVENTS +#if (defined(VT_UNIFY_HOOKS_AEVENTS) || defined(VT_UNIFY_HOOKS_MARGINS)) if( m_scope == SCOPE_EVENTS ) { - // trigger HooksAsyncEventsC's generic hook for opened event stream + // trigger generic hooks for opened event stream theHooks->triggerGenericHook( - VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_OPEN, 3, - const_cast( &streamid ), - const_cast( &in_file_prefix ), - &wstream ); + VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_OPEN | + VT_UNIFY_HOOKS_MARGINS_GENID__EVENT_STREAM_OPEN, 3, + &wstream, const_cast( &streamid ), + const_cast( &in_file_prefix ) ); } -#endif // VT_UNIFY_HOOKS_AEVENTS +#endif // VT_UNIFY_HOOKS_AEVENTS || VT_UNIFY_HOOKS_MARGINS // set file compression // @@ -283,105 +274,108 @@ EventsAndStatsC::rewrite() if( m_scope == SCOPE_EVENTS ) { + // create first handler argument + FirstHandlerArg_EventsS fha( wstream ); + // set record handler and its first argument for ... // // ... OTF_EVENTCOMMENT_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_EventComment, + (OTF_FunctionPointer*)HandleEventComment, OTF_EVENTCOMMENT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_EVENTCOMMENT_RECORD ); // ... OTF_ENTER_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_Enter, + (OTF_FunctionPointer*)HandleEnter, OTF_ENTER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_ENTER_RECORD ); // ... OTF_LEAVE_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_Leave, + (OTF_FunctionPointer*)HandleLeave, OTF_LEAVE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_LEAVE_RECORD ); // ... 
OTF_COUNTER_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_Counter, + (OTF_FunctionPointer*)HandleCounter, OTF_COUNTER_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_COUNTER_RECORD ); // ... OTF_BEGINFILEOPERATION_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_BeginFileOp, + (OTF_FunctionPointer*)HandleBeginFileOp, OTF_BEGINFILEOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_BEGINFILEOP_RECORD ); // ... OTF_ENDFILEOPERATION_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_EndFileOp, + (OTF_FunctionPointer*)HandleEndFileOp, OTF_ENDFILEOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_ENDFILEOP_RECORD ); // ... OTF_SEND_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_SendMsg, + (OTF_FunctionPointer*)HandleSendMsg, OTF_SEND_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_SEND_RECORD ); // ... OTF_RECEIVE_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_RecvMsg, + (OTF_FunctionPointer*)HandleRecvMsg, OTF_RECEIVE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_RECEIVE_RECORD ); // ... OTF_BEGINCOLLOP_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_BeginCollOp, + (OTF_FunctionPointer*)HandleBeginCollOp, OTF_BEGINCOLLOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_BEGINCOLLOP_RECORD ); // ... 
OTF_ENDCOLLOP_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_EndCollOp, + (OTF_FunctionPointer*)HandleEndCollOp, OTF_ENDCOLLOP_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_ENDCOLLOP_RECORD ); // ... OTF_RMAPUT_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_RMAPut, + (OTF_FunctionPointer*)HandleRMAPut, OTF_RMAPUT_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_RMAPUT_RECORD ); // ... OTF_RMAPUTRE_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_RMAPutRemoteEnd, + (OTF_FunctionPointer*)HandleRMAPutRemoteEnd, OTF_RMAPUTRE_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_RMAPUTRE_RECORD ); // ... OTF_RMAGET_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_RMAGet, + (OTF_FunctionPointer*)HandleRMAGet, OTF_RMAGET_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_RMAGET_RECORD ); // ... OTF_RMAEND_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_RMAEnd, + (OTF_FunctionPointer*)HandleRMAEnd, OTF_RMAEND_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_RMAEND_RECORD ); // rewrite events @@ -399,35 +393,38 @@ EventsAndStatsC::rewrite() } else // m_scope == SCOPE_STATS { + // create first handler argument + FirstHandlerArg_StatsS fha( wstream ); + // set record handler and its first argument for ... // // ... 
OTF_FUNCTIONSUMMARY_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_FunctionSummary, + (OTF_FunctionPointer*)HandleFunctionSummary, OTF_FUNCTIONSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_FUNCTIONSUMMARY_RECORD ); // ... OTF_MESSAGESUMMARY_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_MessageSummary, + (OTF_FunctionPointer*)HandleMessageSummary, OTF_MESSAGESUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_MESSAGESUMMARY_RECORD ); // ... OTF_COLLOPSUMMARY_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_CollOpSummary, + (OTF_FunctionPointer*)HandleCollOpSummary, OTF_COLLOPSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_COLLOPSUMMARY_RECORD ); // ... 
OTF_FILEOPERATIONSUMMARY_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_FileOpSummary, + (OTF_FunctionPointer*)HandleFileOpSummary, OTF_FILEOPERATIONSUMMARY_RECORD ); - OTF_HandlerArray_setFirstHandlerArg( handler_array, wstream, + OTF_HandlerArray_setFirstHandlerArg( handler_array, &fha, OTF_FILEOPERATIONSUMMARY_RECORD ); // rewrite statistics @@ -444,15 +441,16 @@ EventsAndStatsC::rewrite() } } -#ifdef VT_UNIFY_HOOKS_AEVENTS +#if (defined(VT_UNIFY_HOOKS_AEVENTS) || defined(VT_UNIFY_HOOKS_MARGINS)) if( m_scope == SCOPE_EVENTS ) { - // trigger HooksAsyncEventsC's generic hook for closing event stream + // trigger generic hooks for closing event stream theHooks->triggerGenericHook( - VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_CLOSE, 1, - const_cast( &streamid ) ); + VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_CLOSE | + VT_UNIFY_HOOKS_MARGINS_GENID__EVENT_STREAM_CLOSE, 1, + const_cast( &streamid ) ); } -#endif // VT_UNIFY_HOOKS_AEVENTS +#endif // VT_UNIFY_HOOKS_AEVENTS || VT_UNIFY_HOOKS_MARGINS // close writer stream OTF_WStream_close( wstream ); diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_events_stats.h b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_events_stats.h index f0711e5bd8..2ae8477fd5 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_events_stats.h +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_events_stats.h @@ -37,11 +37,11 @@ public: private: - // rewrite events/statistics - bool rewrite(); + // rewrite events/statistics + bool rewrite(); - // scope to process by this class (events or statistics) - ScopeTypeT m_scope; + // scope to process by this class (events or statistics) + ScopeTypeT m_scope; }; diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_handlers.cc b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_handlers.cc index b4dd086272..2c638035a0 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_handlers.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_handlers.cc @@ -23,10 +23,8 @@ #include -// 
key-value list "record handler" -// translate local key tokens to global tokens void -Handle_KeyValueList( const uint32_t & proc, OTF_KeyValueList * kvs ) +HandleKeyValueList( const uint32_t & proc, OTF_KeyValueList * kvs ) { // get number of key-value pairs uint32_t keys_num = OTF_KeyValueList_getCount( kvs ); @@ -58,7 +56,7 @@ Handle_KeyValueList( const uint32_t & proc, OTF_KeyValueList * kvs ) // int -Handle_DefComment( LargeVectorC * locDefs, +HandleDefComment( FirstHandlerArg_DefsS * fha, uint32_t streamid, const char * comment ) { // get common string identifiers as std::string's for more convenient use @@ -142,13 +140,14 @@ Handle_DefComment( LargeVectorC * locDefs, } // add local definition to vector - locDefs->push_back( new DefRec_DefCommentS( streamid, 0, type, _comment ) ); + fha->loc_defs.push_back( new DefRec_DefCommentS + ( streamid, 0, type, _comment ) ); return OTF_RETURN_OK; } int -Handle_DefCreator( LargeVectorC * locDefs, +HandleDefCreator( FirstHandlerArg_DefsS * fha, uint32_t streamid, const char * creator ) { std::string _creator(creator); @@ -158,13 +157,13 @@ Handle_DefCreator( LargeVectorC * locDefs, &streamid, &_creator ); // add local definition to vector - locDefs->push_back( new DefRec_DefCreatorS( _creator ) ); + fha->loc_defs.push_back( new DefRec_DefCreatorS( _creator ) ); return OTF_RETURN_OK; } int -Handle_DefTimerResolution( LargeVectorC * locDefs, +HandleDefTimerResolution( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint64_t ticksPerSecond ) { // trigger read record hook @@ -172,13 +171,13 @@ Handle_DefTimerResolution( LargeVectorC * locDefs, &streamid, &ticksPerSecond ); // add local definition to vector - locDefs->push_back( new DefRec_DefTimerResolutionS( ticksPerSecond ) ); + fha->loc_defs.push_back( new DefRec_DefTimerResolutionS( ticksPerSecond ) ); return OTF_RETURN_OK; } int -Handle_DefTimeRange( LargeVectorC * locDefs, +HandleDefTimeRange( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint64_t minTime, uint64_t 
maxTime ) { // trigger read record hook @@ -186,14 +185,14 @@ Handle_DefTimeRange( LargeVectorC * locDefs, &streamid, &minTime, &maxTime ); // add local definition to vector - locDefs->push_back( new DefRec_DefTimeRangeS - ( streamid, minTime, maxTime ) ); + fha->loc_defs.push_back( new DefRec_DefTimeRangeS + ( streamid, minTime, maxTime ) ); return OTF_RETURN_OK; } int -Handle_DefProcess( LargeVectorC * locDefs, +HandleDefProcess( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t parent ) { std::string _name(name); @@ -203,18 +202,20 @@ Handle_DefProcess( LargeVectorC * locDefs, &streamid, &deftoken, &_name, &parent ); // add local definition to vector - locDefs->push_back( new DefRec_DefProcessS( deftoken, _name, parent ) ); + fha->loc_defs.push_back( new DefRec_DefProcessS( deftoken, _name, parent ) ); return OTF_RETURN_OK; } int -Handle_DefProcessGroup( LargeVectorC * locDefs, +HandleDefProcessGroup( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t n, uint32_t * array ) { // get common string identifiers as std::string's for more convenient use // + static const std::string all_name = + VT_UNIFY_STRID_ALL_PROCGRP; static const std::string node_prefix = VT_UNIFY_STRID_NODE_PROCGRP; static const std::string mpi_comm_world_name = @@ -223,12 +224,8 @@ Handle_DefProcessGroup( LargeVectorC * locDefs, VT_UNIFY_STRID_MPI_COMM_SELF_PROCGRP; static const std::string mpi_comm_other_name = VT_UNIFY_STRID_MPI_COMM_OTHER_PROCGRP; - static const std::string omp_team_name = - VT_UNIFY_STRID_OMP_TEAM_PROCGRP; - static const std::string gpu_comm_name = - VT_UNIFY_STRID_GPU_COMM_PROCGRP; - static const std::string gpu_group_name = - VT_UNIFY_STRID_GPU_GROUP_PROCGRP; + static const std::string mpi_group_name = + VT_UNIFY_STRID_MPI_GROUP_PROCGRP; static const std::string user_comm_prefix = VT_UNIFY_STRID_USER_COMM_PROCGRP; @@ -236,15 +233,20 @@ Handle_DefProcessGroup( LargeVectorC * locDefs, 
// trigger read record hook theHooks->triggerReadRecordHook( HooksC::Record_DefProcessGroup, 5, - &streamid, &deftoken, &_name, &n, array ); + &streamid, &deftoken, &_name, &n, &array ); // determine process group type // DefRec_DefProcessGroupS::ProcessGroupTypeT type; - if( _name.length() > node_prefix.length() && - _name.compare( 0, node_prefix.length(), node_prefix ) == 0 ) + if( _name.compare( all_name ) == 0 ) + { + type = DefRec_DefProcessGroupS::TYPE_ALL; + _name = ""; + } + else if( _name.length() > node_prefix.length() && + _name.compare( 0, node_prefix.length(), node_prefix ) == 0 ) { type = DefRec_DefProcessGroupS::TYPE_NODE; // cut identifier prefix from node name @@ -265,19 +267,9 @@ Handle_DefProcessGroup( LargeVectorC * locDefs, type = DefRec_DefProcessGroupS::TYPE_MPI_COMM_OTHER; _name = ""; } - else if( _name.compare( omp_team_name ) == 0 ) + else if( _name.compare( mpi_group_name ) == 0 ) { - type = DefRec_DefProcessGroupS::TYPE_OMP_TEAM; - _name = ""; - } - else if( _name.compare( gpu_comm_name ) == 0 ) - { - type = DefRec_DefProcessGroupS::TYPE_GPU_COMM; - _name = ""; - } - else if( _name.compare( gpu_group_name ) == 0 ) - { - type = DefRec_DefProcessGroupS::TYPE_GPU_GROUP; + type = DefRec_DefProcessGroupS::TYPE_MPI_GROUP; _name = ""; } else if( _name.length() > user_comm_prefix.length() && @@ -294,15 +286,30 @@ Handle_DefProcessGroup( LargeVectorC * locDefs, } // add local definition to vector - locDefs->push_back( new DefRec_DefProcessGroupS - ( streamid, deftoken, type, _name, n, - array ) ); + fha->loc_defs.push_back( new DefRec_DefProcessGroupS + ( streamid, deftoken, type, _name, n, + array ) ); return OTF_RETURN_OK; } int -Handle_DefSclFile( LargeVectorC * locDefs, +HandleDefProcessGroupAttributes( FirstHandlerArg_DefsS * fha, + uint32_t streamid, uint32_t group, uint32_t attributes ) +{ + // trigger read record hook + theHooks->triggerReadRecordHook( HooksC::Record_DefProcessGroupAttributes, 3, + &streamid, &group, &attributes ); + + // 
add local definition to vector + fha->loc_defs.push_back( new DefRec_DefProcessGroupAttributesS + ( streamid, group, attributes ) ); + + return OTF_RETURN_OK; +} + +int +HandleDefSclFile( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * filename ) { std::string _filename(filename); @@ -312,14 +319,14 @@ Handle_DefSclFile( LargeVectorC * locDefs, &streamid, &deftoken, &_filename ); // add local definition to vector - locDefs->push_back( new DefRec_DefSclFileS - ( streamid, deftoken, _filename ) ); + fha->loc_defs.push_back( new DefRec_DefSclFileS + ( streamid, deftoken, _filename ) ); return OTF_RETURN_OK; } int -Handle_DefScl( LargeVectorC * locDefs, +HandleDefScl( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, uint32_t sclfile, uint32_t sclline ) { // trigger read record hook @@ -327,14 +334,14 @@ Handle_DefScl( LargeVectorC * locDefs, &streamid, &deftoken, &sclfile, &sclline ); // add local definition to vector - locDefs->push_back( new DefRec_DefSclS - ( streamid, deftoken, sclfile, sclline ) ); + fha->loc_defs.push_back( new DefRec_DefSclS + ( streamid, deftoken, sclfile, sclline ) ); return OTF_RETURN_OK; } int -Handle_DefFileGroup( LargeVectorC * locDefs, +HandleDefFileGroup( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name ) { std::string _name(name); @@ -344,14 +351,14 @@ Handle_DefFileGroup( LargeVectorC * locDefs, &streamid, &deftoken, &_name ); // add local definition to vector - locDefs->push_back( new DefRec_DefFileGroupS - ( streamid, deftoken, _name ) ); + fha->loc_defs.push_back( new DefRec_DefFileGroupS + ( streamid, deftoken, _name ) ); return OTF_RETURN_OK; } int -Handle_DefFile( LargeVectorC * locDefs, +HandleDefFile( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t group ) { std::string _name(name); @@ -361,14 +368,14 @@ Handle_DefFile( LargeVectorC * locDefs, &streamid, &deftoken, &_name, &group ); // add local 
definition to vector - locDefs->push_back( new DefRec_DefFileS - ( streamid, deftoken, _name, group ) ); + fha->loc_defs.push_back( new DefRec_DefFileS + ( streamid, deftoken, _name, group ) ); return OTF_RETURN_OK; } int -Handle_DefFunctionGroup( LargeVectorC * locDefs, +HandleDefFunctionGroup( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name ) { std::string _name(name); @@ -378,14 +385,14 @@ Handle_DefFunctionGroup( LargeVectorC * locDefs, &streamid, &deftoken, &_name ); // add local definition to vector - locDefs->push_back( new DefRec_DefFunctionGroupS - ( streamid, deftoken, _name ) ); + fha->loc_defs.push_back( new DefRec_DefFunctionGroupS + ( streamid, deftoken, _name ) ); return OTF_RETURN_OK; } int -Handle_DefFunction( LargeVectorC * locDefs, +HandleDefFunction( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t group, uint32_t scltoken ) { @@ -396,15 +403,15 @@ Handle_DefFunction( LargeVectorC * locDefs, &streamid, &deftoken, &_name, &group, &scltoken ); // add local definition to vector - locDefs->push_back( new DefRec_DefFunctionS - ( streamid, deftoken, _name, group, - scltoken ) ); + fha->loc_defs.push_back( new DefRec_DefFunctionS + ( streamid, deftoken, _name, group, + scltoken ) ); return OTF_RETURN_OK; } int -Handle_DefCollOp( LargeVectorC * locDefs, +HandleDefCollOp( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t collOp, const char * name, uint32_t type ) { std::string _name(name); @@ -414,14 +421,14 @@ Handle_DefCollOp( LargeVectorC * locDefs, &streamid, &collOp, &_name, &type ); // add local definition to vector - locDefs->push_back( new DefRec_DefCollOpS - ( streamid, collOp, _name, type ) ); + fha->loc_defs.push_back( new DefRec_DefCollOpS + ( streamid, collOp, _name, type ) ); return OTF_RETURN_OK; } int -Handle_DefCounterGroup( LargeVectorC * locDefs, +HandleDefCounterGroup( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char 
* name ) { std::string _name(name); @@ -431,14 +438,14 @@ Handle_DefCounterGroup( LargeVectorC * locDefs, &streamid, &deftoken, &_name ); // add local definition to vector - locDefs->push_back( new DefRec_DefCounterGroupS - ( streamid, deftoken, _name ) ); + fha->loc_defs.push_back( new DefRec_DefCounterGroupS + ( streamid, deftoken, _name ) ); return OTF_RETURN_OK; } int -Handle_DefCounter( LargeVectorC * locDefs, +HandleDefCounter( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t properties, uint32_t countergroup, const char * unit ) { @@ -450,15 +457,38 @@ Handle_DefCounter( LargeVectorC * locDefs, &streamid, &deftoken, &_name, &properties, &countergroup, &unit ); // add local definition to vector - locDefs->push_back( new DefRec_DefCounterS - ( streamid, deftoken, _name, properties, - countergroup, _unit ) ); + fha->loc_defs.push_back( new DefRec_DefCounterS + ( streamid, deftoken, _name, properties, + countergroup, _unit ) ); return OTF_RETURN_OK; } int -Handle_DefKeyValue( LargeVectorC * locDefs, +HandleDefCounterAssignments( FirstHandlerArg_DefsS * fha, + uint32_t streamid, uint32_t counter, uint32_t n, uint32_t * array ) +{ + assert( n == 1 ); // only one process group assignment per counter allowed + assert( array ); + + uint32_t procgrp = *array; + + // trigger read record hook + theHooks->triggerReadRecordHook( HooksC::Record_DefCounterAssignments, 3, + &streamid, &counter, &procgrp ); + + // register local process group assignment + theDefinitions->groupCounters()->setGroup( streamid, counter, procgrp ); + + // add local definition to vector + fha->loc_defs.push_back( new DefRec_DefCounterAssignmentsS + ( streamid, counter, procgrp ) ); + + return OTF_RETURN_OK; +} + +int +HandleDefKeyValue( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t key, OTF_Type type, const char * name, const char * description ) { @@ -469,7 +499,7 @@ Handle_DefKeyValue( LargeVectorC * locDefs, &streamid, &key, &type, &_name 
); // add local definition to vector - locDefs->push_back( new DefRec_DefKeyValueS + fha->loc_defs.push_back( new DefRec_DefKeyValueS ( streamid, key, type, _name ) ); return OTF_RETURN_OK; @@ -479,9 +509,8 @@ Handle_DefKeyValue( LargeVectorC * locDefs, // int -Handle_DefMarker( LargeVectorC * locDefs, - uint32_t streamid, uint32_t deftoken, const char * name, - uint32_t type ) +HandleDefMarker( FirstHandlerArg_MarkersS * fha, + uint32_t streamid, uint32_t deftoken, const char * name, uint32_t type ) { std::string _name( name ); @@ -490,16 +519,15 @@ Handle_DefMarker( LargeVectorC * locDefs, &streamid, &deftoken, &_name, &type ); // add local marker definition to vector - locDefs->push_back( new DefRec_DefMarkerS - ( streamid, deftoken, type, _name ) ); + fha->loc_defs.push_back( new DefRec_DefMarkerS + ( streamid, deftoken, type, _name ) ); return OTF_RETURN_OK; } int -Handle_MarkerSpot( LargeVectorC * locSpots, - uint64_t time, uint32_t proc, uint32_t marker, - const char * text ) +HandleMarkerSpot( FirstHandlerArg_MarkersS * fha, + uint64_t time, uint32_t proc, uint32_t marker, const char * text ) { std::string _text( text ); @@ -508,8 +536,8 @@ Handle_MarkerSpot( LargeVectorC * locSpots, &time, &proc, &marker, &_text ); // add local marker spot to vector - locSpots->push_back( new MarkersC::MarkerSpotS - ( proc, time, marker, _text ) ); + fha->loc_spots.push_back( new MarkersC::MarkerSpotS + ( proc, time, marker, _text ) ); return OTF_RETURN_OK; } @@ -518,7 +546,7 @@ Handle_MarkerSpot( LargeVectorC * locSpots, // int -Handle_EventComment( OTF_WStream * wstream, +HandleEventComment( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, const char * comment, OTF_KeyValueList * kvs ) { int ret = OTF_RETURN_OK; @@ -530,7 +558,7 @@ Handle_EventComment( OTF_WStream * wstream, &time, &proc, &_comment, &kvs ); // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); #ifdef VT_ETIMESYNC // update time sync. 
parameters, if necessary @@ -555,11 +583,11 @@ Handle_EventComment( OTF_WStream * wstream, // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_EventComment, 6, - &wstream, &time, &proc, &_comment, &kvs, &do_write ); + &(fha->wstream), &time, &proc, &_comment, &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeEventCommentKV( wstream, time, proc, + OTF_WStream_writeEventCommentKV( fha->wstream, time, proc, _comment.c_str(), kvs ) == 0 ) ret = OTF_RETURN_ABORT; @@ -568,7 +596,7 @@ Handle_EventComment( OTF_WStream * wstream, } int -Handle_Enter( OTF_WStream * wstream, +HandleEnter( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t func, uint32_t proc, uint32_t scl, OTF_KeyValueList * kvs ) { @@ -603,18 +631,19 @@ Handle_Enter( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_Enter, 7, - &wstream, &time, &global_func, &proc, &global_scl, &kvs, &do_write ); + &(fha->wstream), &time, &global_func, &proc, &global_scl, &kvs, + &do_write ); // write record if( do_write && - OTF_WStream_writeEnterKV( wstream, time, global_func, proc, + OTF_WStream_writeEnterKV( fha->wstream, time, global_func, proc, global_scl, kvs ) == 0 ) ret = OTF_RETURN_ABORT; @@ -622,7 +651,7 @@ Handle_Enter( OTF_WStream * wstream, } int -Handle_Leave( OTF_WStream * wstream, +HandleLeave( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t func, uint32_t proc, uint32_t scl, OTF_KeyValueList * kvs ) { @@ -661,18 +690,19 @@ Handle_Leave( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_Leave, 7, - &wstream, &time, 
&global_func, &proc, &global_scl, &kvs, &do_write ); + &(fha->wstream), &time, &global_func, &proc, &global_scl, &kvs, + &do_write ); // write record if( do_write && - OTF_WStream_writeLeaveKV( wstream, time, global_func, proc, + OTF_WStream_writeLeaveKV( fha->wstream, time, global_func, proc, global_scl, kvs ) == 0 ) ret = OTF_RETURN_ABORT; @@ -680,7 +710,7 @@ Handle_Leave( OTF_WStream * wstream, } int -Handle_Counter( OTF_WStream * wstream, +HandleCounter( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t counter, uint64_t value, OTF_KeyValueList * kvs ) { @@ -692,36 +722,55 @@ Handle_Counter( OTF_WStream * wstream, theHooks->triggerReadRecordHook( HooksC::Record_Counter, 5, &time, &proc, &counter, &value, &kvs ); + // get global token factory for DefProcessGroup + static const TokenFactoryScopeI * tkfac_defprocgrp = + theTokenFactory->getScope( DEF_REC_TYPE__DefProcessGroup ); + // get global token factory for DefCounter static const TokenFactoryScopeI * tkfac_defcntr = theTokenFactory->getScope( DEF_REC_TYPE__DefCounter ); + // try to get local process group token (!=0 if it's a group counter) + uint32_t procgrp = + theDefinitions->groupCounters()->getGroup( proc, counter ); + + // translate local process group token, if necessary + // + uint32_t global_procgrp = procgrp; + if( procgrp != 0 ) + { + global_procgrp = tkfac_defprocgrp->translate( proc, procgrp ); + assert( global_procgrp != 0 ); + } + // translate local counter token // uint32_t global_counter = tkfac_defcntr->translate( proc, counter ); assert( global_counter != 0 ); // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook - theHooks->triggerWriteRecordHook( HooksC::Record_Counter, 7, - &wstream, &time, &proc, &global_counter, &value, &kvs, &do_write ); + theHooks->triggerWriteRecordHook( HooksC::Record_Counter, 8, + &(fha->wstream), &time, 
&proc, &global_procgrp, &global_counter, &value, + &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeCounterKV( wstream, time, proc, global_counter, - value, kvs ) == 0 ) + OTF_WStream_writeCounterKV( fha->wstream, time, + global_procgrp ? global_procgrp : proc, global_counter, value, + kvs ) == 0 ) ret = OTF_RETURN_ABORT; return ret; } int -Handle_BeginFileOp( OTF_WStream * wstream, +HandleBeginFileOp( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint64_t matchid, uint32_t scl, OTF_KeyValueList * kvs ) { @@ -747,18 +796,18 @@ Handle_BeginFileOp( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_BeginFileOp, 7, - &wstream, &time, &proc, &matchid, &global_scl, &kvs, &do_write ); + &(fha->wstream), &time, &proc, &matchid, &global_scl, &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeBeginFileOperationKV( wstream, time, proc, matchid, + OTF_WStream_writeBeginFileOperationKV( fha->wstream, time, proc, matchid, global_scl, kvs ) == 0 ) ret = OTF_RETURN_ABORT; @@ -766,7 +815,7 @@ Handle_BeginFileOp( OTF_WStream * wstream, } int -Handle_EndFileOp( OTF_WStream * wstream, +HandleEndFileOp( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t file, uint64_t matchid, uint64_t handleid, uint32_t operation, uint64_t bytes, uint32_t scl, OTF_KeyValueList * kvs ) @@ -802,27 +851,28 @@ Handle_EndFileOp( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_EndFileOp, 12, - &wstream, &time, &proc, &global_file, &matchid, &handleid, &operation, - &bytes, &global_scl, &kvs, &do_write 
); + &(fha->wstream), &time, &proc, &global_file, &matchid, &handleid, + &operation, &bytes, &global_scl, &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeEndFileOperationKV( wstream, time, proc, global_file, - matchid, handleid, operation, bytes, global_scl, kvs ) == 0 ) + OTF_WStream_writeEndFileOperationKV( fha->wstream, time, proc, + global_file, matchid, handleid, operation, bytes, global_scl, + kvs ) == 0 ) ret = OTF_RETURN_ABORT; return ret; } int -Handle_SendMsg( OTF_WStream * wstream, +HandleSendMsg( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t sender, uint32_t receiver, uint32_t comm, uint32_t tag, uint32_t length, uint32_t scl, OTF_KeyValueList * kvs ) { @@ -857,7 +907,7 @@ Handle_SendMsg( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( sender, kvs ); + HandleKeyValueList( sender, kvs ); // correct time time = theTimeSync->correctTime( sender, time ); @@ -873,12 +923,12 @@ Handle_SendMsg( OTF_WStream * wstream, // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_SendMsg, 10, - &wstream, &time, &sender, &receiver, &global_comm, &tag, &length, + &(fha->wstream), &time, &sender, &receiver, &global_comm, &tag, &length, &global_scl, &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeSendMsgKV( wstream, time, sender, receiver, + OTF_WStream_writeSendMsgKV( fha->wstream, time, sender, receiver, global_comm, tag, length, global_scl, kvs ) == 0 ) ret = OTF_RETURN_ABORT; @@ -886,7 +936,7 @@ Handle_SendMsg( OTF_WStream * wstream, } int -Handle_RecvMsg( OTF_WStream * wstream, +HandleRecvMsg( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t receiver, uint32_t sender, uint32_t comm, uint32_t tag, uint32_t length, uint32_t scl, OTF_KeyValueList * kvs ) { @@ -921,7 +971,7 @@ Handle_RecvMsg( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( receiver, kvs ); + HandleKeyValueList( receiver, kvs ); // correct time time = 
theTimeSync->correctTime( receiver, time ); @@ -937,12 +987,12 @@ Handle_RecvMsg( OTF_WStream * wstream, // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_RecvMsg, 10, - &wstream, &time, &receiver, &sender, &global_comm, &tag, &length, &scl, - &kvs, &do_write ); + &(fha->wstream), &time, &receiver, &sender, &global_comm, &tag, &length, + &scl, &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeRecvMsgKV( wstream, time, receiver, sender, + OTF_WStream_writeRecvMsgKV( fha->wstream, time, receiver, sender, global_comm, tag, length, global_scl, kvs ) == 0 ) ret = OTF_RETURN_ABORT; @@ -950,7 +1000,7 @@ Handle_RecvMsg( OTF_WStream * wstream, } int -Handle_BeginCollOp( OTF_WStream * wstream, +HandleBeginCollOp( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t operation, uint64_t matchid, uint32_t comm, uint32_t root, uint64_t sent, uint64_t recvd, uint32_t scl, OTF_KeyValueList * kvs ) @@ -996,19 +1046,19 @@ Handle_BeginCollOp( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_BeginCollOp, 12, - &wstream, &time, &proc, &global_operation, &matchid, &global_comm, &root, - &sent, &recvd, &global_scl, &kvs, &do_write ); + &(fha->wstream), &time, &proc, &global_operation, &matchid, &global_comm, + &root, &sent, &recvd, &global_scl, &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeBeginCollectiveOperationKV( wstream, time, proc, + OTF_WStream_writeBeginCollectiveOperationKV( fha->wstream, time, proc, global_operation, matchid, global_comm, root, sent, recvd, global_scl, kvs ) == 0 ) ret = OTF_RETURN_ABORT; @@ -1017,7 +1067,7 @@ Handle_BeginCollOp( OTF_WStream * wstream, } int -Handle_EndCollOp( OTF_WStream * wstream, +HandleEndCollOp( FirstHandlerArg_EventsS * fha, uint64_t 
time, uint32_t proc, uint64_t matchid, OTF_KeyValueList * kvs ) { int ret = OTF_RETURN_OK; @@ -1029,18 +1079,18 @@ Handle_EndCollOp( OTF_WStream * wstream, &time, &proc, &matchid, &kvs ); // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_EndCollOp, 6, - &wstream, &time, &proc, &matchid, &kvs, &do_write ); + &(fha->wstream), &time, &proc, &matchid, &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeEndCollectiveOperationKV( wstream, time, proc, + OTF_WStream_writeEndCollectiveOperationKV( fha->wstream, time, proc, matchid, kvs ) == 0 ) ret = OTF_RETURN_ABORT; @@ -1048,7 +1098,7 @@ Handle_EndCollOp( OTF_WStream * wstream, } int -Handle_RMAPut( OTF_WStream * wstream, +HandleRMAPut( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t origin, uint32_t dest, uint32_t comm, uint32_t tag, uint64_t bytes, uint32_t scl, OTF_KeyValueList * kvs ) { @@ -1083,27 +1133,27 @@ Handle_RMAPut( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_RMAPut, 11, - &wstream, &time, &proc, &origin, &dest, &global_comm, &tag, &bytes, + &(fha->wstream), &time, &proc, &origin, &dest, &global_comm, &tag, &bytes, &global_scl, &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeRMAPutKV( wstream, time, proc, origin, dest, global_comm, - tag, bytes, global_scl, kvs ) == 0 ) + OTF_WStream_writeRMAPutKV( fha->wstream, time, proc, origin, dest, + global_comm, tag, bytes, global_scl, kvs ) == 0 ) ret = OTF_RETURN_ABORT; return ret; } int -Handle_RMAPutRemoteEnd( OTF_WStream * wstream, +HandleRMAPutRemoteEnd( FirstHandlerArg_EventsS * fha, 
uint64_t time, uint32_t proc, uint32_t origin, uint32_t dest, uint32_t comm, uint32_t tag, uint64_t bytes, uint32_t scl, OTF_KeyValueList * kvs ) { @@ -1138,27 +1188,27 @@ Handle_RMAPutRemoteEnd( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_RMAPutRemoteEnd, 11, - &wstream, &time, &proc, &origin, &dest, &global_comm, &tag, &bytes, + &(fha->wstream), &time, &proc, &origin, &dest, &global_comm, &tag, &bytes, &global_scl, &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeRMAPutRemoteEndKV( wstream, time, proc, origin, dest, - global_comm, tag, bytes, global_scl, kvs ) == 0 ) + OTF_WStream_writeRMAPutRemoteEndKV( fha->wstream, time, proc, origin, + dest, global_comm, tag, bytes, global_scl, kvs ) == 0 ) ret = OTF_RETURN_ABORT; return ret; } int -Handle_RMAGet( OTF_WStream * wstream, +HandleRMAGet( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t origin, uint32_t dest, uint32_t comm, uint32_t tag, uint64_t bytes, uint32_t scl, OTF_KeyValueList * kvs ) { @@ -1193,27 +1243,27 @@ Handle_RMAGet( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_RMAGet, 11, - &wstream, &time, &proc, &origin, &dest, &global_comm, &tag, &bytes, + &(fha->wstream), &time, &proc, &origin, &dest, &global_comm, &tag, &bytes, &global_scl, &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeRMAGetKV( wstream, time, proc, origin, dest, global_comm, - tag, bytes, global_scl, kvs ) == 0 ) + OTF_WStream_writeRMAGetKV( fha->wstream, time, proc, origin, dest, + global_comm, tag, bytes, global_scl, kvs ) == 0 ) ret 
= OTF_RETURN_ABORT; return ret; } int -Handle_RMAEnd( OTF_WStream * wstream, +HandleRMAEnd( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t remote, uint32_t comm, uint32_t tag, uint32_t scl, OTF_KeyValueList * kvs ) { @@ -1248,19 +1298,19 @@ Handle_RMAEnd( OTF_WStream * wstream, } // translate local key token(s) - Handle_KeyValueList( proc, kvs ); + HandleKeyValueList( proc, kvs ); // correct time time = theTimeSync->correctTime( proc, time ); // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_RMAEnd, 9, - &wstream, &time, &proc, &remote, &global_comm, &tag, &global_scl, &kvs, - &do_write ); + &(fha->wstream), &time, &proc, &remote, &global_comm, &tag, &global_scl, + &kvs, &do_write ); // write record if( do_write && - OTF_WStream_writeRMAEndKV( wstream, time, proc, remote, global_comm, + OTF_WStream_writeRMAEndKV( fha->wstream, time, proc, remote, global_comm, tag, global_scl, kvs ) == 0 ) ret = OTF_RETURN_ABORT; @@ -1271,7 +1321,7 @@ Handle_RMAEnd( OTF_WStream * wstream, // int -Handle_FunctionSummary( OTF_WStream * wstream, +HandleFunctionSummary( FirstHandlerArg_StatsS * fha, uint64_t time, uint32_t func, uint32_t proc, uint64_t invocations, uint64_t exclTime, uint64_t inclTime ) { @@ -1297,12 +1347,12 @@ Handle_FunctionSummary( OTF_WStream * wstream, // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_FunctionSummary, 8, - &wstream, &time, &global_func, &proc, &invocations, &exclTime, + &(fha->wstream), &time, &global_func, &proc, &invocations, &exclTime, &inclTime, &do_write ); // write record if( do_write && - OTF_WStream_writeFunctionSummary( wstream, time, global_func, + OTF_WStream_writeFunctionSummary( fha->wstream, time, global_func, proc, invocations, exclTime, inclTime ) == 0 ) ret = OTF_RETURN_ABORT; @@ -1310,7 +1360,7 @@ Handle_FunctionSummary( OTF_WStream * wstream, } int -Handle_MessageSummary( OTF_WStream * wstream, +HandleMessageSummary( FirstHandlerArg_StatsS * fha, 
uint64_t time, uint32_t proc, uint32_t peer, uint32_t comm, uint32_t type, uint64_t sentNum, uint64_t recvNum, uint64_t sentBytes, uint64_t recvBytes ) { @@ -1341,12 +1391,12 @@ Handle_MessageSummary( OTF_WStream * wstream, // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_MessageSummary, 10, - &wstream, &time, &proc, &peer, &global_comm, &sentNum, &recvNum, + &(fha->wstream), &time, &proc, &peer, &global_comm, &sentNum, &recvNum, &sentBytes, &recvBytes, &do_write ); // write record if( do_write && - OTF_WStream_writeMessageSummary( wstream, time, proc, peer, + OTF_WStream_writeMessageSummary( fha->wstream, time, proc, peer, global_comm, type, sentNum, recvNum, sentBytes, recvBytes ) == 0 ) ret = OTF_RETURN_ABORT; @@ -1354,7 +1404,7 @@ Handle_MessageSummary( OTF_WStream * wstream, } int -Handle_CollOpSummary( OTF_WStream * wstream, +HandleCollOpSummary( FirstHandlerArg_StatsS * fha, uint64_t time, uint32_t proc, uint32_t comm, uint32_t collop, uint64_t sentNum, uint64_t recvNum, uint64_t sentBytes, uint64_t recvBytes ) { @@ -1398,12 +1448,12 @@ Handle_CollOpSummary( OTF_WStream * wstream, // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_CollOpSummary, 10, - &wstream, &time, &proc, &global_comm, &global_collop, &sentNum, &recvNum, - &sentBytes, &recvBytes, &do_write ); + &(fha->wstream), &time, &proc, &global_comm, &global_collop, &sentNum, + &recvNum, &sentBytes, &recvBytes, &do_write ); // write record if( do_write && - OTF_WStream_writeCollopSummary( wstream, time, proc, + OTF_WStream_writeCollopSummary( fha->wstream, time, proc, global_comm, global_collop, sentNum, recvNum, sentBytes, recvBytes ) == 0 ) ret = OTF_RETURN_ABORT; @@ -1412,7 +1462,7 @@ Handle_CollOpSummary( OTF_WStream * wstream, } int -Handle_FileOpSummary( OTF_WStream * wstream, +HandleFileOpSummary( FirstHandlerArg_StatsS * fha, uint64_t time, uint32_t file, uint32_t proc, uint64_t nopen, uint64_t nclose, uint64_t nread, uint64_t nwrite, 
uint64_t nseek, uint64_t bytesRead, uint64_t bytesWrite ) @@ -1440,12 +1490,12 @@ Handle_FileOpSummary( OTF_WStream * wstream, // trigger write record hook theHooks->triggerWriteRecordHook( HooksC::Record_FileOpSummary, 12, - &wstream, &time, &global_file, &proc, &nopen, &nclose, &nread, &nwrite, - &nseek, &bytesRead, &bytesWrite, &do_write ); + &(fha->wstream), &time, &global_file, &proc, &nopen, &nclose, &nread, + &nwrite, &nseek, &bytesRead, &bytesWrite, &do_write ); // write record if( do_write && - OTF_WStream_writeFileOperationSummary( wstream, time, global_file, + OTF_WStream_writeFileOperationSummary( fha->wstream, time, global_file, proc, nopen, nclose, nread, nwrite, nseek, bytesRead, bytesWrite ) == 0 ) ret = OTF_RETURN_ABORT; diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_handlers.h b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_handlers.h index 5fdd3d10b6..b24ed71234 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_handlers.h +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_handlers.h @@ -19,158 +19,203 @@ #include "otf.h" +// +// first handler argument structures for reading ... +// + +// ... definitions +struct FirstHandlerArg_DefsS +{ + FirstHandlerArg_DefsS( LargeVectorC & _loc_defs ) + : loc_defs( _loc_defs ) {} + + LargeVectorC & loc_defs; + +}; + +// ... marker +struct FirstHandlerArg_MarkersS +{ + FirstHandlerArg_MarkersS( LargeVectorC & _loc_defs, + LargeVectorC & _loc_spots ) + : loc_defs( _loc_defs ), loc_spots( _loc_spots ) {} + + LargeVectorC & loc_defs; + LargeVectorC & loc_spots; + +}; + +// ... events +struct FirstHandlerArg_EventsS +{ + FirstHandlerArg_EventsS( OTF_WStream *& _wstream ) + : wstream( _wstream ) {} + + OTF_WStream * wstream; + +}; + +// ... 
statistics +typedef FirstHandlerArg_EventsS FirstHandlerArg_StatsS; + // key-value list "record handler" // translate local key tokens to global tokens -void Handle_KeyValueList( const uint32_t & proc, OTF_KeyValueList * kvs ); +void HandleKeyValueList( const uint32_t & proc, OTF_KeyValueList * kvs ); // definition record handlers // -int Handle_DefComment( LargeVectorC * locDefs, +int HandleDefComment( FirstHandlerArg_DefsS * fha, uint32_t streamid, const char * comment ); -int Handle_DefCreator( LargeVectorC * locDefs, +int HandleDefCreator( FirstHandlerArg_DefsS * fha, uint32_t streamid, const char * creator ); -int Handle_DefTimerResolution( LargeVectorC * locDefs, +int HandleDefTimerResolution( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint64_t ticksPerSecond ); -int Handle_DefTimeRange( LargeVectorC * locDefs, +int HandleDefTimeRange( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint64_t minTime, uint64_t maxTime ); -int Handle_DefProcess( LargeVectorC * locDefs, +int HandleDefProcess( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t parent ); -int Handle_DefProcessGroup( LargeVectorC * locDefs, +int HandleDefProcessGroup( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t n, uint32_t * array ); -int Handle_DefSclFile( LargeVectorC * locDefs, +int HandleDefProcessGroupAttributes( FirstHandlerArg_DefsS * fha, + uint32_t streamid, uint32_t group, uint32_t attributes ); + +int HandleDefSclFile( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * filename ); -int Handle_DefScl( LargeVectorC * locDefs, +int HandleDefScl( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, uint32_t sclfile, uint32_t sclline ); -int Handle_DefFileGroup( LargeVectorC * locDefs, +int HandleDefFileGroup( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name ); -int Handle_DefFile( LargeVectorC * locDefs, +int 
HandleDefFile( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t group ); -int Handle_DefFunctionGroup( LargeVectorC * locDefs, +int HandleDefFunctionGroup( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name ); -int Handle_DefFunction( LargeVectorC * locDefs, +int HandleDefFunction( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t group, uint32_t scltoken ); -int Handle_DefCollOp( LargeVectorC * locDefs, +int HandleDefCollOp( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t collOp, const char * name, uint32_t type ); -int Handle_DefCounterGroup( LargeVectorC * locDefs, +int HandleDefCounterGroup( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name ); -int Handle_DefCounter( LargeVectorC * locDefs, +int HandleDefCounter( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t properties, uint32_t countergroup, const char * unit ); -int Handle_DefKeyValue( LargeVectorC * locDefs, +int HandleDefCounterAssignments( FirstHandlerArg_DefsS * fha, + uint32_t streamid, uint32_t counter, uint32_t n, uint32_t * array ); + +int HandleDefKeyValue( FirstHandlerArg_DefsS * fha, uint32_t streamid, uint32_t key, OTF_Type type, const char * name, const char * description ); // marker record handlers // -int Handle_DefMarker( LargeVectorC * locDefs, +int HandleDefMarker( FirstHandlerArg_MarkersS * fha, uint32_t streamid, uint32_t deftoken, const char * name, uint32_t type ); -int Handle_MarkerSpot( LargeVectorC * locSpots, +int HandleMarkerSpot( FirstHandlerArg_MarkersS * fha, uint64_t time, uint32_t proc, uint32_t marker, const char * text ); // event record handlers // -int Handle_EventComment( OTF_WStream * wstream, +int HandleEventComment( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, const char * comment, OTF_KeyValueList * kvs ); -int Handle_Enter( OTF_WStream * 
wstream, +int HandleEnter( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t func, uint32_t proc, uint32_t scl, OTF_KeyValueList * kvs ); -int Handle_Leave( OTF_WStream * wstream, +int HandleLeave( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t func, uint32_t proc, uint32_t scl, OTF_KeyValueList * kvs ); -int Handle_Counter( OTF_WStream * wstream, +int HandleCounter( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t counter, uint64_t value, OTF_KeyValueList * kvs ); -int Handle_BeginFileOp( OTF_WStream * wstream, +int HandleBeginFileOp( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint64_t matchid, uint32_t scl, OTF_KeyValueList * kvs ); -int Handle_EndFileOp( OTF_WStream * wstream, +int HandleEndFileOp( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t file, uint64_t matchid, uint64_t handleid, uint32_t operation, uint64_t bytes, uint32_t scl, OTF_KeyValueList * kvs ); -int Handle_SendMsg( OTF_WStream * wstream, +int HandleSendMsg( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t sender, uint32_t receiver, uint32_t comm, uint32_t tag, uint32_t length, uint32_t scl, OTF_KeyValueList * kvs ); -int Handle_RecvMsg( OTF_WStream * wstream, +int HandleRecvMsg( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t receiver, uint32_t sender, uint32_t comm, uint32_t tag, uint32_t length, uint32_t scl, OTF_KeyValueList * kvs ); -int Handle_BeginCollOp( OTF_WStream * wstream, +int HandleBeginCollOp( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t operation, uint64_t matchid, uint32_t comm, uint32_t root, uint64_t sent, uint64_t recvd, uint32_t scl, OTF_KeyValueList * kvs ); -int Handle_EndCollOp( OTF_WStream * wstream, +int HandleEndCollOp( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint64_t matchid, OTF_KeyValueList * kvs ); -int Handle_RMAPut( OTF_WStream * wstream, +int HandleRMAPut( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t 
origin, uint32_t dest, uint32_t comm, uint32_t tag, uint64_t bytes, uint32_t scl, OTF_KeyValueList * kvs ); -int Handle_RMAPutRemoteEnd( OTF_WStream * wstream, +int HandleRMAPutRemoteEnd( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t origin, uint32_t dest, uint32_t comm, uint32_t tag, uint64_t bytes, uint32_t scl, OTF_KeyValueList * kvs ); -int Handle_RMAGet( OTF_WStream * wstream, +int HandleRMAGet( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t origin, uint32_t dest, uint32_t comm, uint32_t tag, uint64_t bytes, uint32_t scl, OTF_KeyValueList * kvs ); -int Handle_RMAEnd( OTF_WStream * wstream, +int HandleRMAEnd( FirstHandlerArg_EventsS * fha, uint64_t time, uint32_t proc, uint32_t remote, uint32_t comm, uint32_t tag, uint32_t scl, OTF_KeyValueList * kvs ); // summary record handlers // -int Handle_FunctionSummary( OTF_WStream * wstream, +int HandleFunctionSummary( FirstHandlerArg_StatsS * fha, uint64_t time, uint32_t func, uint32_t proc, uint64_t invocations, uint64_t exclTime, uint64_t inclTime ); -int Handle_MessageSummary( OTF_WStream * wstream, +int HandleMessageSummary( FirstHandlerArg_StatsS * fha, uint64_t time, uint32_t proc, uint32_t peer, uint32_t comm, uint32_t type, uint64_t sentNum, uint64_t recvNum, uint64_t sentBytes, uint64_t recvBytes ); -int Handle_CollOpSummary( OTF_WStream * wstream, +int HandleCollOpSummary( FirstHandlerArg_StatsS * fha, uint64_t time, uint32_t proc, uint32_t comm, uint32_t collop, uint64_t sentNum, uint64_t recvNum, uint64_t sentBytes, uint64_t recvBytes ); -int Handle_FileOpSummary( OTF_WStream * wstream, +int HandleFileOpSummary( FirstHandlerArg_StatsS * fha, uint64_t time, uint32_t file, uint32_t proc, uint64_t nopen, uint64_t nclose, uint64_t nread, uint64_t nwrite, uint64_t nseek, uint64_t bytesRead, uint64_t bytesWrite ); diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_hooks.cc b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_hooks.cc index 6d3f11d31a..86547ada9f 100644 
--- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_hooks.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_hooks.cc @@ -22,6 +22,9 @@ #ifdef VT_UNIFY_HOOKS_AEVENTS # include "hooks/vt_unify_hooks_aevents.h" #endif // VT_UNIFY_HOOKS_AEVENTS +#ifdef VT_UNIFY_HOOKS_MARGINS +# include "hooks/vt_unify_hooks_margins.h" +#endif // VT_UNIFY_HOOKS_MARGINS #ifdef VT_UNIFY_HOOKS_MSGMATCH # include "hooks/vt_unify_hooks_msgmatch.h" #endif // VT_UNIFY_HOOKS_MSGMATCH @@ -31,6 +34,10 @@ #ifdef VT_UNIFY_HOOKS_TDB # include "hooks/vt_unify_hooks_tdb.h" #endif // VT_UNIFY_HOOKS_TDB +#ifdef VT_UNIFY_HOOKS_THUMB +# include "hooks/vt_unify_hooks_thumb.h" +#endif // VT_UNIFY_HOOKS_THUMB + HooksC * theHooks = 0; // instance of class HooksC @@ -70,6 +77,11 @@ HooksC::registerHooks() m_hooks.push_back( new HooksMsgMatchC() ); #endif // VT_UNIFY_HOOKS_MSGMATCH +#ifdef VT_UNIFY_HOOKS_THUMB + if( HooksThumbC::isEnabled() ) + m_hooks.push_back( new HooksThumbC() ); +#endif // VT_UNIFY_HOOKS_THUMB + #ifdef VT_UNIFY_HOOKS_PROF if( HooksProfC::isEnabled() ) m_hooks.push_back( new HooksProfC() ); @@ -79,6 +91,11 @@ HooksC::registerHooks() if( HooksTdbC::isEnabled() ) m_hooks.push_back( new HooksTdbC() ); #endif // VT_UNIFY_HOOKS_TDB + +#ifdef VT_UNIFY_HOOKS_MARGINS + if( HooksProcessMarginsC::isEnabled() ) + m_hooks.push_back( new HooksProcessMarginsC() ); +#endif // VT_UNIFY_HOOKS_MSGMATCH } void diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_hooks.h b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_hooks.h index 7e5e42d66a..2dcb950ddb 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_hooks.h +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_hooks.h @@ -19,24 +19,28 @@ #include -// ids of generic hooks -// (NOTE: all ids must be unique over all hook classes) +// generic hooks' identifier bits // enum { - // HooksRawC's ids (example; not used) + // HooksRawC's (example; not used) // - VT_UNIFY_HOOKS_RAW_GENID__SOMETHING1 = 100, - VT_UNIFY_HOOKS_RAW_GENID__SOMETHING2 = 101, + 
VT_UNIFY_HOOKS_RAW_GENID__SOMETHING1 = 1<<0, + VT_UNIFY_HOOKS_RAW_GENID__SOMETHING2 = 1<<1, - // HooksAsyncEventsC's ids + // HooksAsyncEventsC's // - VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_OPEN = 200, - VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_CLOSE = 201, + VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_OPEN = 1<<2, + VT_UNIFY_HOOKS_AEVENTS_GENID__EVENT_STREAM_CLOSE = 1<<3, - // HooksTdbC's ids + // HooksTdbC's // - VT_UNIFY_HOOKS_TDB_GENID__STARTSTOPTIME_EPOCH = 300 + VT_UNIFY_HOOKS_TDB_GENID__STARTSTOPTIME_EPOCH = 1<<4, + + // HooksProcessMarginsC's + // + VT_UNIFY_HOOKS_MARGINS_GENID__EVENT_STREAM_OPEN = 1<<5, + VT_UNIFY_HOOKS_MARGINS_GENID__EVENT_STREAM_CLOSE = 1<<6 }; @@ -86,6 +90,7 @@ public: Record_DefTimerResolution, Record_DefTimeRange, Record_DefProcessGroup, + Record_DefProcessGroupAttributes, Record_DefProcess, Record_DefSclFile, Record_DefScl, @@ -96,6 +101,7 @@ public: Record_DefCollOp, Record_DefCounterGroup, Record_DefCounter, + Record_DefCounterAssignments, Record_DefKeyValue, // summary records diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_markers.cc b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_markers.cc index bfe4e9a9a2..c785590e30 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_markers.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_markers.cc @@ -31,28 +31,22 @@ MarkersC * theMarkers = 0; // instance of class MarkersC MarkersC::MarkersC() : m_tkfacScope( 0 ) { - assert( theTokenFactory ); - MASTER { - // create token factory scope for marker definitions + // create global token factory scope for marker definitions // m_tkfacScope = new TokenFactoryScopeC( &m_globDefs ); assert( m_tkfacScope ); - theTokenFactory->addScope( DEF_REC_TYPE__DefMarker, m_tkfacScope ); } } MarkersC::~MarkersC() { - assert( theTokenFactory ); - MASTER { - // delete token factory scope of def. 
marker records - // - theTokenFactory->deleteScope( DEF_REC_TYPE__DefMarker ); + // delete global token factory scope for marker definitions + delete m_tkfacScope; } } @@ -118,8 +112,53 @@ MarkersC::cleanUp() char filename1[STRBUFSIZE]; char filename2[STRBUFSIZE]; + // remove local marker files, if necessary + // + if( Params.doclean ) + { + int streams_num = (int)MyStreamIds.size(); + int i; + +#if defined(HAVE_OMP) && HAVE_OMP +# pragma omp parallel for private(i, filename1) +#endif // HAVE_OMP + for( i = 0; i < streams_num; i++ ) + { + const uint32_t & streamid = MyStreamIds[i]; + + // try to remove file without compression suffix + OTF_getFilename( Params.in_file_prefix.c_str(), streamid, + OTF_FILETYPE_MARKER, STRBUFSIZE, filename1 ); + if( remove( filename1 ) == 0 ) + PVPrint( 3, " Removed %s\n", filename1 ); + + // try to remove file with compression suffix + OTF_getFilename( Params.in_file_prefix.c_str(), streamid, + OTF_FILETYPE_MARKER | OTF_FILECOMPRESSION_COMPRESSED, + STRBUFSIZE, filename1 ); + if( remove( filename1 ) == 0 ) + PVPrint( 3, " Removed %s\n", filename1 ); + } + } + MASTER { + // remove previous created marker output file + // + + // try to remove file without compression suffix + OTF_getFilename( Params.out_file_prefix.c_str(), 0, + OTF_FILETYPE_MARKER, STRBUFSIZE, filename1 ); + if( remove( filename1 ) == 0 ) + VPrint( 3, " Removed %s\n", filename1 ); + + // try to remove file with compression suffix + OTF_getFilename( Params.out_file_prefix.c_str(), 0, + OTF_FILETYPE_MARKER | OTF_FILECOMPRESSION_COMPRESSED, + STRBUFSIZE, filename1 ); + if( remove( filename1 ) == 0 ) + VPrint( 3, " Removed %s\n", filename1 ); + // rename temporary marker output file // @@ -143,42 +182,9 @@ MarkersC::cleanUp() VPrint( 3, " Renamed %s to %s\n", filename1, filename2 ); } - // remove local marker files, if necessary - // - if( Params.doclean ) - { - int streams_num = (int)MyStreamIds.size(); - int i; - -#if defined(HAVE_OMP) && HAVE_OMP -# pragma omp parallel 
for private(i, filename1) -#endif // HAVE_OMP - for( i = 0; i < streams_num; i++ ) - { - const uint32_t & streamid = MyStreamIds[i]; - - bool removed = false; - - // get file name without compression suffix - OTF_getFilename( Params.in_file_prefix.c_str(), streamid, - OTF_FILETYPE_MARKER, STRBUFSIZE, filename1 ); - - // try to remove file - if( !( removed = ( remove( filename1 ) == 0 ) ) ) - { - // if failed, get file name with compression suffix - OTF_getFilename( Params.in_file_prefix.c_str(), streamid, - OTF_FILETYPE_MARKER | OTF_FILECOMPRESSION_COMPRESSED, - STRBUFSIZE, filename1 ); - - // try to remove file again - removed = ( remove( filename1 ) == 0 ); - } - - if( removed ) - PVPrint( 3, " Removed %s\n", filename1 ); - } - } +#ifdef VT_MPI + SyncError( &error ); +#endif // VT_MPI return !error; } @@ -353,24 +359,31 @@ MarkersC::readLocal( const uint32_t & streamId, // first handler argument for ... // + + // create record handler array + // OTF_HandlerArray * handler_array = OTF_HandlerArray_open(); assert( handler_array ); + // create first handler argument + FirstHandlerArg_MarkersS fha( locDefs, locSpots ); + + // set record handler and its first argument for ... + // + // ... OTF_DEFMARKER_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_DefMarker, + (OTF_FunctionPointer*)HandleDefMarker, OTF_DEFMARKER_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locDefs, - OTF_DEFMARKER_RECORD ); + &fha, OTF_DEFMARKER_RECORD ); // ... 
OTF_MARKER_RECORD OTF_HandlerArray_setHandler( handler_array, - (OTF_FunctionPointer*)Handle_MarkerSpot, + (OTF_FunctionPointer*)HandleMarkerSpot, OTF_MARKER_RECORD ); OTF_HandlerArray_setFirstHandlerArg( handler_array, - &locSpots, - OTF_MARKER_RECORD ); + &fha, OTF_MARKER_RECORD ); // read local markers // diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_tkfac_scope.h b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_tkfac_scope.h index 06a8b7073e..fe2d4f7e5f 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_tkfac_scope.h +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_tkfac_scope.h @@ -34,7 +34,8 @@ public: virtual ~TokenFactoryScopeI() {} // create global definition - virtual uint32_t create( const void * localDef ) = 0; + virtual uint32_t create( const void * localDef, + uint32_t globalToken = 0 ) = 0; // set token translation for process virtual void setTranslation( const uint32_t & process, @@ -81,7 +82,7 @@ public: ~TokenFactoryScopeC(); // create global definition - uint32_t create( const void * localDef ); + uint32_t create( const void * localDef, uint32_t globalToken = 0 ); // set token translation for process inline void setTranslation( const uint32_t & process, diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_tkfac_scope.hh b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_tkfac_scope.hh index 80ccaeffb8..9ff9e8477e 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_tkfac_scope.hh +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_tkfac_scope.hh @@ -34,12 +34,10 @@ TokenFactoryScopeC::~TokenFactoryScopeC() template uint32_t -TokenFactoryScopeC::create( const void * localDef ) +TokenFactoryScopeC::create( const void * localDef, uint32_t globalToken ) { const T & local_def = *static_cast(localDef); - uint32_t global_token; - // search for already created global definition typename std::set::const_iterator it = m_globDefs->find( local_def ); @@ -47,7 +45,7 @@ TokenFactoryScopeC::create( const void * localDef ) // if( it != m_globDefs->end() ) { - 
global_token = it->deftoken; + globalToken = it->deftoken; } // otherwise, create global definition // @@ -56,16 +54,20 @@ TokenFactoryScopeC::create( const void * localDef ) T global_def = local_def; global_def.loccpuid = 0; - global_def.deftoken = global_token = getNextToken(); + + if( globalToken == 0 ) + global_def.deftoken = globalToken = getNextToken(); + else + global_def.deftoken = globalToken; m_globDefs->insert( global_def ).first; } // set token translation for process, if necessary if( local_def.loccpuid != 0 && local_def.deftoken != 0 ) - setTranslation( local_def.loccpuid, local_def.deftoken, global_token ); + setTranslation( local_def.loccpuid, local_def.deftoken, globalToken ); - return global_token; + return globalToken; } template diff --git a/ompi/contrib/vt/vt/tools/vtwrapper/vt_wrapper.cc b/ompi/contrib/vt/vt/tools/vtwrapper/vt_wrapper.cc index 29f21768c6..87e839f969 100644 --- a/ompi/contrib/vt/vt/tools/vtwrapper/vt_wrapper.cc +++ b/ompi/contrib/vt/vt/tools/vtwrapper/vt_wrapper.cc @@ -71,26 +71,22 @@ struct ConfigS { ConfigS() : lang_type( LANG_CC ), inst_type( INST_TYPE_MANUAL ), inst_avail( 0 ), - showme_flags( 0 ), be_verbose( false ), comp_only( false ), - outfile_given( false ), uses_mpi( false ), uses_threads( false ), - uses_openmp( false ), opari_keep_rcfile( false ) - { - opari_rcfile = "opari.rc"; - opari_tabfile = - std::make_pair( std::string( "opari.tab.c" ), - std::string( "opari.tab.o" ) ); - } + showme_flags( 0 ), be_verbose( false ), cleanup( true ), + comp_only( false ), outfile_given( false ), uses_mpi( false ), + uses_threads( false ), uses_openmp( false ), preprocess( false ), + opari_rcfile( DEFAULT_OPARI_RCFILE() ), + opari_tabfile( DEFAULT_OPARI_TABFILE() ), opari_keep_rcfile( false ) {} // set language type inline bool setLanguage( const LangTypeT lang ); - // language type is set to Fortran? + // is language type set to Fortran? 
inline bool fortran() const; // set compiler command inline void setCompilerCmd( const std::string& cmd ); - // add compiler argument + // add compiler argument(s) inline void addCompilerArg( const std::string& arg ); // add library to link @@ -105,23 +101,26 @@ struct ConfigS // set OPARI rc file inline void setOpariRcFile( const std::string& file ); - // add OPARI argument + // add/set OPARI argument(s) inline void addOpariArg( const std::string& arg ); - // add TAU instumentor argument + // add/set TAU instumentor argument(s) inline void addTauinstArg( const std::string& arg ); - // add TAU parser argument + // add/set TAU parser argument(s) inline void addTauinstParseArg( const std::string& arg ); + // add/set C preprocessor flag(s) + inline void addPrepFlag( const std::string& flag ); + // set flag for MPI usage - inline void setUsesMpi( const bool set, const bool ovwrt = false ); + inline void setUsesMpi( bool set, bool ovwrt = false ); // set flag for Thread usage - inline void setUsesThreads( const bool set, const bool ovwrt = false ); + inline void setUsesThreads( bool set, bool ovwrt = false ); // set flag for OpenMP usage - inline void setUsesOpenMP( const bool set, const bool ovwrt = false ); + inline void setUsesOpenMP( bool set, bool ovwrt = false ); // set available instrumentation type inline bool setInstAvail( const std::string& type ); @@ -131,9 +130,19 @@ struct ConfigS inline bool setInstType( const InstTypeT type ); inline bool setInstType( const std::string& type ); - // instrumentation type is available? + // get instrumentation type + inline InstTypeT getInstType() const; + + // get name of instrumentation type + inline std::string getInstTypeName() const; + + // is instrumentation type available? 
inline bool isInstAvail( const InstTypeT type ) const; + static const std::string DEFAULT_OPARI_RCFILE() { return "opari.rc"; } + static const std::pair DEFAULT_OPARI_TABFILE() + { return std::make_pair( "opari.tab.c", "opari.tab.o" ); } + LangTypeT lang_type; // language type InstTypeT inst_type; // instrumentation type // (e.g. compinst, manual, ...) @@ -141,6 +150,7 @@ struct ConfigS int inst_avail; // bitmask for available instr.-types int showme_flags; // bitmask for showme flags bool be_verbose; // Flag: be verbose? + bool cleanup; // Flag: remove intermediate files? bool comp_only; // Flag: compile only? bool outfile_given; // Flag: output file given? bool uses_mpi; // Flag: uses MPI? @@ -160,6 +170,11 @@ struct ConfigS std::string vt_pomplib; // VT's POMP library std::string vt_dynattlib; // VT's Dyninst attach library + std::string prep_cmd; // C preprocessor command + std::string prep_flags; // C preprocessor flags + bool preprocess; // preprocess source files before parsing + // by OPARI and/or PDT + std::string comp_cmdenv; // compiler command env. name std::string comp_flagsenv; // compiler flags env. 
name std::string comp_cmd; // compiler command @@ -222,6 +237,10 @@ int showOrExecuteCommand( std::string& cmd ); // (only necessary for Fortran) void getIncFilesFromTabFile( std::vector& incfiles ); +// add string to string-list or reset string-list to the given string +inline void addOrSetStringList( std::string& list, const std::string& str, + bool reset = false ); + // remove leading, trailing, and double spaces from a string inline void trimString( std::string& str ); @@ -284,16 +303,16 @@ readDataFile() std::string( vt_installdirs_get( VT_INSTALLDIR_DATADIR ) ) + "/" + std::string( ExeName ) + "-wrapper-data.txt"; - const uint32_t keys_num = 29; + const uint32_t keys_num = 31; const std::string keys[] = { "version", "language", "compiler_env", "compiler_flags_env", - "compiler", "compiler_flags", "linker_flags", "libs", "includedir", - "libdir", "vtlib", "vtmpilib", "vtmtlib", "vthyblib", "vtpomplib", - "vtdynattlib", "opari_bin", "opari_opts", "opari_tab_compiler", - "opari_tab_compiler_flags", "compinst_compiler_flags", - "dyninst_compiler_flags", "tauinst_bin", "tauinst_opts", - "tauinst_parse_bin", "tauinst_parse_opts", - "inst_avail", "inst_default", "partype_default" + "compiler", "compiler_flags", "linker_flags", "libs", "preprocessor", + "preprocessor_flags", "includedir", "libdir", "vtlib", "vtmpilib", + "vtmtlib", "vthyblib", "vtpomplib", "vtdynattlib", "opari_bin", + "opari_opts", "opari_tab_compiler", "opari_tab_compiler_flags", + "compinst_compiler_flags", "dyninst_compiler_flags", "tauinst_bin", + "tauinst_opts", "tauinst_parse_bin", "tauinst_parse_opts", "inst_avail", + "inst_default", "partype_default" }; std::ifstream in( data_file.c_str() ); @@ -421,97 +440,107 @@ readDataFile() Config.comp_libs = value; break; } - case 9: // includedir + case 9: // preprocessor + { + Config.prep_cmd = value; + break; + } + case 10: // preprocessor flags + { + Config.prep_flags = value; + break; + } + case 11: // includedir { Config.vt_incdir = "-I" + value; 
break; } - case 10: // libdir + case 12: // libdir { Config.vt_libdir = "-L" + value; break; } - case 11: // vtlib + case 13: // vtlib { Config.vt_seqlib = value; break; } - case 12: // vtmpilib + case 14: // vtmpilib { Config.vt_mpilib = value; break; } - case 13: // vtmtlib + case 15: // vtmtlib { Config.vt_mtlib = value; break; } - case 14: // vthyblib + case 16: // vthyblib { Config.vt_hyblib = value; break; } - case 15: // vtpomplib + case 17: // vtpomplib { Config.vt_pomplib = value; break; } - case 16: // vtdynattlib + case 18: // vtdynattlib { Config.vt_dynattlib = value; break; } - case 17: // opari_bin + case 19: // opari_bin { Config.opari_cmd = value; break; } - case 18: // opari_opts + case 20: // opari_opts { Config.opari_args = value; break; } - case 19: // opari_tab_compiler + case 21: // opari_tab_compiler { Config.opari_tab_compcmd = value; break; } - case 20: // opari_tab_compiler_flags + case 22: // opari_tab_compiler_flags { Config.opari_tab_compflags = value; break; } - case 21: // compinst_compiler_flags + case 23: // compinst_compiler_flags { Config.compinst_flags = value; break; } - case 22: // dyninst_compiler_flags + case 24: // dyninst_compiler_flags { Config.dyninst_flags = value; break; } - case 23: // tauinst_bin + case 25: // tauinst_bin { Config.tauinst_cmd = value; break; } - case 24: // tauinst_opts + case 26: // tauinst_opts { Config.tauinst_args = value; break; } - case 25: // tauinst_parse_bin + case 27: // tauinst_parse_bin { Config.tauinst_parsecmd = value; break; } - case 26: // tauinst_parse_opts + case 28: // tauinst_parse_opts { Config.tauinst_parseargs = value; break; } - case 27: // inst_avail + case 29: // inst_avail { char cvalue[128]; strncpy( cvalue, value.c_str(), sizeof( cvalue ) - 1 ); @@ -542,7 +571,7 @@ readDataFile() break; } - case 28: // inst_default + case 30: // inst_default { if( !Config.setInstType( value ) ) { @@ -554,7 +583,7 @@ readDataFile() } break; } - case 29: // partype_default + case 31: // 
partype_default { if( value.compare( "seq" ) == 0 ) { @@ -743,6 +772,12 @@ parseCommandLine( int argc, char** argv ) { Config.be_verbose = true; } + // -vt:nocleanup + // + else if( arg.compare( "-vt:nocleanup" ) == 0 ) + { + Config.cleanup = false; + } // -vt:inst // else if( arg.compare( "-vt:inst" ) == 0 ) @@ -775,9 +810,17 @@ parseCommandLine( int argc, char** argv ) return false; } - size_t opari_args_len = args[i+1].length()+1; + i++; + if( args[i][0] == '!' ) + { + Config.opari_args = ""; + Config.opari_rcfile = ConfigS::DEFAULT_OPARI_RCFILE(); + Config.opari_tabfile = ConfigS::DEFAULT_OPARI_TABFILE(); + } + + size_t opari_args_len = args[i].length()+1; char* opari_args = new char[opari_args_len]; - strncpy( opari_args, args[++i].c_str(), opari_args_len - 1 ); + strncpy( opari_args, args[i].c_str(), opari_args_len - 1 ); opari_args[opari_args_len - 1] = '\0'; char* token = strtok( opari_args, " " ); @@ -849,6 +892,38 @@ parseCommandLine( int argc, char** argv ) Config.addTauinstParseArg( args[++i] ); } + // -vt:preprocess + // + else if( arg.compare( "-vt:preprocess" ) == 0 ) + { + Config.preprocess = true; + } + // -vt:cpp + // + else if( arg.compare( "-vt:cpp" ) == 0 ) + { + if( i == args.size() - 1 ) + { + std::cerr << ExeName << ": expected -- -vt:cpp" + << std::endl; + return false; + } + + Config.prep_cmd = args[++i]; + } + // -vt:cppflags + // + else if( arg.compare( "-vt:cppflags" ) == 0 ) + { + if( i == args.size() - 1 ) + { + std::cerr << ExeName << ": expected -- -vt:cppflags" + << std::endl; + return false; + } + + Config.addPrepFlag( args[++i] ); + } // -vt:seq // else if( arg.compare( "-vt:seq" ) == 0 ) @@ -957,7 +1032,8 @@ parseCommandLine( int argc, char** argv ) // -vt:help, -vt:version, -vt:show, -vt:showme, vt:showme-compile, // -vt:showme-link, -vt:seq, -vt:mpi, -vt:mt, -vt:hyb, - // -vt:inst, -vt:opari, -vt:tau, -vt:pdt + // -vt:inst, -vt:opari, -vt:tau, -vt:pdt, -vt:preprocess, -vt:cpp, + // -vt:cppflags // (processed above; ignore 
here) // if( arg.compare( "-vt:help" ) == 0 || @@ -967,6 +1043,7 @@ parseCommandLine( int argc, char** argv ) arg.compare( "-vt:showme-compile" ) == 0 || arg.compare( "-vt:showme-link" ) == 0 || arg.compare( "-vt:verbose" ) == 0 || + arg.compare( "-vt:nocleanup" ) == 0 || arg.compare( "-vt:seq" ) == 0 || arg.compare( "-vt:mpi" ) == 0 || arg.compare( "-vt:mt" ) == 0 || @@ -975,7 +1052,10 @@ parseCommandLine( int argc, char** argv ) arg.compare( "-vt:opari" ) == 0 || arg.compare( "-vt:noopari" ) == 0 || arg.compare( "-vt:tau" ) == 0 || - arg.compare( "-vt:pdt" ) == 0 ) + arg.compare( "-vt:pdt" ) == 0 || + arg.compare( "-vt:preprocess" ) == 0 || + arg.compare( "-vt:cpp" ) == 0 || + arg.compare( "-vt:cppflags" ) == 0 ) { // do nothing @@ -983,7 +1063,9 @@ parseCommandLine( int argc, char** argv ) if( arg.compare("-vt:inst") == 0 || arg.compare("-vt:opari") == 0 || arg.compare("-vt:tau") == 0 || - arg.compare("-vt:pdt") == 0 ) + arg.compare("-vt:pdt") == 0 || + arg.compare("-vt:cpp") == 0 || + arg.compare("-vt:cppflags") == 0 ) { i++; } @@ -1071,6 +1153,8 @@ parseCommandLine( int argc, char** argv ) else if( arg.compare( 0, 2, "-I" ) == 0 || arg.compare( 0, 2, "-D" ) == 0 ) { + if( Config.preprocess ) + Config.addPrepFlag( arg ); if( Config.inst_type == INST_TYPE_TAUINST ) Config.addTauinstParseArg( arg ); Config.addCompilerArg( arg ); @@ -1079,6 +1163,8 @@ parseCommandLine( int argc, char** argv ) // else if( Config.fortran() && arg.compare( 0, 6, "-WF,-D" ) == 0 ) { + if( Config.preprocess ) + Config.addPrepFlag( arg.substr( 4 ) ); if( Config.inst_type == INST_TYPE_TAUINST ) Config.addTauinstParseArg( arg.substr( 4 ) ); Config.addCompilerArg( arg ); @@ -1205,43 +1291,64 @@ doWrap() std::string::size_type si; + // preprocess source file + // + if( Config.preprocess ) + { + // create output file name of C preprocessor + // + std::string cpp_file = src_file; + si = cpp_file.rfind( '.' 
); + assert( si != std::string::npos ); + cpp_file.insert( si, ".cpp" ); + + files_to_remove.push_back( cpp_file ); + + // add macro definition '_OPENMP' to preprocessor flags, if OpenMP + // is enabled + if( Config.uses_openmp ) + Config.addPrepFlag( "-D_OPENMP" ); + + // compose C preprocessor command + // + cmd = + Config.prep_cmd + " " + + Config.prep_flags + " " + + src_file + " " + + " -o " + cpp_file; + + // show/execute C preprocessor command + if( ( rc = showOrExecuteCommand( cmd ) ) != 0 ) + return rc; + + src_file = cpp_file; + } + // run OPARI command on source file // if( Config.uses_openmp ) { - // compose OPARI command - // - cmd = - Config.opari_cmd + " " + - Config.opari_args + " " + - "-rcfile " + Config.opari_rcfile + " " + - "-table " + Config.opari_tabfile.first + " " + - src_file; - - // show/execute OPARI command - if( ( rc = showOrExecuteCommand( cmd ) ) != 0 ) - return rc; - - // create OPARI modified source file name + // create output file name of OPARI // - std::string mod_file = src_file; - si = mod_file.rfind( '.' ); + std::string pomp_file = src_file; + si = pomp_file.rfind( '.' 
); assert( si != std::string::npos ); - mod_file.insert( si, ".mod" ); + pomp_file.insert( si, ".pomp" ); // convert Fortran source file suffix to upper case, in order to // invoke the C preprocessor before compiling // if( Config.fortran() ) { - si = mod_file.rfind( ".f" ); - if( si != std::string::npos ) mod_file.replace( si, 2, ".F" ); + si = pomp_file.rfind( ".f" ); + if( si != std::string::npos ) + pomp_file.replace( si, 2, ".F" ); } - files_to_remove.push_back( mod_file ); + files_to_remove.push_back( pomp_file ); - // create OPARI include file name + // create OPARI include file name (only necessary for C/C++) // if( !Config.fortran() ) { @@ -1249,25 +1356,70 @@ doWrap() files_to_remove.push_back( inc_file ); } - src_file = mod_file; + // compose OPARI command + // + cmd = + Config.opari_cmd + " " + + Config.opari_args + " " + + "-rcfile " + Config.opari_rcfile + " " + + "-table " + Config.opari_tabfile.first + " " + + src_file + " " + + pomp_file; + + // show/execute OPARI command + if( ( rc = showOrExecuteCommand( cmd ) ) != 0 ) + return rc; + + src_file = pomp_file; } // run PDT parser and TAU instrumentor command on source file // if( Config.inst_type == INST_TYPE_TAUINST ) { + // create output file name of the PDT parser + // + std::string pdb_file = src_file; + si = src_file.rfind( '/' ); + if( si != std::string::npos ) + pdb_file = src_file.substr( si+1 ); + si = pdb_file.rfind( '.' ); + assert( si != std::string::npos ); + pdb_file.replace( si, 4, ".pdb" ); + + files_to_remove.push_back( pdb_file ); + + // create output file name of the TAU instrumentor + // + + std::string tau_file = src_file; + si = tau_file.rfind( '.' 
); + assert( si != std::string::npos ); + tau_file.insert( si, ".tau" ); + + // convert Fortran source file suffix to upper case, in order to + // invoke the C preprocessor before compiling + // (already done if OPARI was invoked) + // + if( Config.fortran() && !Config.uses_openmp ) + { + si = tau_file.rfind( ".f" ); + if( si != std::string::npos ) + tau_file.replace( si, 2, ".F" ); + } + + files_to_remove.push_back( tau_file ); + // adjust PDT parser options, if source file is instrumented by OPARI // if( Config.uses_openmp ) { - // current directory to find OPARI generated header files - // (only necessary for C/C++) - // + // add current working directory to include search path to find OPARI + // generated header files (only necessary for C/C++) if( !Config.fortran() ) Config.addTauinstParseArg( "-I." ); - // macro definition '_OPENMP' - // (the PDT parser has no own option to enable OpenMP) + // add macro definition '_OPENMP', if OpenMP is enabled Config.addTauinstParseArg( "-D_OPENMP" ); } @@ -1283,46 +1435,6 @@ doWrap() if( ( rc = showOrExecuteCommand( cmd ) ) != 0 ) return rc; - // create PDB file name - // - - std::string pdb_file = src_file; - si = src_file.rfind( '/' ); - if( si != std::string::npos ) - pdb_file = src_file.substr( si+1 ); - - si = pdb_file.rfind( '.' ); - assert( si != std::string::npos ); - pdb_file.replace( si, 4, ".pdb" ); - - files_to_remove.push_back( pdb_file ); - - // create TAU modified source file name - // - std::string tau_file = src_file; - if( Config.uses_openmp ) - { - si = tau_file.rfind( ".mod" ); - assert( si != std::string::npos ); - tau_file.replace( si, 4, ".tau" ); - } - else - { - si = tau_file.rfind( '.' 
); - assert( si != std::string::npos ); - tau_file.insert( si, ".tau" ); - - // convert Fortran source file suffix to upper case, in order to - // invoke the C preprocessor before compiling - // - if( Config.fortran() ) - { - si = tau_file.rfind( ".f" ); - if( si != std::string::npos ) tau_file.replace( si, 2, ".F" ); - } - } - files_to_remove.push_back( tau_file ); - // compose TAU instrumentor command // cmd = @@ -1359,6 +1471,8 @@ doWrap() } } + // adjust compiler flags, if source file is instrumented by OPARI + // if( Config.uses_openmp ) { // add current working directory to include search path to find OPARI @@ -1503,12 +1617,17 @@ doWrap() return rc; } - // remove intermediate files (in non-verbose mode) + // remove intermediate files // - if( Config.showme_flags == 0 && !Config.be_verbose ) + if( Config.showme_flags == 0 && Config.cleanup ) { for( i = 0; i < files_to_remove.size(); i++ ) + { + if( Config.be_verbose ) + std::cout << "+++ remove " << files_to_remove[i] << std::endl; + remove( files_to_remove[i].c_str() ); + } } return 0; @@ -1604,10 +1723,7 @@ showUsage() std::cout << std::endl << " " << ExeName << " - " << str_lang << " compiler wrapper for VampirTrace." << std::endl << std::endl - << " Syntax: " << ExeName << " [-vt:help] [-vt:version] [-vt:" << str_lang_suffix << " ] [-vt:inst ] " << std::endl - << " [-vt:] " << "[-vt:opari ] [-vt:noopari]" << std::endl - << " [-vt:tau ] [-vt:pdt ] [-vt:verbose]" << std::endl - << " [-vt:showme|-vt:showme-compile|-vt:showme-link] ..." << std::endl + << " Syntax: " << ExeName << " [options] ..." << std::endl << std::endl << " options:" << std::endl << " -vt:help Show this help message." << std::endl @@ -1615,6 +1731,7 @@ showUsage() << " -vt:version Show VampirTrace version." << std::endl << std::endl << " -vt:" << str_lang_suffix << " Set the underlying compiler command." << std::endl + << " (default: " << Config.comp_cmd << ")" << std::endl << std::endl << " -vt:inst Set the instrumentation type." 
<< std::endl << std::endl @@ -1625,14 +1742,28 @@ showUsage() << " dyninst binary by using Dyninst" << std::endl << " tauinst automatic source code instrumentation by using PDT/TAU" << std::endl << std::endl - << " -vt:opari Set options for the OPARI command." << std::endl - << " (see " << vt_installdirs_get(VT_INSTALLDIR_DATADIR) << "/doc/opari/Readme.html for more information)" << std::endl + << " default: " << Config.getInstTypeName() << std::endl + << std::endl + << " -vt:opari <[!]args> Set/add options for the OPARI command." << std::endl + << " (see " << vt_installdirs_get(VT_INSTALLDIR_DATADIR) << "/doc/opari/Readme.html for more information, default: " << Config.opari_args << ")" << std::endl << std::endl << " -vt:noopari Disable instrumentation of OpenMP contructs by OPARI." << std::endl << std::endl - << " -vt:tau Set options for the TAU instrumentor command." << std::endl + << " -vt:tau <[!]args> Set/add options for the TAU instrumentor command." << std::endl + << " (default: " << Config.tauinst_args << ")" << std::endl << std::endl - << " -vt:pdt Set options for the PDT parse command." << std::endl + << " -vt:pdt <[!]args> Set/add options for the PDT parse command." << std::endl + << " (default: " << Config.tauinst_parseargs << ")" << std::endl + << std::endl + << " -vt:preprocess Preprocess the source files before parsing" << std::endl + << " by OPARI and/or PDT." << std::endl + << std::endl + << " -vt:cpp Set C preprocessor command." << std::endl + << " (default: " << Config.prep_cmd << ")" << std::endl + << std::endl + << " -vt:cppflags <[!]flags>" << std::endl + << " Set/add flags for the C preprocessor." << std::endl + << " (default: " << Config.prep_flags << ")" << std::endl << std::endl << " -vt:" << std::endl << " Enforce application's parallelization type." << std::endl @@ -1646,6 +1777,8 @@ showUsage() << std::endl << " -vt:verbose Enable verbose mode." << std::endl << std::endl + << " -vt:nocleanup Do not remove intermediate files." 
<< std::endl + << std::endl << " -vt:show[me] Do not invoke the underlying compiler." << std::endl << " Instead, show the command line(s) that would be" << std::endl << " executed to compile and link the program." << std::endl @@ -1792,6 +1925,20 @@ getIncFilesFromTabFile( std::vector& incfiles ) } } +void +addOrSetStringList( std::string& list, const std::string& str, bool reset ) +{ + if( reset ) + { + list = str; + } + else + { + if( list.length() > 0 ) list += " "; + list += str; + } +} + void trimString( std::string& str ) { @@ -1896,15 +2043,15 @@ ConfigS::setCompilerCmd( const std::string& cmd ) void ConfigS::addCompilerArg( const std::string& arg ) { - if( comp_args.length() > 0 ) comp_args += " "; - comp_args += arg; + assert( arg.length() > 0 ); + addOrSetStringList( Config.comp_args, arg ); } void ConfigS::addCompilerLib( const std::string& lib ) { - if( comp_libs.length() > 0 ) comp_libs += " "; - comp_libs += lib; + assert( lib.length() > 0 ); + addOrSetStringList( Config.comp_libs, lib ); } void @@ -1933,24 +2080,26 @@ ConfigS::addModSrcFile( const std::string& file ) // add (modified) source file name to compiler arguments // - std::string mod_file = file; - - si = mod_file.rfind( '.' ); + si = file.rfind( '.' 
); assert( si != std::string::npos ); + std::string base = file.substr( 0, si ); + std::string suffix = file.substr( si ); + std::string mod_file = base; + + if( preprocess ) + mod_file += ".cpp"; + if( uses_openmp ) + mod_file += ".pomp"; if( inst_type == INST_TYPE_TAUINST ) - mod_file.insert( si, ".tau" ); - else - mod_file.insert( si, ".mod" ); + mod_file += ".tau"; // convert Fortran source file suffix to upper case, in order to // invoke the C preprocessor before compiling - // - if( fortran() ) - { - si = mod_file.rfind( ".f" ); - if( si != std::string::npos ) mod_file.replace( si, 2, ".F" ); - } + if( fortran() && suffix.compare( 0, 2, ".f" ) == 0 ) + suffix.replace( 0, 2, ".F" ); + + mod_file += suffix; addCompilerArg( mod_file ); } @@ -1986,26 +2135,33 @@ ConfigS::setOpariRcFile( const std::string& file ) void ConfigS::addOpariArg( const std::string& arg ) { - if( opari_args.length() > 0 ) opari_args += " "; - opari_args += arg; + assert( arg.length() > 0 ); + addOrSetStringList( Config.opari_args, arg, arg[0] == '!' ); } void ConfigS::addTauinstArg( const std::string& arg ) { - if( tauinst_args.length() > 0 ) tauinst_args += " "; - tauinst_args += arg; + assert( arg.length() > 0 ); + addOrSetStringList( Config.tauinst_args, arg, arg[0] == '!' ); } void ConfigS::addTauinstParseArg( const std::string& arg ) { - if( tauinst_parseargs.length() > 0 ) tauinst_parseargs += " "; - tauinst_parseargs += arg; + assert( arg.length() > 0 ); + addOrSetStringList( Config.tauinst_parseargs, arg, arg[0] == '!' ); } void -ConfigS::setUsesMpi( const bool set, const bool ovwrt ) +ConfigS::addPrepFlag( const std::string& flag ) +{ + assert( flag.length() > 0 ); + addOrSetStringList( Config.prep_flags, flag, flag[0] == '!' 
); +} + +void +ConfigS::setUsesMpi( bool set, bool ovwrt ) { static bool first = true; @@ -2037,7 +2193,7 @@ ConfigS::setUsesMpi( const bool set, const bool ovwrt ) } void -ConfigS::setUsesThreads( const bool set, const bool ovwrt ) +ConfigS::setUsesThreads( bool set, bool ovwrt ) { static bool first = true; @@ -2059,7 +2215,7 @@ ConfigS::setUsesThreads( const bool set, const bool ovwrt ) } void -ConfigS::setUsesOpenMP( const bool set, const bool ovwrt ) +ConfigS::setUsesOpenMP( bool set, bool ovwrt ) { static bool first = true; @@ -2133,6 +2289,37 @@ ConfigS::setInstType( const std::string& type ) return false; } +InstTypeT +ConfigS::getInstType() const +{ + return inst_type; +} + +std::string +ConfigS::getInstTypeName() const +{ + std::string name; + + switch( inst_type ) + { + case INST_TYPE_COMPINST: + name = "compinst"; + break; + case INST_TYPE_MANUAL: + name = "manual"; + break; + case INST_TYPE_DYNINST: + name = "dyninst"; + break; + case INST_TYPE_TAUINST: + default: + name = "tauinst"; + break; + } + + return name; +} + bool ConfigS::isInstAvail( const InstTypeT type ) const { diff --git a/ompi/contrib/vt/vt/tools/vtwrapper/vtc++-wrapper-data.txt.in b/ompi/contrib/vt/vt/tools/vtwrapper/vtc++-wrapper-data.txt.in index 41c7df56a9..db148eba5f 100644 --- a/ompi/contrib/vt/vt/tools/vtwrapper/vtc++-wrapper-data.txt.in +++ b/ompi/contrib/vt/vt/tools/vtwrapper/vtc++-wrapper-data.txt.in @@ -6,12 +6,14 @@ compiler=@VT_WRAPPER_CXX_COMPILER@ compiler_flags=@PTHREAD_CFLAGS@ @VT_WRAPPER_CXX_EXTRA_COMPILER_FLAGS@ linker_flags=@VT_WRAPPER_CXX_EXTRA_LINKER_FLAGS@ libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @CUPTILIBDIR@ @CUPTILIB@ @VT_WRAPPER_CXX_EXTRA_LIBS@ +preprocessor=@VT_WRAPPER_CXX_CPP@ +preprocessor_flags=@VT_WRAPPER_CXX_EXTRA_CPPFLAGS@ includedir=${includedir} libdir=${libdir} vtlib=@VT_WRAPPER_VTLIB@ -vtmpilib=@VT_WRAPPER_VTMPILIB@ @VTMPIUNIFYLIB@ @UNIMCI_CXXLIBS@ @PMPILIB@ +vtmpilib=@VT_WRAPPER_VTMPILIB@ 
@VTMPIUNIFYLIB@ @UNIMCI_CXXLIBS@ @MPILIBDIR@ @PMPILIB@ vtmtlib=@VT_WRAPPER_VTMTLIB@ @PTHREAD_LIBS@ -vthyblib=@VT_WRAPPER_VTHYBLIB@ @VTMPIUNIFYLIB@ @UNIMCI_CXXLIBS@ @PMPILIB@ @PTHREAD_LIBS@ +vthyblib=@VT_WRAPPER_VTHYBLIB@ @VTMPIUNIFYLIB@ @UNIMCI_CXXLIBS@ @MPILIBDIR@ @PMPILIB@ @PTHREAD_LIBS@ vtpomplib=@VTPOMPLIB@ vtdynattlib=@VTDYNATTLIB@ opari_bin=@VT_WRAPPER_OPARI_BIN@ diff --git a/ompi/contrib/vt/vt/tools/vtwrapper/vtcc-wrapper-data.txt.in b/ompi/contrib/vt/vt/tools/vtwrapper/vtcc-wrapper-data.txt.in index 3d8858e445..17dd81e089 100644 --- a/ompi/contrib/vt/vt/tools/vtwrapper/vtcc-wrapper-data.txt.in +++ b/ompi/contrib/vt/vt/tools/vtwrapper/vtcc-wrapper-data.txt.in @@ -6,12 +6,14 @@ compiler=@VT_WRAPPER_CC_COMPILER@ compiler_flags=@PTHREAD_CFLAGS@ @VT_WRAPPER_CC_EXTRA_COMPILER_FLAGS@ linker_flags=@VT_WRAPPER_CC_EXTRA_LINKER_FLAGS@ libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @CUPTILIBDIR@ @CUPTILIB@ @VT_WRAPPER_CC_EXTRA_LIBS@ +preprocessor=@VT_WRAPPER_CC_CPP@ +preprocessor_flags=@VT_WRAPPER_CC_EXTRA_CPPFLAGS@ includedir=${includedir} libdir=${libdir} vtlib=@VT_WRAPPER_VTLIB@ -vtmpilib=@VT_WRAPPER_VTMPILIB@ @VTMPIUNIFYLIB@ @UNIMCI_CLIBS@ @PMPILIB@ +vtmpilib=@VT_WRAPPER_VTMPILIB@ @VTMPIUNIFYLIB@ @UNIMCI_CLIBS@ @MPILIBDIR@ @PMPILIB@ vtmtlib=@VT_WRAPPER_VTMTLIB@ @PTHREAD_LIBS@ -vthyblib=@VT_WRAPPER_VTHYBLIB@ @VTMPIUNIFYLIB@ @UNIMCI_CLIBS@ @PMPILIB@ @PTHREAD_LIBS@ +vthyblib=@VT_WRAPPER_VTHYBLIB@ @VTMPIUNIFYLIB@ @UNIMCI_CLIBS@ @MPILIBDIR@ @PMPILIB@ @PTHREAD_LIBS@ vtpomplib=@VTPOMPLIB@ vtdynattlib=@VTDYNATTLIB@ opari_bin=@VT_WRAPPER_OPARI_BIN@ diff --git a/ompi/contrib/vt/vt/tools/vtwrapper/vtf77-wrapper-data.txt.in b/ompi/contrib/vt/vt/tools/vtwrapper/vtf77-wrapper-data.txt.in index e754207706..74129730e2 100644 --- a/ompi/contrib/vt/vt/tools/vtwrapper/vtf77-wrapper-data.txt.in +++ b/ompi/contrib/vt/vt/tools/vtwrapper/vtf77-wrapper-data.txt.in @@ -6,12 +6,14 @@ compiler=@VT_WRAPPER_F77_COMPILER@ 
compiler_flags=@VT_WRAPPER_F77_EXTRA_COMPILER_FLAGS@ linker_flags=@VT_WRAPPER_F77_EXTRA_LINKER_FLAGS@ libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @CUPTILIBDIR@ @CUPTILIB@ @VT_WRAPPER_F77_EXTRA_LIBS@ +preprocessor=@VT_WRAPPER_F77_CPP@ +preprocessor_flags=@VT_WRAPPER_F77_EXTRA_CPPFLAGS@ includedir=${includedir} libdir=${libdir} vtlib=@VT_WRAPPER_VTLIB@ -vtmpilib=@FMPILIB@ @VT_WRAPPER_VTMPILIB@ @VTMPIUNIFYLIB@ @UNIMCI_FLIBS@ @PMPILIB@ +vtmpilib=@FMPILIB@ @VT_WRAPPER_VTMPILIB@ @VTMPIUNIFYLIB@ @UNIMCI_FLIBS@ @MPILIBDIR@ @PMPILIB@ vtmtlib=@VT_WRAPPER_VTMTLIB@ @PTHREAD_LIBS@ -vthyblib=@FMPILIB@ @VT_WRAPPER_VTHYBLIB@ @VTMPIUNIFYLIB@ @UNIMCI_FLIBS@ @PMPILIB@ @PTHREAD_LIBS@ +vthyblib=@FMPILIB@ @VT_WRAPPER_VTHYBLIB@ @VTMPIUNIFYLIB@ @UNIMCI_FLIBS@ @MPILIBDIR@ @PMPILIB@ @PTHREAD_LIBS@ vtpomplib=@VTPOMPLIB@ vtdynattlib=@VTDYNATTLIB@ opari_bin=@VT_WRAPPER_OPARI_BIN@ diff --git a/ompi/contrib/vt/vt/tools/vtwrapper/vtf90-wrapper-data.txt.in b/ompi/contrib/vt/vt/tools/vtwrapper/vtf90-wrapper-data.txt.in index fbbb07acc7..7679518ae2 100644 --- a/ompi/contrib/vt/vt/tools/vtwrapper/vtf90-wrapper-data.txt.in +++ b/ompi/contrib/vt/vt/tools/vtwrapper/vtf90-wrapper-data.txt.in @@ -6,12 +6,14 @@ compiler=@VT_WRAPPER_FC_COMPILER@ compiler_flags=@VT_WRAPPER_FC_EXTRA_COMPILER_FLAGS@ linker_flags=@VT_WRAPPER_FC_EXTRA_LINKER_FLAGS@ libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @CUPTILIBDIR@ @CUPTILIB@ @VT_WRAPPER_FC_EXTRA_LIBS@ +preprocessor=@VT_WRAPPER_FC_CPP@ +preprocessor_flags=@VT_WRAPPER_FC_EXTRA_CPPFLAGS@ includedir=${includedir} libdir=${libdir} vtlib=@VT_WRAPPER_VTLIB@ -vtmpilib=@FMPILIB@ @VT_WRAPPER_VTMPILIB@ @VTMPIUNIFYLIB@ @UNIMCI_FCLIBS@ @PMPILIB@ +vtmpilib=@FMPILIB@ @VT_WRAPPER_VTMPILIB@ @VTMPIUNIFYLIB@ @UNIMCI_FCLIBS@ @MPILIBDIR@ @PMPILIB@ vtmtlib=@VT_WRAPPER_VTMTLIB@ @PTHREAD_LIBS@ -vthyblib=@FMPILIB@ @VT_WRAPPER_VTHYBLIB@ @VTMPIUNIFYLIB@ @UNIMCI_FCLIBS@ @PMPILIB@ 
@PTHREAD_LIBS@ +vthyblib=@FMPILIB@ @VT_WRAPPER_VTHYBLIB@ @VTMPIUNIFYLIB@ @UNIMCI_FCLIBS@ @MPILIBDIR@ @PMPILIB@ @PTHREAD_LIBS@ vtpomplib=@VTPOMPLIB@ vtdynattlib=@VTDYNATTLIB@ opari_bin=@VT_WRAPPER_OPARI_BIN@ diff --git a/ompi/contrib/vt/vt/tools/vtwrapper/vtnvcc-wrapper-data.txt.in b/ompi/contrib/vt/vt/tools/vtwrapper/vtnvcc-wrapper-data.txt.in index d629d73745..2421523f0c 100644 --- a/ompi/contrib/vt/vt/tools/vtwrapper/vtnvcc-wrapper-data.txt.in +++ b/ompi/contrib/vt/vt/tools/vtwrapper/vtnvcc-wrapper-data.txt.in @@ -6,6 +6,8 @@ compiler=@VT_WRAPPER_NVCC_COMPILER@ compiler_flags=-Xcompiler="@PTHREAD_CFLAGS@" @VT_WRAPPER_NVCC_EXTRA_COMPILER_FLAGS@ linker_flags=@VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS@ libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @CUPTILIBDIR@ @CUPTILIB@ @VT_WRAPPER_NVCC_EXTRA_LIBS@ +preprocessor=@VT_WRAPPER_NVCC_CPP@ +preprocessor_flags=@VT_WRAPPER_NVCC_EXTRA_CPPFLAGS@ includedir=${includedir} libdir=${libdir} vtlib=@VT_WRAPPER_VTLIB@ diff --git a/ompi/contrib/vt/vt/vtlib/Makefile.am b/ompi/contrib/vt/vt/vtlib/Makefile.am index e7a2517aef..aa6d0d781f 100644 --- a/ompi/contrib/vt/vt/vtlib/Makefile.am +++ b/ompi/contrib/vt/vt/vtlib/Makefile.am @@ -164,29 +164,40 @@ IOWRAPSOURCES = IOWRAPCFLAGS = endif -GPUSOURCES = +if AMHAVEGPU +GPUSOURCES = vt_gpu.c GPUHEADERS = vt_gpu.h -GPUCFLAGS = +GPUCFLAGS = -DVT_GPU GPULIBRARY = +if AMBUILDCUDA +GPUHEADERS += vt_cuda_driver_api.h +GPUCFLAGS += -DVT_CUDA +endif + if AMBUILDCUDAWRAP #GPUSOURCES += vt_cudawrap.c vt_cuda.c -GPUHEADERS += vt_cuda_driver_api.h GPUCFLAGS += -DVT_CUDAWRAP $(CUDATKINCDIR) endif if AMBUILDCUDARTWRAP -GPUSOURCES += vt_cudartwrap.c vt_cudart.c vt_gpu.c +GPUSOURCES += vt_cudartwrap.c vt_cudart.c GPUHEADERS += vt_cudartwrap.h vt_cuda_runtime_api.h GPUCFLAGS += -DVT_CUDARTWRAP $(CUDATKINCDIR) +endif + if AMBUILDCUPTI -if AMBUILDCUDAWRAP -GPUSOURCES += vt_cudacupti.c -GPUHEADERS += vt_cudacupti.h vt_cupti_events.h +GPUSOURCES += 
vt_cupti_events.c vt_cupti_callback.c +GPUHEADERS += vt_cupti_events.h vt_cupti_callback.h vt_cupti.h GPULIBRARY += $(CUPTILIBDIR) $(CUPTILIB) -GPUCFLAGS += -DVT_CUDACUPTI $(CUPTIINCDIR) -endif +GPUCFLAGS += -DVT_CUPTI $(CUPTIINCDIR) endif + +else +GPUSOURCES = +GPUHEADERS = +GPUCFLAGS = +GPULIBRARY = endif if AMHAVEMEMHOOKS @@ -296,7 +307,6 @@ VT_COMMON_SOURCES = \ vt_fbindings.h \ vt_fork.h \ vt_getcpu.h \ - vt_gpu.h \ vt_iowrap.h \ vt_iowrap_helper.h \ vt_java.h \ diff --git a/ompi/contrib/vt/vt/vtlib/vt_cudacupti.h b/ompi/contrib/vt/vt/vtlib/vt_cudacupti.h deleted file mode 100644 index 34dab58a23..0000000000 --- a/ompi/contrib/vt/vt/vtlib/vt_cudacupti.h +++ /dev/null @@ -1,124 +0,0 @@ -/** - * VampirTrace - * http://www.tu-dresden.de/zih/vampirtrace - * - * Copyright (c) 2005-2011, ZIH, TU Dresden, Federal Republic of Germany - * - * Copyright (c) 1998-2005, Forschungszentrum Juelich, Juelich Supercomputing - * Centre, Federal Republic of Germany - * - * See the file COPYING in the package base directory for details - **/ - -#ifndef VT_CUDACUPTI_H -#define VT_CUDACUPTI_H - -#include "vt_inttypes.h" /* VampirTrace integer types */ - -#if (defined(VT_CUDACUPTI)) - -#include "vt_cupti_events.h" - -/* - * VampirTrace CUPTI event (single linked list element) - */ -typedef struct vtcuptievt_t -{ - CUpti_EventID cuptiEvtID; /**< CUPTI event ID */ - uint32_t vtCID; /**< VampirTrace counter ID */ - /*CUpti_EventDomainID cuptiDomainID; *< CUPTI domain ID */ - struct vtcuptievt_t *next; -}vt_cupti_evt_t; - -/* - * Structure that stores events to be trace for specific device capability - * (single linked list element) - */ -typedef struct vtcuptidev_t -{ - int dev_major; /**< Major CUDA device capability */ - int dev_minor; /**< Minor CUDA device capability */ - CUdevice cuDev; /**< CUDA device */ - vt_cupti_evt_t *vtcuptiEvtList; /**< list of events to be traced for this device*/ - size_t evtNum; /**< Number of tracable CUPTI events */ - struct vtcuptidev_t *next; 
-}vt_cupti_dev_t; - -/* - * VampirTrace CUPTI event group and its counters and properties. - */ -typedef struct vtcuptigrp_t -{ - CUpti_EventGroup evtGrp; /**< CUPTI event group, created for this context */ - CUpti_EventID *cuptiEvtIDs; /**< CUPTI event IDs to be traced */ - uint32_t *vtCIDs; /**< VampirTrace counter ids */ - size_t evtNum; /**< number of CUPTI events in this group */ - uint8_t enabled; /**< is the threads CUPTI capturing enabled */ - struct vtcuptigrp_t *next; -}vt_cupti_grp_t; - -/* - * The VampirTrace CUPTI context has the CUDA context as key and contains - * further information about its device and counters. - */ -typedef struct vtcuptictx_t -{ - CUcontext cuCtx; /**< CUDA context (primary key) */ - vt_cupti_dev_t *vtDevCap; /**< pointer to device capability (events, ...) */ - vt_cupti_grp_t *vtGrpList; /**< list of VT CUPTI event groups */ - uint64_t *counterData; /**< preallocated buffer for counter data */ - CUpti_EventID *cuptiEvtIDs; /**< preallocated buffer for CUPTI event IDs*/ - struct vtcuptictx_t *next; -}vt_cupti_ctx_t; - -/* - * Initialize Mutex, VampirTrace ids and registers the finalize function. - * This may be done implicitly by vt_cupti_count(). - */ -void vt_cupti_init(void); - -/* - * Finalizes the VampirTrace CUPTI implementation. - */ -void vt_cupti_finalize(void); - -/* - * Finalizes CUPTI device. - * - * @param cleanExit 1 to cleanup CUPTI event group, otherwise 0 - * @param ptid VampirTrace process/thread id - */ -void vt_cupti_finalize_device(uint32_t ptid, uint8_t cleanExit); - - -/* - * Returns the VampirTrace CUPTI context for the CUDA context associated with - * the calling host thread. - * - * @param ptid the VampirTrace thread id of the calling host thread - */ -vt_cupti_ctx_t* vt_cupti_getCurrentContext(uint32_t ptid); - -/* - * Request the CUTPI counter values and write it to the given VampirTrace - * stream with the given timestamps. 
- * - * @param vtcuptiCtx pointer to the VampirTrace CUPTI context - * @param strmid the stream id for the counter values - * @param time the VampirTrace timestamps - */ -void vt_cupti_writeCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid, uint64_t *time); - -/* - * Reset the VampirTrace counter values (to zero) for active CUPTI counters. - * - * @param vtcuptiCtx pointer to the VampirTrace CUPTI context - * @param strmid the stream id for the counter values - * @param time the VampirTrace timestamps - */ -void vt_cupti_resetCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid, uint64_t *time); - -#endif /* defined(VT_CUDACUPTI) */ - -#endif /* VT_CUDACUPTI_H */ - diff --git a/ompi/contrib/vt/vt/vtlib/vt_cudart.c b/ompi/contrib/vt/vt/vtlib/vt_cudart.c index a7bd27d107..d45f0849d6 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_cudart.c +++ b/ompi/contrib/vt/vt/vtlib/vt_cudart.c @@ -22,13 +22,14 @@ #include "vt_gpu.h" /* common for GPU */ #include "vt_memhook.h" /* Switch memory tracing on/off */ -/* CUDA counter support */ -#include "vt_cudacupti.h" +#if (defined(VT_CUPTI)) +#include "vt_cupti_events.h" /* Support for CUPTI events */ +#endif #include #include -/* mutexes for locking the CUDA runtime wrap environment */ +/* mutex for locking the CUDA runtime wrap environment */ #if (defined(VT_MT) || defined(VT_HYB)) VTThrdMutex* VTThrdMutexCudart = NULL; #endif /* VT_MT || VT_HYB */ @@ -40,7 +41,7 @@ VTThrdMutex* VTThrdMutexCudart = NULL; * VampirTrace CUDA wrapper has to finalize before CUDA does its clean-up!!! 
*/ #define REGISTER_FINALIZE \ - if(!finalize_registered){ \ + if(vt_cudart_trace_enabled && !finalize_registered){ \ CUDARTWRAP_LOCK(); \ if(!finalize_registered){ \ atexit(vt_cudartwrap_finalize); \ @@ -80,7 +81,7 @@ VTThrdMutex* VTThrdMutexCudart = NULL; else if(syncLevel > 0){ \ time = vt_pform_wtime(); \ if(syncLevel > 1) vt_enter(ptid, &time, rid_sync); \ - checkCUDACall(cudaThreadSynchronize_ptr(), "vtcudaSync() failed!"); \ + checkCUDACall(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!"); \ if(syncLevel > 1){time = vt_pform_wtime(); vt_exit(ptid, &time);} \ } \ CUDARTWRAP_LOCK(); \ @@ -99,13 +100,13 @@ VTThrdMutex* VTThrdMutexCudart = NULL; if(do_traceE){ \ if(_kind == cudaMemcpyHostToDevice){ \ vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace, \ - vt_gpu_commCID, 0, _bytes); \ + vt_gpu_commCID, 0, (uint64_t)_bytes); \ }else if(_kind == cudaMemcpyDeviceToHost){ \ vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace, \ - vt_gpu_commCID, 0, _bytes); \ + vt_gpu_commCID, 0, (uint64_t)_bytes); \ }else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \ vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, \ - vt_gpu_commCID, 0, _bytes); \ + vt_gpu_commCID, 0, (uint64_t)_bytes); \ } \ } \ } \ @@ -126,14 +127,16 @@ VTThrdMutex* VTThrdMutexCudart = NULL; } \ } -#if (defined(VT_CUDACUPTI)) -#define CUDA_MEMCPY_ASYNC(kind, bytes, stream, _call) \ - if(trace_cupti) CUDA_MEMCPY_ASYNC_CUPTI(kind, bytes, stream, _call) \ - else CUDA_MEMCPY_ASYNC_EVT(kind, bytes, stream, _call) -#else +#if (defined(VT_CUPTI)) +# define CUDA_MEMCPY_ASYNC(kind, bytes, stream, _call) \ + if(trace_cupti_events) \ + CUDA_MEMCPY_ASYNC_CUPTI(kind, bytes, stream, _call) \ + else \ + CUDA_MEMCPY_ASYNC_EVT(kind, bytes, stream, _call) +# else #define CUDA_MEMCPY_ASYNC(kind, bytes, stream, _call) \ CUDA_MEMCPY_ASYNC_EVT(kind, bytes, stream, _call) -#endif +# endif /* * Records a memory copy and stores it in the entry buffer. 
@@ -165,7 +168,7 @@ VTThrdMutex* VTThrdMutexCudart = NULL; } \ } -#define CUDA_MEMCPY_ASYNC_CUPTI(_kind, _bytes, _stream, _call){ \ +#define CUDA_MEMCPY_ASYNC_CUPTI(_kind, _bytes, _stream, _call){ \ uint8_t do_trace = 0; /* is trace on */ \ uint64_t time = 0; \ uint32_t ptid = 0; \ @@ -180,7 +183,7 @@ VTThrdMutex* VTThrdMutexCudart = NULL; do_trace = vt_enter(ptid, &time, VT_LIBWRAP_FUNC_ID); \ if(do_trace){ \ if(syncLevel > 1) vt_enter(ptid, &time, rid_sync); \ - checkCUDACall(cudaDeviceSynchronize_ptr(),"vtcudaSync() failed!"); \ + checkCUDACall(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!"); \ if(syncLevel > 1){time = vt_pform_wtime(); vt_exit(ptid, &time);} \ if(_kind == cudaMemcpyHostToDevice){ \ vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace, \ @@ -196,7 +199,7 @@ VTThrdMutex* VTThrdMutexCudart = NULL; } \ _call /* the CUDA memCpy call itself */ \ if(vt_cudart_trace_enabled){ \ - checkCUDACall(cudaDeviceSynchronize_ptr(),"vtcudaSync() failed!"); \ + checkCUDACall(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!"); \ time = vt_pform_wtime(); \ if(do_trace){ \ if(_kind == cudaMemcpyDeviceToDevice){ \ @@ -217,7 +220,9 @@ VTThrdMutex* VTThrdMutexCudart = NULL; } \ } -#define checkCUDACall(ecode, msg) __checkCUDACall(ecode, msg, __FILE__,__LINE__) +#define checkCUDACall(_err, _msg) \ + if(cudaSuccess != _err) \ + __checkCUDACall(_err, _msg, __FILE__,__LINE__) /* library wrapper object */ VTLibwrap* vt_cudart_lw = VT_LIBWRAP_NULL; @@ -235,7 +240,7 @@ static uint8_t finalize_registered = 0; uint8_t vt_cudart_trace_enabled = 0; /* flag: write GPU idle time as region in CUDA stream 0? */ -static uint8_t show_gpu_idle = 0; +static uint8_t vt_cudart_gpu_idle = 0; /* flag: synchronization and flush points during runtime enabled? 
*/ static uint8_t syncLevel = 3; @@ -246,26 +251,23 @@ static uint8_t trace_kernels = 1; /* region filter for kernel filtering */ static RFG_Filter* vt_cudart_filter = NULL; -/* flag: abort program on CUDA error, if enabled */ -static uint8_t vt_cudart_error = 0; - /* flag: tracing of asynchronous memory copies enabled? */ static uint8_t trace_memcpyAsync = 1; /* flag: tracing of cudaMalloc*() and cudaFree*() enabled? */ static uint8_t trace_gpumem = 0; -/* flag: trace nvidia cupti counters */ -static uint8_t trace_cupti = 0; +/* flag: trace NVIDIA CUPTI events/counters */ +static uint8_t trace_cupti_events = 0; /* flag: sampling for CUPTI counter values enabled? */ -static uint8_t cupti_sampling = 0; +static uint8_t cupti_event_sampling = 0; /* flag: event based tracing (kernels, memcpyAsync) enabled? */ static uint8_t trace_events = 1; /* number of bytes used to buffer asynchronous tasks */ -static size_t asyncBufSize = VTGPU_MAX_BSIZE; +static size_t vt_cudart_bufSize = VTGPU_MAX_BSIZE; /* flag: CUDA wrapper already finalized? 
*/ static uint8_t finalized = 0; @@ -278,10 +280,10 @@ static uint32_t rid_check, rid_create, rid_sync, rid_flush; static uint32_t rid_idle = VT_NO_ID; /* global counter IDs */ -static uint32_t cid_blocksPerGrid; /* number of blocks per grid */ -static uint32_t cid_threadsPerBlock; /* number of threads per block */ +static uint32_t cid_blocksPerGrid; /* number of blocks per grid */ +static uint32_t cid_threadsPerBlock; /* number of threads per block */ static uint32_t cid_threadsPerKernel; /* number of threads per kernel */ -static uint32_t cid_cudaMalloc; /* CUDA GPU memory allocation counter */ +static uint32_t cid_cudaMalloc; /* CUDA GPU memory allocation counter */ /* structure for VampirTrace - CUDA time synchronization */ typedef struct @@ -332,6 +334,14 @@ typedef struct uint32_t rid; /**< VampirTrace region id */ }VTCUDAKernel; +/* kernel configure stack element */ +typedef struct +{ + VTCUDAStrm *strm; /**< corresponding stream/thread */ + uint32_t blocksPerGrid; /**< number of blocks per grid */ + uint32_t threadsPerBlock; /**< number of threads per block */ +}VTCUDAknconf; + /* structure for an asynchronous CUDA memory copy call */ typedef struct { @@ -343,14 +353,6 @@ typedef struct size_t byteCount; /**< number of bytes */ }VTCUDAMemcpy; -/* kernel configure stack element */ -typedef struct -{ - VTCUDAStrm *strm; /**< corresponding stream/thread */ - uint32_t blocksPerGrid; /**< number of blocks per grid */ - uint32_t threadsPerBlock; /**< number of threads per block */ -}VTCUDAknconf; - /* structure of a VampirTrace CUDA malloc (initiated with cudaMalloc*() */ typedef struct vtcMallocStruct { @@ -384,10 +386,29 @@ typedef struct vtcudaDev_st /* list of CUDA devices */ static VTCUDADevice* cudaDevices = NULL; +/* + * The structure of a CUDA kernel element. The list will be filled in + * __cudaRegisterFunction() and used in cudaLaunch() to get function name from + * function pointer. 
+ */ +typedef struct knSymbol_st { + const char* pointer; /**< the host function */ + const char* knSymbolName; /**< name of the CUDA kernel symbol */ + char name[VTGPU_KERNEL_STRING_SIZE]; /**< clean name of the cuda kernel */ + uint32_t rid; /**< region id for this kernel */ + struct knSymbol_st *next; /**< pointer to next kernel element */ +}VTCUDAkernelSymbol; + +/* + * List of all CUDA kernel symbols (their full qualified names and the mapped + * VampirTrace region IDs. + */ +static VTCUDAkernelSymbol *kernelListHead = NULL; + /* maximum events needed for task buffer size */ static size_t maxEvtNum = VTGPU_MAX_BSIZE / sizeof(VTCUDAKernel); -/* pointer to cuda functions which should not be traced */ +/* pointer to CUDA functions which should not be traced */ static cudaError_t (*cudaGetDeviceCount_ptr)(int*) = VT_LIBWRAP_NULL; static cudaError_t (*cudaGetDevice_ptr)(int*) = VT_LIBWRAP_NULL; static cudaError_t (*cudaGetDeviceProperties_ptr)(struct cudaDeviceProp *, int) = VT_LIBWRAP_NULL; @@ -410,9 +431,13 @@ static const char *(*cudaGetErrorString_ptr)(cudaError_t) = VT_LIBWRAP_NULL; */ static void VTCUDAflush(VTCUDADevice*, uint32_t); static VTCUDADevice* VTCUDAgetDevice(uint32_t ptid); +static VTCUDAkernelSymbol* getKernelByHostFunction(const char* hostFun); +static void extractKernelName(VTCUDAkernelSymbol *e, const char* devFunc); -/* Checks if a CUDA runtime API call returns successful and respectively prints +/* + * Checks if a CUDA runtime API call returns successful and respectively prints * the error. 
+ * * @param ecode the CUDA error code * @param msg a message to get more detailed information about the error * @param the corresponding file @@ -421,17 +446,15 @@ static VTCUDADevice* VTCUDAgetDevice(uint32_t ptid); static void __checkCUDACall(cudaError_t ecode, const char* msg, const char *file, const int line) { - if(cudaSuccess != ecode){ - if(msg != NULL) vt_cntl_msg(1, msg); - if(vt_cudart_error) - vt_error_msg("[CUDA Error <%s>:%i] %s", file, line, - cudaGetErrorString_ptr(ecode)); - else - vt_warning("[CUDA <%s>:%i] %s", file, line, + if(msg != NULL) vt_cntl_msg(1, "[CUDART] %s", msg); + + if(vt_gpu_error){ + vt_error_msg("[CUDA Error <%s>:%i] %s", file, line, cudaGetErrorString_ptr(ecode)); + }else{ + vt_warning("[CUDA <%s>:%i] %s", file, line, + cudaGetErrorString_ptr(ecode)); } - /*vt_cntl_msg(1, "[<%s>:%i] '%s'", - cudaGetErrorString_ptr(cudaGetLastError_ptr()));*/ } /* @@ -470,74 +493,76 @@ void vt_cudartwrap_init(void) if(vt_cudart_trace_enabled){ size_t minTaskSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAMemcpy); size_t minBufSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf); + +#if defined(VT_CUPTI) + /* do not use wrapper environment for CUDA API tracing */ + if(vt_env_cupti_api_callback()){ + vt_cudart_trace_enabled = 0; + return; + } + + if(vt_env_cupti_metrics() == NULL){ + trace_cupti_events = 0; + }else{ + trace_cupti_events = 1; + cupti_event_sampling = (uint8_t)vt_env_cupti_sampling(); + } +#endif syncLevel = (uint8_t)vt_env_cudatrace_sync(); trace_kernels = (uint8_t)vt_env_cudatrace_kernel(); trace_memcpyAsync = (uint8_t)vt_env_cudatrace_memcpyasync(); - - vt_cudart_error = (uint8_t)vt_env_cudatrace_error(); - -#if (defined(VT_CUDACUPTI)) - if(vt_env_cupti_metrics() == NULL){ - trace_cupti = 0; - }else{ - trace_cupti = 1; - cupti_sampling = (uint8_t)vt_env_cupti_sampling(); - } -#endif trace_events = 0; - if(trace_memcpyAsync){ - minTaskSize = sizeof(VTCUDAMemcpy); - minBufSize = sizeof(VTCUDAMemcpy); - trace_events = 1; - } - - 
if(trace_kernels){ - if(sizeof(VTCUDAKernel) < minTaskSize) minTaskSize = sizeof(VTCUDAKernel); - if(sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf) > minBufSize) - minBufSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf); - trace_events = 1; + /* check whether CUPTI event gathering is enabled */ +#if defined(VT_CUPTI) + if(!trace_cupti_events) +#endif + { + if(trace_memcpyAsync){ + minTaskSize = sizeof(VTCUDAMemcpy); + minBufSize = sizeof(VTCUDAMemcpy); + trace_events = 1; + } + + if(trace_kernels){ + if(sizeof(VTCUDAKernel) < minTaskSize) minTaskSize = sizeof(VTCUDAKernel); + if(sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf) > minBufSize) + minBufSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf); + trace_events = 1; + } } /* if events are used */ if(trace_events){ /* get user-defined task buffer size and check it */ - asyncBufSize = vt_env_cudatrace_bsize(); - if(asyncBufSize < minBufSize){ - if(asyncBufSize > 0){ + vt_cudart_bufSize = vt_env_cudatrace_bsize(); + + if(vt_cudart_bufSize < minBufSize){ + if(vt_cudart_bufSize > 0){ vt_warning("[CUDART] Minimal buffer size is %d bytes", minBufSize); } - asyncBufSize = VTGPU_DEFAULT_BSIZE; - }else if(VTGPU_MAX_BSIZE < asyncBufSize){ + vt_cudart_bufSize = VTGPU_DEFAULT_BSIZE; + }else if(VTGPU_MAX_BSIZE < vt_cudart_bufSize){ vt_warning("[CUDART] Current CUDA buffer size requires %d CUDA events.\n" "The recommended max. CUDA buffer size is %d. " - "(export VT_CUDA_BUFFER_SIZE=2097152)", - 2*asyncBufSize/minTaskSize, VTGPU_MAX_BSIZE); + "(export VT_CUDATRACE_BUFFER_SIZE=2097152)", + 2*vt_cudart_bufSize/minTaskSize, VTGPU_MAX_BSIZE); /* TODO: dynamic event creation for more than 2097152 bytes CUDA buffer size */ } /* determine maximum necessary VT-events (=2 CUDA events) */ - maxEvtNum = asyncBufSize / minTaskSize; - -#if (defined(VT_CUDACUPTI)) - if(trace_cupti){ - maxEvtNum = 0; - trace_events = 0; - /*asyncBufSize = (sizeof(VTCUDAKernel) > sizeof(VTCUDAMemcpy)) ? 
sizeof(VTCUDAKernel) : sizeof(VTCUDAMemcpy);*/ - asyncBufSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf); - } -#endif + maxEvtNum = vt_cudart_bufSize / minTaskSize; vt_cntl_msg(2,"[CUDART] Current CUDA buffer size: %d bytes \n" "(Kernel: %d bytes, MemcpyAsync: %d bytes, " - "Pre-created events: %d)", asyncBufSize, + "Pre-created events: %d)", vt_cudart_bufSize, sizeof(VTCUDAKernel), sizeof(VTCUDAMemcpy), maxEvtNum); } - show_gpu_idle = (uint8_t)vt_env_cudatrace_idle() & trace_kernels; + vt_cudart_gpu_idle = (uint8_t)vt_env_cudatrace_idle() & trace_kernels; trace_gpumem = (uint8_t)vt_env_cudatrace_gpumem(); /* read filter file for CUDA kernel filtering */ @@ -606,7 +631,7 @@ void vt_cudartwrap_init(void) vt_gpu_init(); /* initialize GPU common stuff */ /* get region IDs for this CUDA Runtime API wrapper (internal tracing) */ - if(show_gpu_idle){ + if(vt_cudart_gpu_idle){ rid_idle = vt_def_region(VT_MASTER_THREAD, "gpu_idle", VT_NO_ID, VT_NO_LNO, VT_NO_LNO, "CUDA_IDLE", VT_FUNCTION); } @@ -623,24 +648,32 @@ void vt_cudartwrap_init(void) { uint32_t cgid_kn = vt_def_counter_group(VT_MASTER_THREAD, "CUDA_KERNEL"); - cid_blocksPerGrid = vt_def_counter(VT_MASTER_THREAD, "blocks_per_grid", - VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, cgid_kn, ""); - cid_threadsPerBlock = vt_def_counter(VT_MASTER_THREAD, "threads_per_block", - VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, cgid_kn, ""); - cid_threadsPerKernel = vt_def_counter(VT_MASTER_THREAD, "threads_per_kernel", - VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, cgid_kn, ""); + cid_blocksPerGrid = vt_def_counter(VT_MASTER_THREAD, "blocks_per_grid", "#", + VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, cgid_kn, 0); + cid_threadsPerBlock = vt_def_counter(VT_MASTER_THREAD, "threads_per_block", "#", + VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, cgid_kn, 0); + cid_threadsPerKernel = vt_def_counter(VT_MASTER_THREAD, "threads_per_kernel", "#", + VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, cgid_kn, 0); } 
if(trace_gpumem){ - cid_cudaMalloc = vt_def_counter(VT_MASTER_THREAD, "gpu_mem_usage", + cid_cudaMalloc = vt_def_counter(VT_MASTER_THREAD, "gpu_mem_usage", "Bytes", VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, vt_def_counter_group(VT_MASTER_THREAD, "CUDA_MEMORY_USAGE"), - "byte"); + 0); } #if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_UNLOCK_IDS(); #endif + +#if (defined(VT_CUPTI)) + if(trace_kernels){ + if(trace_cupti_events){ + vt_cudart_bufSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf); + } + } +#endif /* * Register the finalize function of the CUDA wrapper to be called before @@ -650,7 +683,7 @@ void vt_cudartwrap_init(void) */ atexit(vt_cudartwrap_finalize); - /* show CUDA Runtime API, that CUDA Driver API is used as well */ + /* show CUDA Driver API wrapper, that CUDA Runtime API is used as well */ vt_cudartwrap_used = 1; } } @@ -725,23 +758,23 @@ static void VTCUDAcleanupDevice(uint32_t ptid, VTCUDADevice *vtDev, } } -#if (defined(VT_CUDACUPTI)) - if(trace_cupti && cleanEvents && vt_gpu_debug == 0){ +#if (defined(VT_CUPTI)) + if(trace_cupti_events && cleanEvents && vt_gpu_debug == 0){ uint64_t time = vt_pform_wtime(); VTCUDAStrm *curStrm = vtDev->strmList; - vt_cupti_ctx_t* vtcuptiCtx = vt_cupti_getCurrentContext(ptid); + vt_cuptievt_ctx_t* vtcuptiCtx = vt_cuptievt_getCurrentContext(ptid); while(curStrm != NULL){ - vt_cupti_resetCounter(vtcuptiCtx, curStrm->tid, &time); + vt_cuptievt_resetCounter(vtcuptiCtx, curStrm->tid, &time); curStrm = curStrm->next; } - vt_cupti_finalize_device(ptid, cleanEvents); + vt_cuptievt_finalize_device(ptid, cleanEvents); } #endif /* write idle end time to CUDA stream 0 */ - if(show_gpu_idle == 1){ + if(vt_cudart_gpu_idle == 1){ uint64_t idle_end = vt_pform_wtime(); vt_exit(vtDev->strmList->tid, &idle_end); } @@ -855,10 +888,10 @@ void vt_cudartwrap_finalize(void) } VTCUDAcleanupDevice(ptid, vtDev, 0); } - } + } -#if (defined(VT_CUDACUPTI)) - if(trace_cupti) vt_cupti_finalize(); +#if (defined(VT_CUPTI)) + 
if(trace_cupti_events) vt_cupti_events_finalize(); #endif /* cleanup GPU device list */ @@ -908,8 +941,8 @@ static uint64_t VTCUDAsynchronizeEvt(cudaEvent_t syncEvt) /* error handling */ if(cudaSuccess != ret){ if(cudaErrorInvalidResourceHandle == ret){ - vt_warning("[CUDART] Synchronization stop event is invalid. Context has" - " been destroyed, \nbefore asynchronous tasks could be flushed! " + vt_warning("[CUDART] Synchronization stop event is invalid. Context has " + "been destroyed, \nbefore asynchronous tasks could be flushed! " "Traces might be incomplete!"); }else{ checkCUDACall(ret, NULL); @@ -978,6 +1011,7 @@ static void VTCUDAflush(VTCUDADevice *vtDev, uint32_t ptid) { uint64_t serialKernelTime = 0; + uint8_t idleOn = 1; /* GPU idle region is initially entered */ /* conversion factor between VampirTrace and CUDA time */ const double factorX = (double)(syncStopTime - sync->strtTime)/ @@ -1036,11 +1070,20 @@ static void VTCUDAflush(VTCUDADevice *vtDev, uint32_t ptid) if(strttime < serialKernelTime && vtDev->concurrentKernels == 0){ strttime = serialKernelTime; } - serialKernelTime = stoptime; + + /* GPU idle time will be written to first CUDA stream in list */ + if(vt_cudart_gpu_idle){ + if(idleOn){ + vt_exit(vtDev->strmList->tid, &strttime); + idleOn = 0; + }else if(strttime > serialKernelTime){ + /* idle is off and kernels are consecutive */ + vt_enter(vtDev->strmList->tid, &serialKernelTime, rid_idle); + vt_exit(vtDev->strmList->tid, &strttime); + } + } /* write VampirTrace events to CUDA threads */ - /* gpu idle time will be written to first cuda stream in list */ - if(show_gpu_idle) vt_exit(vtDev->strmList->tid, &strttime); vt_enter(tid, &strttime, kn->rid); vt_count(tid, &strttime, cid_blocksPerGrid, kn->blocksPerGrid); vt_count(tid, &strttime, cid_threadsPerBlock, kn->threadsPerBlock); @@ -1050,7 +1093,8 @@ static void VTCUDAflush(VTCUDADevice *vtDev, uint32_t ptid) vt_count(tid, &stoptime, cid_threadsPerBlock, 0); vt_count(tid, &stoptime, 
cid_threadsPerKernel, 0); vt_exit(tid, &stoptime); - if(show_gpu_idle) vt_enter(vtDev->strmList->tid, &stoptime, rid_idle); + + if(serialKernelTime < stoptime) serialKernelTime = stoptime; /* go to next entry in buffer */ entry += sizeof(VTCUDAKernel); @@ -1074,7 +1118,11 @@ static void VTCUDAflush(VTCUDADevice *vtDev, uint32_t ptid) /* go to next entry in buffer */ entry += sizeof(VTCUDAMemcpy); } - + } /* while(entry < vtDev->buf_pos) */ + + /* enter GPU idle region after last kernel, if exited before */ + if(idleOn == 0){ + vt_enter(vtDev->strmList->tid, &serialKernelTime, rid_idle); } } @@ -1186,13 +1234,13 @@ static VTCUDADevice* VTCUDAcreateDevice(uint32_t ptid, int device) vtDev->sync.lastTime = vtDev->sync.strtTime; /* allocate buffers for asynchronous entries */ - vtDev->asyncbuf = malloc(asyncBufSize); + vtDev->asyncbuf = malloc(vt_cudart_bufSize); if(vtDev->asyncbuf == NULL){ vt_error_msg("malloc of asynchronous CUDA call buffer failed! " "Reduce buffer size with VT_BUFFER_SIZE!"); } vtDev->buf_pos = vtDev->asyncbuf; - vtDev->buf_size = vtDev->asyncbuf + asyncBufSize; + vtDev->buf_size = vtDev->asyncbuf + vt_cudart_bufSize; vtDev->conf_stack = vtDev->buf_size; vtDev->evtbuf = (VTCUDABufEvt*)malloc(maxEvtNum*sizeof(VTCUDABufEvt)); @@ -1211,11 +1259,11 @@ static VTCUDADevice* VTCUDAcreateDevice(uint32_t ptid, int device) } } -#if (defined(VT_CUDACUPTI)) - if(trace_cupti){ - vtDev->asyncbuf = malloc(asyncBufSize); +#if (defined(VT_CUPTI)) + if(trace_cupti_events){ + vtDev->asyncbuf = malloc(vt_cudart_bufSize); vtDev->buf_pos = vtDev->asyncbuf; - vtDev->buf_size = vtDev->asyncbuf + asyncBufSize; + vtDev->buf_size = vtDev->asyncbuf + vt_cudart_bufSize; vtDev->conf_stack = vtDev->buf_size; } #endif @@ -1248,7 +1296,7 @@ static VTCUDADevice* VTCUDAinitDevice(uint32_t ptid, int cudaDev) vtDev->strmList = VTCUDAcreateStream(cudaDev, 0, ptid); /* write enter event for GPU_IDLE on stream 0 (has to be written first */ - if(show_gpu_idle == 1) 
vt_enter(vtDev->strmList->tid, &vt_start_time, rid_idle); + if(vt_cudart_gpu_idle == 1) vt_enter(vtDev->strmList->tid, &vt_start_time, rid_idle); /* set the counter value for cudaMalloc to 0 in stream 0 */ if(trace_gpumem) vt_count(vtDev->strmList->tid, &time, cid_cudaMalloc, 0); @@ -1378,7 +1426,7 @@ static VTCUDADevice* VTCUDAgetDevice(uint32_t ptid) * * @param kind kind/direction of memory copy * @param count number of bytes for this data transfer - * @param stream the cuda stream + * @param stream the CUDA stream * * @return pointer to the VampirTrace CUDA memory copy structure */ @@ -1394,6 +1442,7 @@ static VTCUDAMemcpy* addMemcpy2Buf(enum cudaMemcpyKind kind, int count, ptid = VT_MY_THREAD; vtDev = VTCUDAcheckThread(stream, ptid, &ptrStrm); + /* check if there is enough buffer space */ if(vtDev->buf_pos + sizeof(VTCUDAMemcpy) > vtDev->conf_stack){ VTCUDAflush(vtDev, ptid); @@ -1520,19 +1569,6 @@ static void vtcudaFree(void *devPtr) vt_warning("[CUDART] free cuda memory, which has not been allocated!"); } -/* The structure of a cuda kernel element. The list will be filled in - * __cudaRegisterFunction() and used in cudaLaunch() to get function name from - * function pointer. - */ -typedef struct kernelele { - const char* pointer; /**< the host function */ - struct kernelele *next; /**< pointer to next kernel element */ - char name[VTGPU_KERNEL_STRING_SIZE]; /**< name of the cuda kernel */ - /*char deviceName[DEVICE_NAME_SIZE]; *< name of the cuda device */ - uint32_t rid; /**< region id for this kernel */ -}kernelelement; -static kernelelement *kernelListHead = NULL; - /* * Parse the device function name: * "_Z..." 
(no namespace) @@ -1541,7 +1577,7 @@ static kernelelement *kernelListHead = NULL; * @param elem pointer to the kernel element * @param devFunc the CUDA internal kernel function name */ -static void extractKernelName(kernelelement *e, const char* devFunc) +static void extractKernelName(VTCUDAkernelSymbol *e, const char* devFunc) { int i = 0; /* position in device function (source string) */ int nlength = 0; /* length of namespace or kernel */ @@ -1641,13 +1677,14 @@ static void extractKernelName(kernelelement *e, const char* devFunc) * @param hostFun the name of the host function * @param devFunc the name of kernel (device function) */ -static void insertKernelElement(const char* hostFun, const char* devFunc - /*, const char *deviceName*/) +static void insertKernelSymbol(const char* hostFun, const char* devFunc) { - kernelelement* e = (kernelelement*) malloc(sizeof(kernelelement)); + VTCUDAkernelSymbol* e = (VTCUDAkernelSymbol*) malloc(sizeof(VTCUDAkernelSymbol)); e->pointer = hostFun; + e->knSymbolName = devFunc; + /*strncpy(e->deviceName, deviceName, DEVICE_NAME_SIZE);*/ - extractKernelName(e,devFunc); + extractKernelName(e, devFunc); if(vt_cudart_filter){ int32_t climit; @@ -1674,15 +1711,15 @@ static void insertKernelElement(const char* hostFun, const char* devFunc } /* - * Get kernel element from kernel pointer (to lookup name and token). + * Get kernel element from host function pointer (to lookup name and token). 
* - * @param hostFun the identifier string of the cuda kernel - * @return the kernelNULL, if nothing was found + * @param hostFun the identifier string of the CUDA kernel + * @return the kernel or NULL, if nothing was found * @todo linear search could be replaced with hash */ -static kernelelement* getKernelElement(const char* hostFun) +static VTCUDAkernelSymbol* getKernelByHostFunction(const char* hostFun) { - kernelelement *actual = NULL; + VTCUDAkernelSymbol *actual = NULL; /* lock list operation if multi-threaded */ CUDARTWRAP_LOCK(); @@ -1703,8 +1740,8 @@ static kernelelement* getKernelElement(const char* hostFun) } /* - * This function is being called before execution of a cuda program for every - * cuda kernel (host_runtime.h) + * This function is being called before execution of a CUDA program for every + * CUDA kernel (host_runtime.h) */ void __cudaRegisterFunction(void **, const char *, char *, const char *, int, uint3 *, uint3 *, dim3 *, dim3 *, int *); @@ -1737,7 +1774,7 @@ void __cudaRegisterFunction(void **fatCubinHandle, thread_limit,tid,bid,bDim,gDim,wSize)); if(vt_cudart_trace_enabled && trace_kernels){ - insertKernelElement(hostFun, deviceFun/*, deviceName*/); + insertKernelSymbol(hostFun, deviceFun/*, deviceName*/); } } @@ -1818,14 +1855,13 @@ cudaError_t cudaMallocArray(struct cudaArray **array, const struct cudaChannelF #endif /* -- cuda_runtime_api.h:cudaMemcpy3D -- */ - cudaError_t cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { cudaError_t ret; enum cudaMemcpyKind kind = p->kind; struct cudaExtent extent = p->extent; - int count = extent.height*extent.width*extent.depth; + int count = extent.height * extent.width * extent.depth; CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpy3D", cudaError_t , (const struct cudaMemcpy3DParms *), @@ -1839,13 +1875,12 @@ cudaError_t cudaMemcpy3D(const struct cudaMemcpy3DParms *p) } /* -- cuda_runtime_api.h:cudaMemcpy3DAsync -- */ - cudaError_t cudaMemcpy3DAsync(const struct 
cudaMemcpy3DParms *p, cudaStream_t stream) { cudaError_t ret; enum cudaMemcpyKind kind = p->kind; struct cudaExtent extent = p->extent; - int count = extent.height*extent.width*extent.depth; + size_t count = extent.height * extent.width * extent.depth; CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpy3DAsync", cudaError_t , (const struct cudaMemcpy3DParms *, cudaStream_t ), @@ -1859,7 +1894,6 @@ cudaError_t cudaMemcpy3DAsync(const struct cudaMemcpy3DParms *p, cudaStream_t s } /* -- cuda_runtime_api.h:cudaMalloc -- */ - cudaError_t cudaMalloc(void **devPtr, size_t size) { cudaError_t ret; @@ -1881,7 +1915,6 @@ cudaError_t cudaMalloc(void **devPtr, size_t size) } /* -- cuda_runtime_api.h:cudaMallocPitch -- */ - cudaError_t cudaMallocPitch(void **devPtr, size_t *pitch, size_t width, size_t height) { cudaError_t ret; @@ -1903,7 +1936,6 @@ cudaError_t cudaMallocPitch(void **devPtr, size_t *pitch, size_t width, size_t } /* -- cuda_runtime_api.h:cudaFree -- */ - cudaError_t cudaFree(void *devPtr) { cudaError_t ret; @@ -1925,7 +1957,6 @@ cudaError_t cudaFree(void *devPtr) } /* -- cuda_runtime_api.h:cudaFreeArray -- */ - cudaError_t cudaFreeArray(struct cudaArray *array) { cudaError_t ret; @@ -1947,7 +1978,6 @@ cudaError_t cudaFreeArray(struct cudaArray *array) } /* -- cuda_runtime_api.h:cudaMemcpy -- */ - cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind) { cudaError_t ret; @@ -1964,7 +1994,6 @@ cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcp } /* -- cuda_runtime_api.h:cudaMemcpyToArray -- */ - cudaError_t cudaMemcpyToArray(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind) { cudaError_t ret; @@ -1981,7 +2010,6 @@ cudaError_t cudaMemcpyToArray(struct cudaArray *dst, size_t wOffset, size_t hOf } /* -- cuda_runtime_api.h:cudaMemcpyFromArray -- */ - cudaError_t cudaMemcpyFromArray(void *dst, const struct cudaArray *src, size_t 
wOffset, size_t hOffset, size_t count, enum cudaMemcpyKind kind) { cudaError_t ret; @@ -1998,7 +2026,6 @@ cudaError_t cudaMemcpyFromArray(void *dst, const struct cudaArray *src, size_t } /* -- cuda_runtime_api.h:cudaMemcpyArrayToArray -- */ - cudaError_t cudaMemcpyArrayToArray(struct cudaArray *dst, size_t wOffsetDst, size_t hOffsetDst, const struct cudaArray *src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, enum cudaMemcpyKind kind) { cudaError_t ret; @@ -2015,7 +2042,6 @@ cudaError_t cudaMemcpyArrayToArray(struct cudaArray *dst, size_t wOffsetDst, si } /* -- cuda_runtime_api.h:cudaMemcpy2D -- */ - cudaError_t cudaMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { cudaError_t ret; @@ -2032,7 +2058,6 @@ cudaError_t cudaMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spit } /* -- cuda_runtime_api.h:cudaMemcpy2DToArray -- */ - cudaError_t cudaMemcpy2DToArray(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { cudaError_t ret; @@ -2049,7 +2074,6 @@ cudaError_t cudaMemcpy2DToArray(struct cudaArray *dst, size_t wOffset, size_t h } /* -- cuda_runtime_api.h:cudaMemcpy2DFromArray -- */ - cudaError_t cudaMemcpy2DFromArray(void *dst, size_t dpitch, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { cudaError_t ret; @@ -2066,7 +2090,6 @@ cudaError_t cudaMemcpy2DFromArray(void *dst, size_t dpitch, const struct cudaAr } /* -- cuda_runtime_api.h:cudaMemcpy2DArrayToArray -- */ - cudaError_t cudaMemcpy2DArrayToArray(struct cudaArray *dst, size_t wOffsetDst, size_t hOffsetDst, const struct cudaArray *src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, enum cudaMemcpyKind kind) { cudaError_t ret; @@ -2083,7 +2106,6 @@ cudaError_t cudaMemcpy2DArrayToArray(struct cudaArray *dst, size_t wOffsetDst, } /* -- 
cuda_runtime_api.h:cudaMemcpyToSymbol -- */ - cudaError_t cudaMemcpyToSymbol(const char *symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind) { cudaError_t ret; @@ -2100,7 +2122,6 @@ cudaError_t cudaMemcpyToSymbol(const char *symbol, const void *src, size_t coun } /* -- cuda_runtime_api.h:cudaMemcpyFromSymbol -- */ - cudaError_t cudaMemcpyFromSymbol(void *dst, const char *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind) { cudaError_t ret; @@ -2117,7 +2138,6 @@ cudaError_t cudaMemcpyFromSymbol(void *dst, const char *symbol, size_t count, s } /* -- cuda_runtime_api.h:cudaMemcpyAsync -- */ - cudaError_t cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream) { cudaError_t ret; @@ -2134,7 +2154,6 @@ cudaError_t cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cuda } /* -- cuda_runtime_api.h:cudaMemcpyToArrayAsync -- */ - cudaError_t cudaMemcpyToArrayAsync(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream) { cudaError_t ret; @@ -2151,7 +2170,6 @@ cudaError_t cudaMemcpyToArrayAsync(struct cudaArray *dst, size_t wOffset, size_ } /* -- cuda_runtime_api.h:cudaMemcpyFromArrayAsync -- */ - cudaError_t cudaMemcpyFromArrayAsync(void *dst, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream) { cudaError_t ret; @@ -2168,7 +2186,6 @@ cudaError_t cudaMemcpyFromArrayAsync(void *dst, const struct cudaArray *src, si } /* -- cuda_runtime_api.h:cudaMemcpy2DAsync -- */ - cudaError_t cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream) { cudaError_t ret; @@ -2186,7 +2203,6 @@ cudaError_t cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t } /* -- cuda_runtime_api.h:cudaMemcpy2DToArrayAsync -- */ - cudaError_t 
cudaMemcpy2DToArrayAsync(struct cudaArray *dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream) { cudaError_t ret; @@ -2203,7 +2219,6 @@ cudaError_t cudaMemcpy2DToArrayAsync(struct cudaArray *dst, size_t wOffset, siz } /* -- cuda_runtime_api.h:cudaMemcpy2DFromArrayAsync -- */ - cudaError_t cudaMemcpy2DFromArrayAsync(void *dst, size_t dpitch, const struct cudaArray *src, size_t wOffset, size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream) { cudaError_t ret; @@ -2220,7 +2235,6 @@ cudaError_t cudaMemcpy2DFromArrayAsync(void *dst, size_t dpitch, const struct c } /* -- cuda_runtime_api.h:cudaMemcpyToSymbolAsync -- */ - cudaError_t cudaMemcpyToSymbolAsync(const char *symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream) { cudaError_t ret; @@ -2237,7 +2251,6 @@ cudaError_t cudaMemcpyToSymbolAsync(const char *symbol, const void *src, size_t } /* -- cuda_runtime_api.h:cudaMemcpyFromSymbolAsync -- */ - cudaError_t cudaMemcpyFromSymbolAsync(void *dst, const char *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream) { cudaError_t ret; @@ -2270,36 +2283,36 @@ cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cu VT_LIBWRAP_FUNC_END(vt_cudart_lw); /* no extra if(trace_enabled) */ if(trace_kernels){ - VTCUDADevice* vtDev; - VTCUDAStrm *ptrStrm; - uint32_t ptid; + VTCUDADevice* vtDev; + VTCUDAStrm *ptrStrm; + uint32_t ptid; - VT_CHECK_THREAD; - ptid = VT_MY_THREAD; + VT_CHECK_THREAD; + ptid = VT_MY_THREAD; - if(vt_is_trace_on(ptid)){ - vtDev = VTCUDAcheckThread(stream, ptid, &ptrStrm); + if(vt_is_trace_on(ptid)){ + vtDev = VTCUDAcheckThread(stream, ptid, &ptrStrm); - /* get kernel configure position */ - vtDev->conf_stack = vtDev->conf_stack - sizeof(VTCUDAknconf); - - /* check if there is enough buffer space */ - if(vtDev->buf_pos + 
sizeof(VTCUDAKernel) > vtDev->conf_stack){ - VTCUDAflush(vtDev, ptid); + /* get kernel configure position */ + vtDev->conf_stack = vtDev->conf_stack - sizeof(VTCUDAknconf); + + /* check if there is enough buffer space */ if(vtDev->buf_pos + sizeof(VTCUDAKernel) > vtDev->conf_stack){ - vt_error_msg("[CUDART] Not enough buffer space for this kernel!"); + VTCUDAflush(vtDev, ptid); + if(vtDev->buf_pos + sizeof(VTCUDAKernel) > vtDev->conf_stack){ + vt_error_msg("[CUDART] Not enough buffer space to configure kernel!"); + } + } + + /* add kernel configure to stack */ + { + VTCUDAknconf *vtKnconf = (VTCUDAknconf*) vtDev->conf_stack; + + vtKnconf->strm = ptrStrm; + vtKnconf->blocksPerGrid = gridDim.x * gridDim.y * gridDim.z; + vtKnconf->threadsPerBlock = blockDim.x * blockDim.y * blockDim.z; } } - - /* add kernel configure to stack */ - { - VTCUDAknconf *vtKnconf = (VTCUDAknconf*) vtDev->conf_stack; - - vtKnconf->strm = ptrStrm; - vtKnconf->blocksPerGrid = gridDim.x * gridDim.y * gridDim.z; - vtKnconf->threadsPerBlock = blockDim.x * blockDim.y * blockDim.z; - } - } } } @@ -2312,13 +2325,13 @@ cudaError_t cudaLaunch(const char *entry) cudaError_t ret; VTCUDADevice *vtDev = NULL; VTCUDAKernel *kernel = NULL; - kernelelement* e = NULL; + VTCUDAkernelSymbol* e = NULL; uint8_t do_trace = 0; uint32_t ptid = 0; uint64_t time; -#if (defined(VT_CUDACUPTI)) - vt_cupti_ctx_t* vtcuptiCtx = NULL; +#if defined(VT_CUPTI) + vt_cuptievt_ctx_t* vtcuptiCtx = NULL; #endif CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaLaunch", @@ -2336,78 +2349,80 @@ cudaError_t cudaLaunch(const char *entry) if(trace_kernels && do_trace){ /* get kernel element */ - e = getKernelElement(entry); - - /* get the active device */ - vtDev = VTCUDAgetDevice(ptid); - + e = getKernelByHostFunction(entry); if(e != NULL){ - /* check the kernel configure stack for last configured kernel */ - if(vtDev->conf_stack == vtDev->buf_size){ - ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (entry)); - vt_warning("[CUDART] 
[CUDART] No kernel configure call found for " - "'%s' (device %d, ptid %d)", e->name, vtDev->device, ptid); - return ret; - } - - /* get the kernel, which has been partly filled in configure call */ - kernel = (VTCUDAKernel*)vtDev->buf_pos; - /* set configure information */ - { - VTCUDAknconf *vtKnconf = (VTCUDAknconf*) vtDev->conf_stack; + /* check if the kernel will be traced on the correct thread */ + vtDev = VTCUDAgetDevice(ptid); - kernel->blocksPerGrid = vtKnconf->blocksPerGrid; - kernel->threadsPerBlock = vtKnconf->threadsPerBlock; - kernel->strm = vtKnconf->strm; - } + /* check the kernel configure stack for last configured kernel */ + if(vtDev->conf_stack == vtDev->buf_size){ + ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (entry)); + vt_warning("[CUDART] No kernel configure call found for " + "'%s' (device %d, ptid %d)", e->name, vtDev->device, ptid); + return ret; + } - vt_cntl_msg(3, "[CUDART] Launch '%s' (device %d, tid %d, rid %d, strm %d)", - e->name, vtDev->device, vtDev->ptid, - e->rid, (uint64_t)kernel->strm->stream); - - kernel->rid = e->rid; - - /* set type of buffer entry */ - kernel->type = VTCUDABUF_ENTRY_TYPE__Kernel; - - /* get an already created unused event */ - kernel->evt = vtDev->evtbuf_pos; - - if(!trace_cupti){ - /* increment buffers */ - vtDev->evtbuf_pos++; - vtDev->buf_pos += sizeof(VTCUDAKernel); - } - -#if (defined(VT_CUDACUPTI)) - /* zero CUPTI counter */ - if(trace_cupti){ - uint32_t tid = kernel->strm->tid; + /* get buffer space for kernel */ + kernel = (VTCUDAKernel*) vtDev->buf_pos; - checkCUDACall(cudaDeviceSynchronize_ptr(), NULL); + /* set configure information */ + { + VTCUDAknconf *vtKnconf = (VTCUDAknconf*) vtDev->conf_stack; + + kernel->blocksPerGrid = vtKnconf->blocksPerGrid; + kernel->threadsPerBlock = vtKnconf->threadsPerBlock; + kernel->strm = vtKnconf->strm; + + vtDev->conf_stack = vtDev->conf_stack + sizeof(VTCUDAknconf); + } + + vt_cntl_msg(3, "[CUDART] Launch '%s' (device %d, tid %d, rid %d, strm %d)", + e->name, 
vtDev->device, vtDev->ptid, + e->rid, (uint64_t)kernel->strm->stream); + + kernel->rid = e->rid; + + /* set type of buffer entry */ + kernel->type = VTCUDABUF_ENTRY_TYPE__Kernel; + + /* get an already created unused event */ + kernel->evt = vtDev->evtbuf_pos; + + if(!trace_cupti_events){ + /* increment buffers */ + vtDev->evtbuf_pos++; + vtDev->buf_pos += sizeof(VTCUDAKernel); + } + +#if defined(VT_CUPTI) + + /* zero CUPTI counter */ + if(trace_cupti_events){ + uint32_t tid = kernel->strm->tid; + + checkCUDACall(cudaThreadSynchronize_ptr(), NULL); /* write VT kernel start events */ time = vt_pform_wtime(); - if(show_gpu_idle) vt_exit(vtDev->strmList->tid, &time); + if(vt_cudart_gpu_idle) vt_exit(vtDev->strmList->tid, &time); vt_enter(tid, &time, e->rid); vt_count(tid, &time, cid_blocksPerGrid, kernel->blocksPerGrid); vt_count(tid, &time, cid_threadsPerBlock, kernel->threadsPerBlock); vt_count(tid, &time, cid_threadsPerKernel, kernel->threadsPerBlock * kernel->blocksPerGrid); - vtcuptiCtx = vt_cupti_getCurrentContext(ptid); - vt_cupti_resetCounter(vtcuptiCtx, tid, &time); + vtcuptiCtx = vt_cuptievt_getCurrentContext(ptid); + vt_cuptievt_resetCounter(vtcuptiCtx, tid, &time); }else #endif - checkCUDACall(cudaEventRecord_ptr(kernel->evt->strt, kernel->strm->stream), - "cudaEventRecord(startEvt, strmOfLastKernel) failed!"); - - }/* e != NULL */ - - /* pop this kernel from configure stack */ - vtDev->conf_stack = vtDev->conf_stack + sizeof(VTCUDAknconf); - + checkCUDACall(cudaEventRecord_ptr(kernel->evt->strt, kernel->strm->stream), + "cudaEventRecord(startEvt, strmOfLastKernel) failed!"); + }else{ /* e != NULL */ + /* kernel is filtered -> correct configure stack */ + vtDev = VTCUDAgetDevice(ptid); + vtDev->conf_stack = vtDev->conf_stack + sizeof(VTCUDAknconf); + } /* e != NULL */ } /* trace_kernels && do_trace */ } @@ -2418,47 +2433,45 @@ cudaError_t cudaLaunch(const char *entry) time = vt_pform_wtime(); vt_exit(ptid, &time); - if(do_trace){ - if(e != NULL && 
trace_kernels){ - REGISTER_FINALIZE; + if(do_trace && e != NULL && trace_kernels){ + REGISTER_FINALIZE; -#if (defined(VT_CUDACUPTI)) - /* synchronize after kernels to get cupti counter values */ - if(trace_cupti){ - cudaError_t ret; - uint32_t tid = kernel->strm->tid; +#if defined(VT_CUPTI) + /* synchronize after kernel launch to get CUPTI counter values */ + if(trace_cupti_events){ + cudaError_t ret; + uint32_t tid = kernel->strm->tid; - vt_enter(ptid, &time, rid_sync); + vt_enter(ptid, &time, rid_sync); - if(cupti_sampling){ - /* sampling of CUPTI counter values */ - do{ - time = vt_pform_wtime(); - vt_cupti_writeCounter(vtcuptiCtx, tid, &time); - /*ret = cudaEventQuery_ptr(kernel->evt->stop);*/ - ret = cudaStreamQuery_ptr(kernel->strm->stream); - }while(ret != cudaSuccess); - }else{ - /*ret = cudaEventSynchronize_ptr(kernel->evt->stop);*/ - checkCUDACall(cudaDeviceSynchronize_ptr(), NULL); - } + if(cupti_event_sampling){ + /* sampling of CUPTI counter values */ + do{ + time = vt_pform_wtime(); + vt_cuptievt_writeCounter(vtcuptiCtx, tid, &time); + /*ret = cudaEventQuery_ptr(kernel->evt->stop);*/ + ret = cudaStreamQuery_ptr(kernel->strm->stream); + }while(ret != cudaSuccess); + }else{ + /*ret = cudaEventSynchronize_ptr(kernel->evt->stop);*/ + checkCUDACall(cudaThreadSynchronize_ptr(), NULL); + } - time = vt_pform_wtime(); - vt_cupti_writeCounter(vtcuptiCtx, tid, &time); - vt_exit(ptid, &time); + time = vt_pform_wtime(); + vt_cuptievt_writeCounter(vtcuptiCtx, tid, &time); + vt_exit(ptid, &time); - /* write VT kernel stop events */ - vt_count(tid, &time, cid_blocksPerGrid, 0); - vt_count(tid, &time, cid_threadsPerBlock, 0); - vt_count(tid, &time, cid_threadsPerKernel, 0); - vt_exit(tid, &time); - if(show_gpu_idle) vt_enter(vtDev->strmList->tid, &time, rid_idle); - }else + /* write VT kernel stop events */ + vt_count(tid, &time, cid_blocksPerGrid, 0); + vt_count(tid, &time, cid_threadsPerBlock, 0); + vt_count(tid, &time, cid_threadsPerKernel, 0); + vt_exit(tid, 
&time); + if(vt_cudart_gpu_idle) vt_enter(vtDev->strmList->tid, &time, rid_idle); + }else #endif checkCUDACall(cudaEventRecord_ptr(kernel->evt->stop, kernel->strm->stream), - "cudaEventRecord(stopEvt, streamOfCurrentKernel) failed!"); - } /* e != NULL && trace_kernels */ - } + "cudaEventRecord(stopEvt, streamOfCurrentKernel) failed!"); + } /* do_trace && e != NULL && trace_kernels */ } return ret; @@ -2643,4 +2656,78 @@ cudaError_t cudaDeviceSynchronize() return ret; } +/* TODO: has to be implemented as communication */ +/* -- cuda_runtime_api.h:cudaMemcpyPeer -- */ +cudaError_t cudaMemcpyPeer(void *dst, int dstDevice, const void *src, int srcDevice, size_t count) +{ + cudaError_t ret; + + CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpyPeer", + cudaError_t , (void *, int , const void *, int , size_t ), NULL, 0); + + CUDARTWRAP_FUNC_START(vt_cudart_lw); + + ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (dst, dstDevice, src, srcDevice, count)); + + CUDARTWRAP_FUNC_END(vt_cudart_lw); + + return ret; +} + +/* -- cuda_runtime_api.h:cudaMemcpy3DPeer -- */ +cudaError_t cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) +{ + cudaError_t ret; + /*struct cudaExtent extent = p->extent; + size_t count = extent.height * extent.width * extent.depth;*/ + + CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpy3DPeer", + cudaError_t , (const struct cudaMemcpy3DPeerParms *), NULL, 0); + + CUDARTWRAP_FUNC_START(vt_cudart_lw); + + ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (p)); + + CUDARTWRAP_FUNC_END(vt_cudart_lw); + + return ret; +} + +/* -- cuda_runtime_api.h:cudaMemcpyPeerAsync -- */ +cudaError_t cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t count, cudaStream_t stream) +{ + cudaError_t ret; + + CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpyPeerAsync", + cudaError_t , (void *, int , const void *, int , size_t , cudaStream_t ), + NULL, 0); + + CUDARTWRAP_FUNC_START(vt_cudart_lw); + + ret = 
VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (dst, dstDevice, src, srcDevice, count, stream)); + + CUDARTWRAP_FUNC_END(vt_cudart_lw); + + return ret; +} + +/* -- cuda_runtime_api.h:cudaMemcpy3DPeerAsync -- */ +cudaError_t cudaMemcpy3DPeerAsync(const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream) +{ + cudaError_t ret; + /*struct cudaExtent extent = p->extent; + size_t count = extent.height * extent.width * extent.depth;*/ + + CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpy3DPeerAsync", + cudaError_t , (const struct cudaMemcpy3DPeerParms *, cudaStream_t ), NULL, 0); + + CUDARTWRAP_FUNC_START(vt_cudart_lw); + + ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (p, stream)); + + CUDARTWRAP_FUNC_END(vt_cudart_lw); + + return ret; +} + #endif /* CUDA 4.0 */ diff --git a/ompi/contrib/vt/vt/vtlib/vt_cudartwrap.c b/ompi/contrib/vt/vt/vtlib/vt_cudartwrap.c index 6e3f008412..35022bb848 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_cudartwrap.c +++ b/ompi/contrib/vt/vt/vtlib/vt_cudartwrap.c @@ -1374,75 +1374,6 @@ cudaError_t cudaHostUnregister(void *ptr) return ret; } -/* -- cuda_runtime_api.h:cudaMemcpy3DPeer -- */ -cudaError_t cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p) -{ - cudaError_t ret; - - CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpy3DPeer", - cudaError_t , (const struct cudaMemcpy3DPeerParms *), NULL, 0); - - CUDARTWRAP_FUNC_START(vt_cudart_lw); - - ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (p)); - - CUDARTWRAP_FUNC_END(vt_cudart_lw); - - return ret; -} - -/* -- cuda_runtime_api.h:cudaMemcpy3DPeerAsync -- */ -cudaError_t cudaMemcpy3DPeerAsync(const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream) -{ - cudaError_t ret; - - CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpy3DPeerAsync", - cudaError_t , (const struct cudaMemcpy3DPeerParms *, cudaStream_t ), NULL, 0); - - CUDARTWRAP_FUNC_START(vt_cudart_lw); - - ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (p, stream)); - - CUDARTWRAP_FUNC_END(vt_cudart_lw); - - return ret; 
-} - -/* -- cuda_runtime_api.h:cudaMemcpyPeer -- */ -cudaError_t cudaMemcpyPeer(void *dst, int dstDevice, const void *src, int srcDevice, size_t count) -{ - cudaError_t ret; - - CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpyPeer", - cudaError_t , (void *, int , const void *, int , size_t ), NULL, 0); - - CUDARTWRAP_FUNC_START(vt_cudart_lw); - - ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (dst, dstDevice, src, srcDevice, count)); - - CUDARTWRAP_FUNC_END(vt_cudart_lw); - - return ret; -} - -/* -- cuda_runtime_api.h:cudaMemcpyPeerAsync -- */ -cudaError_t cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t count, cudaStream_t stream) -{ - cudaError_t ret; - - CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaMemcpyPeerAsync", - cudaError_t , (void *, int , const void *, int , size_t , cudaStream_t ), - NULL, 0); - - CUDARTWRAP_FUNC_START(vt_cudart_lw); - - ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (dst, dstDevice, src, srcDevice, count, stream)); - - CUDARTWRAP_FUNC_END(vt_cudart_lw); - - return ret; -} - /* -- cuda_runtime_api.h:cudaPointerGetAttributes -- */ cudaError_t cudaPointerGetAttributes(struct cudaPointerAttributes *attributes, void *ptr) { @@ -1511,4 +1442,55 @@ cudaError_t cudaDeviceDisablePeerAccess(int peerDevice) return ret; } +/* -- cuda_runtime_api.h:cudaPeerRegister -- +cudaError_t cudaPeerRegister(void *peerDevicePointer, int peerDevice, unsigned int flags) +{ + cudaError_t ret; + + CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaPeerRegister", + cudaError_t , (void *, int , unsigned int ), NULL, 0); + + CUDARTWRAP_FUNC_START(vt_cudart_lw); + + ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (peerDevicePointer, peerDevice, flags)); + + CUDARTWRAP_FUNC_END(vt_cudart_lw); + + return ret; +}*/ + +/* -- cuda_runtime_api.h:cudaPeerUnregister -- +cudaError_t cudaPeerUnregister(void *peerDevicePointer, int peerDevice) +{ + cudaError_t ret; + + CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, 
"cudaPeerUnregister", + cudaError_t , (void *, int ), NULL, 0); + + CUDARTWRAP_FUNC_START(vt_cudart_lw); + + ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (peerDevicePointer, peerDevice)); + + CUDARTWRAP_FUNC_END(vt_cudart_lw); + + return ret; +}*/ + +/* -- cuda_runtime_api.h:cudaPeerGetDevicePointer -- +cudaError_t cudaPeerGetDevicePointer(void **pDevice, void *peerDevicePointer, int peerDevice, unsigned int flags) +{ + cudaError_t ret; + + CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaPeerGetDevicePointer", + cudaError_t , (void **, void *, int , unsigned int ), NULL, 0); + + CUDARTWRAP_FUNC_START(vt_cudart_lw); + + ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (pDevice, peerDevicePointer, peerDevice, flags)); + + CUDARTWRAP_FUNC_END(vt_cudart_lw); + + return ret; +}*/ + #endif /* CUDA 4.0 */ diff --git a/ompi/contrib/vt/vt/vtlib/vt_cupti.h b/ompi/contrib/vt/vt/vtlib/vt_cupti.h new file mode 100644 index 0000000000..c28fcfa899 --- /dev/null +++ b/ompi/contrib/vt/vt/vtlib/vt_cupti.h @@ -0,0 +1,30 @@ +/** + * VampirTrace + * http://www.tu-dresden.de/zih/vampirtrace + * + * Copyright (c) 2005-2011, ZIH, TU Dresden, Federal Republic of Germany + * + * Copyright (c) 1998-2005, Forschungszentrum Juelich, Juelich Supercomputing + * Centre, Federal Republic of Germany + * + * See the file COPYING in the package base directory for details + **/ + +#ifndef VT_CUPTI_H +#define VT_CUPTI_H + +#ifdef __cplusplus +# define EXTERN extern "C" +#else +# define EXTERN extern +#endif + +/* Disable all compiler warnings before including the actual + CUPTI header file. 
*/ +#ifdef __GNUC__ +# pragma GCC system_header +#endif /* __GNUC__ */ +#include "cupti.h" + +#endif /* VT_CUPTI_H */ + diff --git a/ompi/contrib/vt/vt/vtlib/vt_cupti_callback.c b/ompi/contrib/vt/vt/vtlib/vt_cupti_callback.c new file mode 100644 index 0000000000..599108dd41 --- /dev/null +++ b/ompi/contrib/vt/vt/vtlib/vt_cupti_callback.c @@ -0,0 +1,1673 @@ +/** + * VampirTrace + * http://www.tu-dresden.de/zih/vampirtrace + * + * Copyright (c) 2005-2011, ZIH, TU Dresden, Federal Republic of Germany + * + * Copyright (c) 1998-2005, Forschungszentrum Juelich, Juelich Supercomputing + * Centre, Federal Republic of Germany + * + * See the file COPYING in the package base directory for details + **/ + +#include "config.h" /* snprintf */ + +#include "vt_defs.h" /* global definitions */ +#include "vt_env.h" /* get environment variables */ +#include "vt_pform.h" /* VampirTrace time measurement */ +#include "vt_defs.h" /* VampirTrace constants */ +#include "vt_error.h" /* VampirTrace warning and error messages */ +#include "vt_gpu.h" /* common for GPU */ +#include "util/hash.h" + +#include "vt_cupti.h" /* Support for CUPTI */ +#include "vt_cupti_callback.h" +#include "vt_cupti_events.h" /* Support for CUPTI events */ + +#include +#include + + +/* mutex for locking global CUPTI callback lists */ +#if (defined(VT_MT) || defined(VT_HYB)) +static VTThrdMutex* VTThrdMutexCuptiCB = NULL; +# define CUPTI_CB_LOCK() VTThrd_lock(&VTThrdMutexCuptiCB) +# define CUPTI_CB_UNLOCK() VTThrd_unlock(&VTThrdMutexCuptiCB) +#else /* VT_MT || VT_HYB */ +# define CUPTI_CB_LOCK() +# define CUPTI_CB_UNLOCK() +#endif /* VT_MT || VT_HYB */ + +#define CHECK_CUPTI_ERROR(_err, _msg) \ + if(_err != CUPTI_SUCCESS){ \ + vt_cupti_handleError(_err, _msg,__FILE__, __LINE__); \ + } + +#define DISABLE_CUDART_CALLBACK(_cbid) \ + { \ + CUptiResult cuptiErr = cuptiEnableCallback( \ + 0, vt_cupticb_cudart_subscriber, \ + CUPTI_CB_DOMAIN_RUNTIME_API, \ + _cbid); \ + CHECK_CUPTI_ERROR(cuptiErr, "cuptiEnableCallback"); 
\ + } + +#define ENABLE_CUDART_CALLBACK(_cbid) \ + { \ + CUptiResult cuptiErr = cuptiEnableCallback( \ + 1, vt_cupticb_cudart_subscriber, \ + CUPTI_CB_DOMAIN_RUNTIME_API, \ + _cbid); \ + CHECK_CUPTI_ERROR(cuptiErr, "cuptiEnableCallback"); \ + } + +#define DISABLE_CUDART_CALLBACKS() \ + { \ + CUptiResult cuptiErr = cuptiEnableDomain(0, vt_cupticb_cudart_subscriber, \ + CUPTI_CB_DOMAIN_RUNTIME_API); \ + CHECK_CUPTI_ERROR(cuptiErr, "cuptiEnableDomain"); \ + } + +#define ENABLE_CUDART_CALLBACKS() \ + { \ + CUptiResult cuptiErr = cuptiEnableDomain(1, vt_cupticb_cudart_subscriber, \ + CUPTI_CB_DOMAIN_RUNTIME_API); \ + CHECK_CUPTI_ERROR(cuptiErr, "cuptiEnableDomain"); \ + } + +#define SUSPEND_CALLBACKS(_vtCtx) _vtCtx->callbacks_enabled = 0; +#define RESUME_CALLBACKS(_vtCtx) _vtCtx->callbacks_enabled = 1; + +/* + * structure of a VampirTrace CUPTI CUDA stream + */ +typedef struct vt_cupticb_strm_st +{ + CUstream stream; /**< the CUDA stream */ + uint32_t tid; /**< VT thread id for this stream (unique) */ + struct vt_cupticb_strm_st *next; +}vt_cupticb_strm_t; + +/* + * structure of a VampirTrace CUPTI CUDA runtime kernel + */ +typedef struct vt_cupticb_kernel_st +{ + cudaStream_t stream; /**< the CUDA stream */ + uint32_t blocksPerGrid; /**< number of blocks per grid */ + uint32_t threadsPerBlock; /**< number of threads per block */ + struct vt_cupticb_kernel_st *prev; +}vt_cupticb_kernel_t; + +/* + * structure of a VampirTrace CUDA malloc (initiated with cudaMalloc*() + */ +typedef struct vt_cupticb_gpumem_st +{ + void *memPtr; /**< pointer value to allocated memory */ + size_t size; /**< number of bytes allocated */ + uint32_t tid; /**< thread id used with this malloc */ + struct vt_cupticb_gpumem_st *next; +}vt_cupticb_gpumem_t; + +/* + * VampirTrace CUDA context element + * - used in single linked list + */ +typedef struct vt_cupticb_ctx_st +{ + uint64_t ctxUID; /**< unique CUDA context ID (available in callback) */ + /*CUcontext cuCtx; *< the CUDA context */ + 
CUdevice dev; /**< the CUDA device */ + vt_cupticb_gpumem_t *gpuMemList; /**< list of allocated GPU memory fields */ + size_t gpuMemAllocated; /**< memory allocated on CUDA device */ + vt_cupticb_strm_t *strmList; /**< CUDA stream list */ + vt_cupticb_kernel_t *kernelData; /**< pointer to top of CUDA runtime kernel + configuration stack */ + uint8_t stack_size; /**< number of params on the stack */ + uint8_t callbacks_enabled; /**< execute callback function? */ + struct vt_cupticb_ctx_st *next; +}vt_cupticb_ctx_t; + +/* list of VampirTrace CUDA contexts */ +static vt_cupticb_ctx_t* vt_cupticb_ctxList = NULL; + +static CUpti_SubscriberHandle vt_cupticb_cudart_subscriber; + +/* flag: tracing of CUDA runtime API enabled? */ +static uint8_t vt_cupticb_trace_cudart = 0; + +/* flag: tracing of kernels enabled? */ +static uint8_t vt_cupticb_trace_kernels = 1; + +/* flag: tracing of asynchronous memory copies enabled? */ +static uint8_t vt_cupticb_trace_mcpyAsync = 0; + +/* flag: use CUPTI events for counter capturing? */ +static uint8_t vt_cupticb_trace_events = 1; + +/* flag: sampling for CUPTI counter values enabled? */ +static uint8_t vt_cupticb_event_sampling = 0; + +/* flag: write GPU idle time as region in CUDA stream 0? */ +static uint8_t vt_cupticb_trace_gpu_idle = 0; + +/* flag: tracing of cudaMalloc*() and cudaFree*() enabled? 
*/ +static uint8_t vt_cupticb_trace_gpu_mem = 0; + +/* initialization and finalization flags */ +static uint8_t vt_cupticb_initialized = 0; +static uint8_t vt_cupticb_finalized = 0; + +/* + * Synchronization Level: + * 0 no extra synchronization + * 1 synchronize before synchronous memory copy or synchronization - correct + * data transfer rates for communication + * 2 show synchronization in extra region group to get host wait time + */ +static uint8_t vt_cupticb_syncLevel = 3; + +/* VampirTrace region ID for GPU idle */ +static uint32_t vt_cupticb_rid_idle = VT_NO_ID; + +/* VampirTrace region ID for synchronization of host and CUDA device*/ +static uint32_t vt_cupticb_rid_sync = VT_NO_ID; + +/* global counter IDs */ +static uint32_t vt_cupticb_cid_blocksPerGrid; +static uint32_t vt_cupticb_cid_threadsPerBlock; +static uint32_t vt_cupticb_cid_threadsPerKernel; +static uint32_t vt_cupticb_cid_cudaMalloc; /* GPU memory allocation counter */ + +/**************** The callback functions to be registered *********************/ + +/* CUDA runtime API callback function */ +/* some of CUPTI API functions have changed */ +#if (defined(CUPTI_API_VERSION) && (CUPTI_API_VERSION >= 2)) +void CUPTIAPI vt_cupticb_cudart(void *, CUpti_CallbackDomain, + CUpti_CallbackId, const void *); +void (*vt_cupticb_cudart_ptr)(void *, CUpti_CallbackDomain, + CUpti_CallbackId, const void *) + = vt_cupticb_cudart; +#else +void CUPTIAPI vt_cupticb_cudart(void *, CUpti_CallbackDomain, + CUpti_CallbackId, const CUpti_CallbackData *); +void (*vt_cupticb_cudart_ptr)(void *, CUpti_CallbackDomain, + CUpti_CallbackId, const CUpti_CallbackData *) + = vt_cupticb_cudart; +#endif + +/******************************************************************************/ + +/*********************** Internal function declarations ***********************/ +static void vt_cupticb_handle_cudart_knconf(const CUpti_CallbackData *); +static void vt_cupticb_handle_cudart_kernel(const CUpti_CallbackData *); + +static void 
vt_cupticb_handle_cudart_memcpy(const CUpti_CallbackData *, + enum cudaMemcpyKind, uint64_t, uint64_t); +static void vt_cupticb_handle_cudart_mcpyAsync(const CUpti_CallbackData *cbInfo, + enum cudaMemcpyKind kind, uint64_t bytes, cudaStream_t cuStrm); + +static void vt_cupticb_handle_malloc(uint64_t, void *, size_t); +static void vt_cupticb_handle_free(uint64_t ctxUID, void *devPtr); +/******************************************************************************/ + + +/* hashing of CUDA API function and kernel names */ +#define VT_CUPTICB_CUDA_API_HASH_MAX 512 + +static uint32_t vt_cupticb_cudaApiHtab[VT_CUPTICB_CUDA_API_HASH_MAX]; + +static uint32_t vt_cupticb_cudaApiHashFunc(CUpti_CallbackDomain domain, + CUpti_CallbackId cid) +{ + uint32_t idx = 0; + uint8_t offset = 0; + + if(domain == CUPTI_CB_DOMAIN_DRIVER_API) offset = 255; + + idx = offset + (uint32_t)cid; + + if(idx >= VT_CUPTICB_CUDA_API_HASH_MAX) + vt_error_msg("[CUPTI] Hash table for CUDA API callbacks to small!"); + + return (uint32_t)idx; +} + +static void vt_cupticb_cudaApiHashPut(CUpti_CallbackDomain domain, + CUpti_CallbackId cid, uint32_t rid) +{ + vt_cupticb_cudaApiHtab[vt_cupticb_cudaApiHashFunc(domain, cid)] = rid; +} + +static uint32_t vt_cupticb_cudaApiHashGet(CUpti_CallbackDomain domain, + CUpti_CallbackId cid) +{ + return vt_cupticb_cudaApiHtab[vt_cupticb_cudaApiHashFunc(domain, cid)]; +} + + +/************************ hashing of CUDA kernel names ************************/ +#define VT_CUPTICB_KERNEL_HASH_MAX 1021 + +typedef struct vt_cupticb_knHn_st { + char* sname; /**< name of the symbol */ + /*char* fname; *< name of the function */ + uint32_t rid; /**< associated region group identifier */ + struct vt_cupticb_knHn_st* next; /**< bucket for collision */ +} vt_cupticb_kernel_hn_t; + +static vt_cupticb_kernel_hn_t* vt_cupticb_kernel_htab[VT_CUPTICB_KERNEL_HASH_MAX]; + +static void* vt_cupticb_kernelHashPut(const char* n, uint32_t rid) +{ + uint32_t id = (uint32_t)vt_hash((uint8_t*)n, 
strlen(n), 0) + % VT_CUPTICB_KERNEL_HASH_MAX; + vt_cupticb_kernel_hn_t *add = + (vt_cupticb_kernel_hn_t*)malloc(sizeof(vt_cupticb_kernel_hn_t)); + + add->sname = strdup(n); + /*add->fname = NULL;*/ + add->rid = rid; + add->next = vt_cupticb_kernel_htab[id]; + vt_cupticb_kernel_htab[id] = add; + + return add; +} + +static void* vt_cupticb_kernelHashGet(const char* n) +{ + uint32_t id = (uint32_t)vt_hash((uint8_t*)n, strlen(n), 0) % VT_CUPTICB_KERNEL_HASH_MAX; + vt_cupticb_kernel_hn_t *curr = vt_cupticb_kernel_htab[id]; + + while ( curr ) { + if ( strcmp( curr->sname, n ) == 0 ) + return curr; + + curr = curr->next; + } + + return NULL; +} + +static void vt_cupticb_hashClear(void) +{ + int i; + vt_cupticb_kernel_hn_t* tmp_node; + + for ( i = 0; i < VT_CUPTICB_KERNEL_HASH_MAX; i++ ) + { + while( vt_cupticb_kernel_htab[i] ) + { + tmp_node = vt_cupticb_kernel_htab[i]->next; + free( vt_cupticb_kernel_htab[i]->sname ); + free( vt_cupticb_kernel_htab[i] ); + vt_cupticb_kernel_htab[i] = tmp_node; + } + } +} + +/* + * Handles errors returned from CUPTI function calls. 
+ * + * @param ecode the CUDA driver API error code + * @param msg a message to get more detailed information about the error + * @param the corresponding file + * @param the line the error occurred + */ +static void vt_cupti_handleError(CUptiResult err, const char* msg, + const char *file, const int line) +{ + const char *errstr; + + if(msg != NULL) vt_cntl_msg(1, msg); + + cuptiGetResultString(err, &errstr); + + if(vt_gpu_error){ + vt_error_msg("[CUPTI] %s:%d:'%s'", file, line, errstr); + }else{ + vt_warning("[CUPTI] %s:%d:'%s'", file, line, errstr); + } +} + +/* + * Set a CUPTI callback function for a specific CUDA runtime or driver function + * or for a whole domain (runtime or driver API) + * + * @param subscriber handle to the initialize subscriber + * @param callback the callback function + * @param domain The domain of the callback + * @param cbid The ID of the API function associated with this callback, if it + * is not valid, the whole domain will be enabled + */ +static void vt_cupti_set_callback(CUpti_SubscriberHandle *subscriber, + CUpti_CallbackFunc callback, + CUpti_CallbackDomain domain, + CUpti_CallbackId cbid) +{ + CUptiResult cuptiErr; + static uint8_t initflag = 1; + + if(initflag){ + initflag = 0; + + CHECK_CU_ERROR(cuInit(0), "cuInit"); + } + + cuptiErr = cuptiSubscribe(subscriber, callback, NULL); + CHECK_CUPTI_ERROR(cuptiErr, "cuptiSubscribe"); + + if((cbid == CUPTI_RUNTIME_TRACE_CBID_INVALID) || + (cbid == CUPTI_DRIVER_TRACE_CBID_INVALID)){ + cuptiErr = cuptiEnableDomain(1, *subscriber, domain); + CHECK_CUPTI_ERROR(cuptiErr, "cuptiEnableDomain"); + }else{ + cuptiErr = cuptiEnableCallback(1, *subscriber, domain, cbid); + CHECK_CUPTI_ERROR(cuptiErr, "cuptiEnableCallback"); + } +} + +/* + * Creates a VampirTrace CUPTI stream object and returns it. 
+ * + * @param stream the CUDA stream id + * @param ptid the VampirTrace thread ID of the calling thread + * + * @return the created stream object + */ +static vt_cupticb_strm_t* vt_cupticb_createStream(uint32_t ptid, + CUstream stream, + CUdevice device) +{ + char thread_name[16]; + vt_cupticb_strm_t *vtStrm; + + /* allocate memory for stream */ + vtStrm = (vt_cupticb_strm_t*)malloc(sizeof(vt_cupticb_strm_t)); + if(vtStrm == NULL) vt_error_msg("malloc(sizeof(vt_cupti_strm_t)) failed!"); + vtStrm->next = NULL; + vtStrm->stream = stream; + + /* create VT-User-Thread with name and parent id and get its id */ + if(-1 == snprintf(thread_name, 15, "CUDA[%d]", (uint32_t)device)) + vt_cntl_msg(1, "Could not create thread name for CUDA thread!"); + vt_gpu_registerThread(thread_name, ptid, &(vtStrm->tid)); + + /* set the threads property to GPU */ + CUPTI_CB_LOCK(); + vt_gpu_prop[vtStrm->tid] = VTGPU_GPU; + CUPTI_CB_UNLOCK(); + + /* set count values to zero */ + vt_count(vtStrm->tid, &vt_start_time, vt_cupticb_cid_blocksPerGrid, 0); + vt_count(vtStrm->tid, &vt_start_time, vt_cupticb_cid_threadsPerBlock, 0); + vt_count(vtStrm->tid, &vt_start_time, vt_cupticb_cid_threadsPerKernel, 0); + + if(vt_cupticb_trace_gpu_mem) + vt_count(vtStrm->tid, &vt_start_time, vt_cupticb_cid_cudaMalloc, 0); + + return vtStrm; +} + +/* + * Retrieve the VampirTrace CUPTI context by its ID or NULL if not available. + * + * @param cuCtxID ID of the CUDA context + * + * @return the VampirTrace CUPTI context object + */ +static vt_cupticb_ctx_t* vt_cupticb_getCtx(uint64_t cuCtxUID) +{ + vt_cupticb_ctx_t *vtCtx = NULL; + + /*** search CUDA context in list ***/ + CUPTI_CB_LOCK(); + vtCtx = vt_cupticb_ctxList; + while(vtCtx != NULL){ + if(vtCtx->ctxUID == cuCtxUID){ + CUPTI_CB_UNLOCK(); + return vtCtx; + } + vtCtx = vtCtx->next; + } + CUPTI_CB_UNLOCK(); + + return NULL; +} + +/* + * Creates new VampirTrace CUPTI callback context. 
+ * + * @param cuCtxID ID of the CUDA context + * + * @return the VampirTrace CUPTI context object + */ +static vt_cupticb_ctx_t* vt_cupticb_createCtx(uint64_t cuCtxUID) +{ + vt_cupticb_ctx_t *vtCtx = (vt_cupticb_ctx_t*)malloc(sizeof(vt_cupticb_ctx_t)); + if(vtCtx == NULL) + vt_error_msg("Could not allocate memory for vt_cupticb_ctx_t!"); + + vtCtx->ctxUID = cuCtxUID; + + /* enable handling of callbacks */ + vtCtx->callbacks_enabled = 1; + + CHECK_CU_ERROR(cuCtxGetDevice(&vtCtx->dev), NULL); + + /* initialize GPU memory allocation parameter */ + vtCtx->gpuMemList = NULL; + vtCtx->gpuMemAllocated = 0; + + + /* create first empty CUDA stream */ + VT_CHECK_THREAD; + vtCtx->strmList = vt_cupticb_createStream(VT_MY_THREAD, NULL, vtCtx->dev); + + if(vt_cupticb_trace_gpu_idle) + vt_enter(vtCtx->strmList->tid, &vt_start_time, vt_cupticb_rid_idle); + + /* initialize CUDA kernel configure stack */ + { + vt_cupticb_kernel_t *vtKn = NULL; + + vtKn = (vt_cupticb_kernel_t*)malloc(sizeof(vt_cupticb_kernel_t)); + if(vtKn == NULL) + vt_error_msg("Could not allocate memory for vt_cupti_kernel_t!"); + + vtKn->prev = NULL; + vtCtx->kernelData = vtKn; + vtCtx->stack_size = 0; + } + + return vtCtx; +} + +/* + * Lookup a VampirTrace CUPTI callback context by its ID and if not available, + * create a new one. + * + * @param cuCtxID ID of the CUDA context + * + * @return the VampirTrace CUPTI context object + */ +static vt_cupticb_ctx_t* vt_cupticb_checkCtx(uint64_t cuCtxUID) +{ + vt_cupticb_ctx_t *vtCtx = NULL; + + /* search for existing CUPTI context */ + vtCtx = vt_cupticb_getCtx(cuCtxUID); + + if(vtCtx == NULL){ + /* create new CUPTI callback context */ + vtCtx = vt_cupticb_createCtx(cuCtxUID); + + /* prepend CUDA context to global list */ + CUPTI_CB_LOCK(); + vtCtx->next = vt_cupticb_ctxList; + vt_cupticb_ctxList = vtCtx; + CUPTI_CB_UNLOCK(); + } + + return vtCtx; +} + +/* + * Lookup a VampirTrace CUPTI stream and if not available, + * create a new one. 
+ * + * @param ptid the VampirTrace thread ID of the calling thread + * @param vtCtx VampirTrace CUPTI context object + * @param cuStrm pointer to CUDA stream + * + * @return a VampirTrace stream object + */ +static vt_cupticb_strm_t* vt_cupticb_checkStream(uint32_t ptid, + vt_cupticb_ctx_t *vtCtx, + CUstream *cuStrm) +{ + vt_cupticb_strm_t *currStrm, *lastStrm; + + currStrm = vtCtx->strmList; + + /* no VampirTrace stream object available yet, create first stream */ + if(currStrm == NULL){ + if(cuStrm != NULL) + currStrm = vt_cupticb_createStream(ptid, *cuStrm, vtCtx->dev); + else + currStrm = vt_cupticb_createStream(ptid, NULL, vtCtx->dev); + + vtCtx->strmList = currStrm; + + return currStrm; + } + + /* if no CUDA stream is given, return first stream in list */ + if(cuStrm == NULL) return currStrm; + if(*cuStrm == NULL) return currStrm; + + /* lookup the current stream */ + do{ + if(*cuStrm == currStrm->stream) return currStrm; + lastStrm = currStrm; + currStrm = currStrm->next; + }while(currStrm != NULL); + + /* create new VampirTrace Stream structure/object and append it to list */ + lastStrm->next = vt_cupticb_createStream(ptid, *cuStrm, vtCtx->dev); + + return lastStrm->next; +} + +/* + * Parse the device function name: + * "_Z..." (no name space) + * "_ZN......" 
(with name space) + * + * @param elem pointer to the kernel element + * @param devFunc the CUDA internal kernel function name + */ +static void vt_cupticb_extractKernelName(char *kname, const char* devFunc) +{ + int i = 0; /* position in device function (source string) */ + int nlength = 0; /* length of name space or kernel */ + int ePos = 0; /* position in final kernel string */ + char *curr_elem, kn_templates[VTGPU_KERNEL_STRING_SIZE]; + char *tmpEnd, *tmpElemEnd; + + /*vt_cntl_msg(1,"[CUDART] device function name: %s'", devFunc);*/ + + /* init for both cases: name space available or not */ + if(devFunc[2] == 'N'){ + nlength = atoi(&devFunc[3]); /* get length of first name space */ + i = 4; + }else{ + nlength = atoi(&devFunc[2]); /* get length of kernel */ + i = 3; + } + + /* unless string null termination */ + while(devFunc[i] != '\0'){ + /* found either name space or kernel name (no digits) */ + if(devFunc[i] < '0' || devFunc[i] > '9'){ + /* copy name to kernel function */ + if((ePos + nlength) < VTGPU_KERNEL_STRING_SIZE){ + (void)strncpy(&kname[ePos], &devFunc[i], nlength); + ePos += nlength; /* set next position to write */ + }else{ + nlength = VTGPU_KERNEL_STRING_SIZE - ePos; + (void)strncpy(&kname[ePos], &devFunc[i], nlength); + vt_cntl_msg(1,"[CUDART]: kernel name '%s' contains more than %d chars!", + devFunc, VTGPU_KERNEL_STRING_SIZE); + return; + } + + i += nlength; /* jump over name */ + nlength = atoi(&devFunc[i]); /* get length of next name space or kernel */ + + /* finish if no digit after name space or kernel */ + if(nlength == 0){ + kname[ePos] = '\0'; /* set string termination */ + break; + }else{ + if((ePos + 3) < VTGPU_KERNEL_STRING_SIZE){ + (void)strncpy(&kname[ePos], "::\0", 3); + ePos += 2; + }else{ + vt_cntl_msg(1,"[CUDART]: kernel name '%s' contains more than %d chars!", + devFunc, VTGPU_KERNEL_STRING_SIZE); + return; + } + } + }else i++; + } + + /* copy the end of the kernel name string to extract templates */ + if(-1 == 
snprintf(kn_templates, VTGPU_KERNEL_STRING_SIZE, "%s", &devFunc[i+1])) + vt_cntl_msg(1, "[CUDART]: Error parsing kernel '%s'", devFunc); + curr_elem = kn_templates; /* should be 'L' */ + + /* search templates (e.g. "_Z10cptCurrentILb1ELi10EEv6SField8SParListifff") */ + tmpEnd=strstr(curr_elem,"EE"); + /* check for templates: curr_elem[0] points to 'L' AND string contains "EE" */ + if(tmpEnd != NULL && curr_elem[0]=='L'){ /* templates exist */ + tmpEnd[1] = '\0'; /* set 2nd 'E' to \0 as string end marker */ + + /* write at position 'I' with '<' */ + /* elem->name[ePos]='<'; */ + if(-1 == snprintf(&(kname[ePos]),VTGPU_KERNEL_STRING_SIZE-ePos,"<")) + vt_cntl_msg(1,"[CUDART] Parsing templates of kernel '%s' failed!", devFunc); + ePos++; /* continue with next character */ + + do{ + int res; + curr_elem++; /* set pointer to template type length or template type */ + /* find end of template element */ + tmpElemEnd = strchr(curr_elem + atoi(curr_elem), 'E'); + tmpElemEnd[0] = '\0'; /* set termination char after template element */ + /* find next non-digit char */ + while(*curr_elem >= '0' && *curr_elem <= '9') curr_elem++; + /* append template value to kernel name */ + if(-1 == (res = snprintf(&(kname[ePos]), + VTGPU_KERNEL_STRING_SIZE-ePos,"%s,",curr_elem))) + vt_cntl_msg(1,"[CUDART]: Parsing templates of kernel '%s' crashed!", devFunc); + ePos += res; /* continue after template value */ + curr_elem =tmpElemEnd + 1; /* set current element to begin of next template */ + }while(tmpElemEnd < tmpEnd); + if((ePos-1) < VTGPU_KERNEL_STRING_SIZE) (void)strncpy(&kname[ePos-1], ">\0", 2); + else vt_cntl_msg(1,"[CUDART]: Templates of '%s' too long for internal buffer!", devFunc); + } /* else: kernel has no templates */ + /*vt_cntl_msg(1,"[CUDART] function name: %s'",e->name);*/ +} + +/* + * This callback function is used to trace the CUDA runtime API. 
+ * + * @param userdata pointer to the user data + * @param domain the callback domain (runtime or driver API) + * @param cbid the ID of the callback function in the given domain + * @param cbInfo information about the callback + */ +void CUPTIAPI vt_cupticb_cudart(void *userdata, + CUpti_CallbackDomain domain, + CUpti_CallbackId cbid, +#if (defined(CUPTI_API_VERSION) && (CUPTI_API_VERSION >= 2)) + const void *cbInf) +{ + const CUpti_CallbackData *cbInfo = (CUpti_CallbackData*) cbInf; +#else + const CUpti_CallbackData *cbInfo) +{ +#endif + uint32_t ptid; + uint64_t time; + uint32_t rid_func = VT_NO_ID; + uint32_t hash_api_rid = VT_NO_ID; + uint8_t do_trace = 0; + + if(cbid == CUPTI_RUNTIME_TRACE_CBID_INVALID) return; + + /* internal callback switch + { + vt_cupti_ctx_t *vtCtx = vt_cupti_getContext(cbInfo->contextUid); + + if(NULL != vtCtx && vtCtx->callbacks_enabled == 0) return; + }*/ + + VT_CHECK_THREAD; + ptid = VT_MY_THREAD; + + /* get the VampirTrace region ID for the API function */ + hash_api_rid = vt_cupticb_cudaApiHashGet(domain, cbid); + if(hash_api_rid != VT_NO_ID){ + rid_func = hash_api_rid; + }else{ + rid_func = vt_def_region(VT_MASTER_THREAD, cbInfo->functionName, VT_NO_ID, + VT_NO_LNO, VT_NO_LNO, "CUDART_API", VT_FUNCTION); + + vt_cupticb_cudaApiHashPut(domain, cbid, rid_func); + } + + /*********** write enter and exit records for CUDA runtime API **************/ + time = vt_pform_wtime(); + if(cbInfo->callbackSite == CUPTI_API_ENTER){ + do_trace = vt_enter(ptid, &time, rid_func); + } + + if(cbInfo->callbackSite == CUPTI_API_EXIT){ + vt_exit(ptid, &time); + } + + /* + * Semantic Function Instrumentation + */ + switch(cbid){ + /****************** the CUDA runtime kernel configure call ******************/ + case CUPTI_RUNTIME_TRACE_CBID_cudaConfigureCall_v3020: { + if(vt_cupticb_trace_kernels) + vt_cupticb_handle_cudart_knconf(cbInfo); + + break; + } + + /***** the CUDA runtime kernel launch ******/ + case CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020: { + 
if(vt_cupticb_trace_kernels) + vt_cupticb_handle_cudart_kernel(cbInfo); + + break; + } + /****************************************************************************/ + + /********************** CUDA memory allocation ******************************/ + case CUPTI_RUNTIME_TRACE_CBID_cudaMalloc_v3020: { + if(vt_cupticb_trace_gpu_mem && cbInfo->callbackSite == CUPTI_API_EXIT){ + cudaMalloc_v3020_params *params = + (cudaMalloc_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_malloc(cbInfo->contextUid, *(params->devPtr), + params->size); + } + + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMallocPitch_v3020: { + if(vt_cupticb_trace_gpu_mem && cbInfo->callbackSite == CUPTI_API_EXIT){ + cudaMallocPitch_v3020_params *params = + (cudaMallocPitch_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_malloc(cbInfo->contextUid, *(params->devPtr), + params->height * (*(params->pitch))); + } + + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMallocArray_v3020: { + if(vt_cupticb_trace_gpu_mem && cbInfo->callbackSite == CUPTI_API_EXIT){ + cudaMallocArray_v3020_params *params = + (cudaMallocArray_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_malloc(cbInfo->contextUid, *(params->array), + params->height * params->width); + } + + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3D_v3020: { + if(vt_cupticb_trace_gpu_mem && cbInfo->callbackSite == CUPTI_API_EXIT){ + cudaMalloc3D_v3020_params *params = + (cudaMalloc3D_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_malloc(cbInfo->contextUid, params->pitchedDevPtr->ptr, + params->pitchedDevPtr->pitch * params->extent.height * params->extent.depth); + } + + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMalloc3DArray_v3020: { + if(vt_cupticb_trace_gpu_mem && cbInfo->callbackSite == CUPTI_API_EXIT){ + cudaMalloc3DArray_v3020_params *params = + (cudaMalloc3DArray_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_malloc(cbInfo->contextUid, *(params->array), + 
params->extent.width * params->extent.height * params->extent.depth); + } + + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaFree_v3020: { + if(vt_cupticb_trace_gpu_mem && cbInfo->callbackSite == CUPTI_API_ENTER){ + vt_cupticb_handle_free(cbInfo->contextUid, + ((cudaFree_v3020_params *)cbInfo->functionParams)->devPtr); + } + + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaFreeArray_v3020: { + if(vt_cupticb_trace_gpu_mem && cbInfo->callbackSite == CUPTI_API_ENTER){ + vt_cupticb_handle_free(cbInfo->contextUid, + ((cudaFreeArray_v3020_params *)cbInfo->functionParams)->array); + } + + break; + } + + /****************** synchronous CUDA memory copies **************************/ + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020: { + cudaMemcpy_v3020_params *params = + (cudaMemcpy_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, params->kind, + (uint64_t)params->count, + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2D_v3020: { + cudaMemcpy2D_v3020_params *params = + (cudaMemcpy2D_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, params->kind, + (uint64_t)(params->height * params->width), + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArray_v3020: { + cudaMemcpyToArray_v3020_params *params = + (cudaMemcpyToArray_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, params->kind, + (uint64_t)params->count, + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArray_v3020: { + cudaMemcpy2DToArray_v3020_params *params = + (cudaMemcpy2DToArray_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, params->kind, + (uint64_t)(params->height * params->width), + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArray_v3020: { + cudaMemcpyFromArray_v3020_params *params = + (cudaMemcpyFromArray_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, 
params->kind, + (uint64_t)params->count, + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArray_v3020: { + cudaMemcpy2DFromArray_v3020_params *params = + (cudaMemcpy2DFromArray_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, params->kind, + (uint64_t)(params->height * params->width), + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyArrayToArray_v3020: { + cudaMemcpyArrayToArray_v3020_params *params = + (cudaMemcpyArrayToArray_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, params->kind, + (uint64_t)params->count, + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DArrayToArray_v3020: { + cudaMemcpy2DArrayToArray_v3020_params *params = + (cudaMemcpy2DArrayToArray_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, params->kind, + (uint64_t)(params->height * params->width), + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbol_v3020: { + cudaMemcpyToSymbol_v3020_params *params = + (cudaMemcpyToSymbol_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, params->kind, + (uint64_t)params->count, + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbol_v3020: { + cudaMemcpyFromSymbol_v3020_params *params = + (cudaMemcpyFromSymbol_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, params->kind, + (uint64_t)params->count, + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3D_v3020: { + cudaMemcpy3D_v3020_params *params = + (cudaMemcpy3D_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, params->p->kind, + (uint64_t)(params->p->extent.height * params->p->extent.width * + params->p->extent.depth), + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeer_v4000: { + cudaMemcpyPeer_v4000_params *params = + (cudaMemcpyPeer_v4000_params *)cbInfo->functionParams; + 
+ vt_cupticb_handle_cudart_memcpy(cbInfo, cudaMemcpyDeviceToDevice, + (uint64_t)params->count, + time); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeer_v4000: { + cudaMemcpy3DPeer_v4000_params *params = + (cudaMemcpy3DPeer_v4000_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_memcpy(cbInfo, cudaMemcpyDeviceToDevice, + (uint64_t)(params->p->extent.height * params->p->extent.width * + params->p->extent.depth), + time); + break; + } + /**************************************************************************/ + + /******************** asynchronous memory copies **************************/ + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyAsync_v3020: { + cudaMemcpyAsync_v3020_params *params = + (cudaMemcpyAsync_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_mcpyAsync(cbInfo, params->kind, + (uint64_t)params->count, + params->stream); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToArrayAsync_v3020: { + cudaMemcpyToArrayAsync_v3020_params *params = + (cudaMemcpyToArrayAsync_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_mcpyAsync(cbInfo, params->kind, + (uint64_t)params->count, + params->stream); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromArrayAsync_v3020: { + cudaMemcpyFromArrayAsync_v3020_params *params = + (cudaMemcpyFromArrayAsync_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_mcpyAsync(cbInfo, params->kind, + (uint64_t)params->count, + params->stream); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DAsync_v3020: { + cudaMemcpy2DAsync_v3020_params *params = + (cudaMemcpy2DAsync_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_mcpyAsync(cbInfo, params->kind, + (uint64_t)(params->height * params->width), + params->stream); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DToArrayAsync_v3020: { + cudaMemcpy2DToArrayAsync_v3020_params *params = + (cudaMemcpy2DToArrayAsync_v3020_params *)cbInfo->functionParams; + + 
vt_cupticb_handle_cudart_mcpyAsync(cbInfo, params->kind, + (uint64_t)(params->height * params->width), + params->stream); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy2DFromArrayAsync_v3020: { + cudaMemcpy2DFromArrayAsync_v3020_params *params = + (cudaMemcpy2DFromArrayAsync_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_mcpyAsync(cbInfo, params->kind, + (uint64_t)(params->height * params->width), + params->stream); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyToSymbolAsync_v3020: { + cudaMemcpyToSymbolAsync_v3020_params *params = + (cudaMemcpyToSymbolAsync_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_mcpyAsync(cbInfo, params->kind, + (uint64_t)params->count, + params->stream); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyFromSymbolAsync_v3020: { + cudaMemcpyFromSymbolAsync_v3020_params *params = + (cudaMemcpyFromSymbolAsync_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_mcpyAsync(cbInfo, params->kind, + (uint64_t)params->count, + params->stream); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DAsync_v3020: { + cudaMemcpy3DAsync_v3020_params *params = + (cudaMemcpy3DAsync_v3020_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_mcpyAsync(cbInfo, params->p->kind, + (uint64_t)(params->p->extent.height * params->p->extent.width * + params->p->extent.depth), + params->stream); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpyPeerAsync_v4000: { + cudaMemcpyPeerAsync_v4000_params *params = + (cudaMemcpyPeerAsync_v4000_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_mcpyAsync(cbInfo, cudaMemcpyDeviceToDevice, + (uint64_t)params->count, + params->stream); + break; + } + + case CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy3DPeerAsync_v4000: { + cudaMemcpy3DPeerAsync_v4000_params *params = + (cudaMemcpy3DPeerAsync_v4000_params *)cbInfo->functionParams; + + vt_cupticb_handle_cudart_mcpyAsync(cbInfo, cudaMemcpyDeviceToDevice, + 
(uint64_t)(params->p->extent.height * params->p->extent.width * + params->p->extent.depth), + params->stream); + break; + } + /**************************************************************************/ + + default: break; + } + /****************************************************************************/ +} + +/* + * This function handles the cudaConfigureCall callback. + * Kernel configuration data are written on the kernel configure stack. + * + * @param cbInfo information about the callback + */ +static void vt_cupticb_handle_cudart_knconf(const CUpti_CallbackData *cbInfo) +{ + /* configure call parameter have to be saved for kernel launch on a per + * thread basis. */ + if(cbInfo->callbackSite == CUPTI_API_EXIT){ + vt_cupticb_kernel_t *vtParams = NULL; + vt_cupticb_ctx_t *vtCtx = vt_cupticb_checkCtx(cbInfo->contextUid); + cudaConfigureCall_v3020_params * params = + (cudaConfigureCall_v3020_params *) cbInfo->functionParams; + + /* Is another kernel already configured? */ + if(vtCtx->stack_size > 0){ + /* memory already allocated */ + if(vtCtx->kernelData->prev == NULL){ + vtParams = (vt_cupticb_kernel_t*)malloc(sizeof(vt_cupticb_kernel_t)); + if(vtParams == NULL) + vt_error_msg("Could not allocate memory for vt_cupti_kernel_t!"); + + vtParams->prev = NULL; + }else{ + vtParams = vtCtx->kernelData->prev; + } + + /* add to kernel configure call parameter stack */ + vtParams->prev = vtCtx->kernelData; + vtCtx->kernelData = vtParams; + }else{ + vtParams = vtCtx->kernelData; + } + + vtParams->blocksPerGrid = params->gridDim.x * params->gridDim.y + * params->gridDim.z; + vtParams->threadsPerBlock = params->blockDim.x * params->blockDim.y + * params->blockDim.z; + + vtParams->stream = params->stream; + + (vtCtx->stack_size)++; + } +} + +/* + * This function can be called at the beginning and end of a CUDA kernel launch. + * Time stamps will be written to the corresponding CUDA stream. + * !!! The kernel has to be configured (cudaConfigureCall) !!! 
+ + * @param cbInfo information about the callback + */ +static void vt_cupticb_handle_cudart_kernel(const CUpti_CallbackData *cbInfo) +{ + uint64_t time; + + if(cbInfo->callbackSite == CUPTI_API_ENTER){ + uint32_t knRID = VT_NO_ID; + const char *symName = cbInfo->symbolName; + vt_cupticb_strm_t *vtStrm = NULL; + vt_cupticb_kernel_hn_t *hn = NULL; + vt_cupticb_ctx_t *vtCtx = NULL; + uint32_t ptid; + + VT_CHECK_THREAD; + ptid = VT_MY_THREAD; + + /* get the VampirTrace region ID for the kernel */ + hn = vt_cupticb_kernelHashGet(symName); + + if(hn){ + knRID = hn->rid; + }else{ + char knName[VTGPU_KERNEL_STRING_SIZE]; + + vt_cupticb_extractKernelName(knName, symName); + knRID = vt_def_region(VT_MASTER_THREAD, knName, VT_NO_ID, + VT_NO_LNO, VT_NO_LNO, "CUDA_KERNEL", VT_FUNCTION); + + hn = vt_cupticb_kernelHashPut(symName, knRID); + /*hn->fname = knName;*/ + } + + /* get the VampirTrace thread ID the kernel is running on */ + { + vtCtx = vt_cupticb_checkCtx(cbInfo->contextUid); + + vtStrm = vt_cupticb_checkStream(ptid, vtCtx, &vtCtx->kernelData->stream); + + /* save address into 64 Bit correlation value for exit callback */ + *cbInfo->correlationData = (uint64_t)vtStrm; + } + + /* write the event records */ + CHECK_CU_ERROR(cuCtxSynchronize(), NULL); + + /* write VT kernel start events */ + time = vt_pform_wtime(); + + if(vt_cupticb_trace_gpu_idle) vt_exit(vtCtx->strmList->tid, &time); + vt_enter(vtStrm->tid, &time, knRID); + + vt_count(vtStrm->tid, &time, vt_cupticb_cid_blocksPerGrid, + vtCtx->kernelData->blocksPerGrid); + vt_count(vtStrm->tid, &time, vt_cupticb_cid_threadsPerBlock, + vtCtx->kernelData->threadsPerBlock); + vt_count(vtStrm->tid, &time, vt_cupticb_cid_threadsPerKernel, + vtCtx->kernelData->threadsPerBlock * + vtCtx->kernelData->blocksPerGrid); + + if(vt_cupticb_trace_events){ + vt_cuptievt_ctx_t *vtcuptiCtx = vt_cuptievt_getCurrentContext(ptid); + vt_cuptievt_resetCounter(vtcuptiCtx, vtStrm->tid, &time); + } + + /* take the configure parameters from 
stack */ + (vtCtx->stack_size)--; + if(vtCtx->stack_size > 0){ + vtCtx->kernelData = vtCtx->kernelData->prev; + } + } + + if(cbInfo->callbackSite == CUPTI_API_EXIT){ + vt_cupticb_strm_t *vtStrm = (vt_cupticb_strm_t *)(*cbInfo->correlationData); + uint32_t tid = vtStrm->tid; + vt_cupticb_ctx_t *vtCtx = vt_cupticb_checkCtx(cbInfo->contextUid); + uint32_t ptid; + + VT_CHECK_THREAD; + ptid = VT_MY_THREAD; + + if(vt_cupticb_trace_events){ + vt_cuptievt_ctx_t *vtcuptiCtx = vt_cuptievt_getCurrentContext(ptid); + + time = vt_pform_wtime(); + vt_enter(ptid, &time, vt_cupticb_rid_sync); + + if(vt_cupticb_event_sampling){ + CUresult ret = CUDA_SUCCESS; + /* sampling of CUPTI counter values */ + do{ + time = vt_pform_wtime(); + vt_cuptievt_writeCounter(vtcuptiCtx, tid, &time); + ret = cuStreamQuery(vtStrm->stream); + }while(ret != CUDA_SUCCESS); + }else{ + CHECK_CU_ERROR(cuCtxSynchronize(), NULL); + } + + time = vt_pform_wtime(); + vt_cuptievt_writeCounter(vtcuptiCtx, tid, &time); + vt_exit(ptid, &time); + }else{ + /*SUSPEND_CALLBACKS(vtCtx);*/ + if(vt_cupticb_syncLevel > 0){ + time = vt_pform_wtime(); + vt_enter(ptid, &time, vt_cupticb_rid_sync); + CHECK_CU_ERROR(cuCtxSynchronize(), NULL); + time = vt_pform_wtime(); + vt_exit(ptid, &time); + } + /*RESUME_CALLBACKS(vtCtx);*/ + } + + /* write VT kernel stop events */ + vt_count(tid, &time, vt_cupticb_cid_blocksPerGrid, 0); + vt_count(tid, &time, vt_cupticb_cid_threadsPerBlock, 0); + vt_count(tid, &time, vt_cupticb_cid_threadsPerKernel, 0); + + vt_exit(tid, &time); + + if(vt_cupticb_trace_gpu_idle){ + vt_enter(vtCtx->strmList->tid, &time, vt_cupticb_rid_idle); + } + } +} + +/* + * Increases the "Allocated CUDA memory" counter. 
+ * + * @param ctxUID CUDA context identifier (@see CUPTI callback info) + * @param devPtr pointer to the allocated memory (needed for vtcudaFree()) + * @param size the number of bytes allocated + */ +static void vt_cupticb_handle_malloc(uint64_t ctxUID, void *devPtr, + size_t size) +{ + uint64_t vtTime; + vt_cupticb_ctx_t *vtCtx = vt_cupticb_checkCtx(ctxUID); + vt_cupticb_gpumem_t *vtMalloc = (vt_cupticb_gpumem_t*)malloc(sizeof(vt_cupticb_gpumem_t)); + + vtMalloc->memPtr = devPtr; + vtMalloc->size = size; + + /* add malloc entry to list */ + vtMalloc->next = vtCtx->gpuMemList; + vtCtx->gpuMemList = vtMalloc; + + /* increase allocated memory counter */ + vtCtx->gpuMemAllocated += size; + + /* check if first CUDA stream is available */ + if(vtCtx->strmList == NULL){ + VT_CHECK_THREAD; + vt_cupticb_checkStream(VT_MY_THREAD, vtCtx, NULL); + } + + /* write counter value */ + vtTime = vt_pform_wtime(); + vt_count(vtCtx->strmList->tid, &vtTime, vt_cupticb_cid_cudaMalloc, + (uint64_t)(vtCtx->gpuMemAllocated)); +} + +/* + * Decreases the "Allocated CUDA memory" counter. 
+ * + * @param ctxUID CUDA context identifier (@see CUPTI callback info) + * @param devPtr pointer to the allocated memory + */ +static void vt_cupticb_handle_free(uint64_t ctxUID, void *devPtr) +{ + uint64_t vtTime; + vt_cupticb_ctx_t *vtCtx = vt_cupticb_checkCtx(ctxUID); + vt_cupticb_gpumem_t *curMalloc = NULL; + vt_cupticb_gpumem_t *lastMalloc = NULL; + + if(devPtr == NULL) return; + + curMalloc = vtCtx->gpuMemList; + lastMalloc = vtCtx->gpuMemList; + + while(curMalloc != NULL){ + if(devPtr == curMalloc->memPtr){ + + /* decrease allocated counter value and write it */ + vtTime = vt_pform_wtime(); + vtCtx->gpuMemAllocated -= curMalloc->size; + vt_count(vtCtx->strmList->tid, &vtTime, vt_cupticb_cid_cudaMalloc, + (uint64_t)(vtCtx->gpuMemAllocated)); + + + /* set pointer over current element to next one */ + lastMalloc->next = curMalloc->next; + + /* if current element is the first list entry, set the list entry */ + if(curMalloc == vtCtx->gpuMemList){ + vtCtx->gpuMemList = curMalloc->next; + } + + /* free VT memory of CUDA malloc */ + curMalloc->next = NULL; + free(curMalloc); + curMalloc = NULL; + + /* set mallocList to NULL, if last element freed */ + if(vtCtx->gpuMemAllocated == 0) { + vtCtx->gpuMemList = NULL; + } + return; + } + + lastMalloc = curMalloc; + curMalloc = curMalloc->next; + } + + vt_warning("[CUPTICB] free CUDA memory, which has not been allocated!"); +} + +/* + * Handle synchronous CUDA runtime memory copy calls. 
+ * + * @param cbInfo information about the callback + * @param kind + * @param bytes + * @param time + */ +static void vt_cupticb_handle_cudart_memcpy( + const CUpti_CallbackData *cbInfo, + enum cudaMemcpyKind kind, + uint64_t bytes, uint64_t time) +{ + uint32_t strmID; + uint32_t ptid; + + VT_CHECK_THREAD; + ptid = VT_MY_THREAD; + + if(cbInfo->callbackSite == CUPTI_API_ENTER){ + /* get the VampirTrace thread ID the kernel is running on */ + { + vt_cupticb_ctx_t *vtCtx = vt_cupticb_checkCtx(cbInfo->contextUid); + vt_cupticb_strm_t *vtStrm = vt_cupticb_checkStream(ptid, vtCtx, NULL); + + strmID = vtStrm->tid; + + /* save address into 64 Bit correlation value for exit callback */ + *cbInfo->correlationData = (uint64_t)vtStrm; + + /* synchronize to get host waiting time */ + /*DISABLE_CUDART_DOMAIN(); + DISABLE_CUDART_CALLBACK(CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSynchronize_v3020); + SUSPEND_CALLBACKS(vtCtx);*/ + if(vt_cupticb_syncLevel > 0){ + if(vt_cupticb_syncLevel > 1) vt_enter(ptid, &time, vt_cupticb_rid_sync); + CHECK_CU_ERROR(cuCtxSynchronize(), NULL); + time = vt_pform_wtime(); + if(vt_cupticb_syncLevel > 1) vt_exit(ptid, &time); + } + /*RESUME_CALLBACKS(vtCtx); + ENABLE_CUDART_CALLBACKS(); + ENABLE_CUDART_CALLBACK(CUPTI_RUNTIME_TRACE_CBID_cudaDeviceSynchronize_v3020); + */ + } + + CUPTI_CB_LOCK(); + if(kind != cudaMemcpyDeviceToDevice) vt_gpu_prop[ptid] |= VTGPU_GPU_COMM; + vt_gpu_prop[strmID] |= VTGPU_GPU_COMM; + CUPTI_CB_UNLOCK(); + + /*time = vt_pform_wtime();*/ + if(kind == cudaMemcpyHostToDevice){ + vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace, + vt_gpu_commCID, 0, bytes); + }else if(kind == cudaMemcpyDeviceToHost){ + vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace, + vt_gpu_commCID, 0, bytes); + }else if(kind == cudaMemcpyDeviceToDevice){ + vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, + vt_gpu_commCID, 0, bytes); + } + } + + if(cbInfo->callbackSite == CUPTI_API_EXIT){ + strmID = ((vt_cupticb_strm_t 
*)(*cbInfo->correlationData))->tid; + + /*time = vt_pform_wtime();*/ + if(kind == cudaMemcpyDeviceToDevice){ + vt_mpi_rma_end(strmID, &time, vt_gpu_commCID, 0); + }else if(kind != cudaMemcpyHostToHost){ + vt_mpi_rma_end(ptid, &time, vt_gpu_commCID, 0); + } + } +} + +/* + * Handle asynchronous CUDA runtime memory copy calls. + * + * @param cbInfo information about the callback + * @param kind the direction of the transfer + * @param bytes the number of transfered bytes + * @param cuStrm the CUDA stream + */ +static void vt_cupticb_handle_cudart_mcpyAsync(const CUpti_CallbackData *cbInfo, + enum cudaMemcpyKind kind, + uint64_t bytes, + cudaStream_t cuStrm) +{ + uint32_t strmID; + uint32_t ptid; + uint64_t time; + + if(!vt_cupticb_trace_mcpyAsync) return; + + VT_CHECK_THREAD; + ptid = VT_MY_THREAD; + + if(cbInfo->callbackSite == CUPTI_API_ENTER){ + /* get the VampirTrace thread ID the kernel is running on */ + { + vt_cupticb_ctx_t *vtCtx = vt_cupticb_checkCtx(cbInfo->contextUid); + vt_cupticb_strm_t *vtStrm = vt_cupticb_checkStream(ptid, vtCtx, &cuStrm); + + strmID = vtStrm->tid; + + /* save address into 64 Bit correlation value for exit callback */ + *cbInfo->correlationData = (uint64_t)vtStrm; + } + + CUPTI_CB_LOCK(); + if(kind != cudaMemcpyDeviceToDevice) vt_gpu_prop[ptid] |= VTGPU_GPU_COMM; + vt_gpu_prop[strmID] |= VTGPU_GPU_COMM; + CUPTI_CB_UNLOCK(); + + time = vt_pform_wtime(); + if(kind == cudaMemcpyHostToDevice){ + vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace, + vt_gpu_commCID, 0, bytes); + }else if(kind == cudaMemcpyDeviceToHost){ + vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace, + vt_gpu_commCID, 0, bytes); + }else if(kind == cudaMemcpyDeviceToDevice){ + vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, + vt_gpu_commCID, 0, bytes); + } + } + + if(cbInfo->callbackSite == CUPTI_API_EXIT){ + strmID = ((vt_cupticb_strm_t *)(*cbInfo->correlationData))->tid; + + time = vt_pform_wtime(); + + /* synchronize to get host waiting time */ 
+ if(vt_cupticb_syncLevel > 0){ + if(vt_cupticb_syncLevel > 1) vt_enter(ptid, &time, vt_cupticb_rid_sync); + CHECK_CU_ERROR(cuCtxSynchronize(), NULL); + time = vt_pform_wtime(); + if(vt_cupticb_syncLevel > 1) vt_exit(ptid, &time); + } + + if(kind == cudaMemcpyDeviceToDevice){ + vt_mpi_rma_end(strmID, &time, vt_gpu_commCID, 0); + }else if(kind != cudaMemcpyHostToHost){ + vt_mpi_rma_end(ptid, &time, vt_gpu_commCID, 0); + } + } +} + +static void vt_cupti_callback_finalizeContext(vt_cupticb_ctx_t *vtCtx) +{ + uint32_t ptid; + + VT_CHECK_THREAD; + ptid = VT_MY_THREAD; + + if(vt_cupticb_trace_events && vt_gpu_debug == 0){ + uint64_t time = vt_pform_wtime(); + vt_cupticb_strm_t *curStrm = vtCtx->strmList; + vt_cuptievt_ctx_t* vtcuptiCtx = vt_cuptievt_getCurrentContext(ptid); + + while(curStrm != NULL){ + vt_cuptievt_resetCounter(vtcuptiCtx, curStrm->tid, &time); + curStrm = curStrm->next; + } + + vt_cuptievt_finalize_device(ptid, 0); + } + + /* write idle end time to CUDA stream 0 */ + if(vt_cupticb_trace_gpu_idle){ + uint64_t idle_end = vt_pform_wtime(); + vt_exit(vtCtx->strmList->tid, &idle_end); + } + + /* cleanup stream list */ + if(vtCtx->strmList != NULL){ + free(vtCtx->strmList); + vtCtx->strmList = NULL; + } +} + +/* -------------START: Implementation of public functions ------------------ */ +/* ------------------------------------------------------------------------- */ + +/** + * Initialize the VampirTrace CUPTI callback implementation. + */ +void vt_cupti_callback_init() +{ + if(!vt_cupticb_initialized){ +#if (defined(VT_MT) || defined(VT_HYB)) + VTThrd_createMutex(&VTThrdMutexCuptiCB); +#endif + CUPTI_CB_LOCK(); + if(!vt_cupticb_initialized){ + vt_cntl_msg(2, "[CUPTI Callback] Initializing ... 
"); + + /* get some environment variables */ + vt_cupticb_trace_cudart = (uint8_t)vt_env_cudarttrace(); + vt_cupticb_trace_kernels = (uint8_t)vt_env_cudatrace_kernel(); + vt_cupticb_trace_mcpyAsync = (uint8_t)vt_env_cudatrace_memcpyasync(); + vt_cupticb_trace_gpu_mem = (uint8_t)vt_env_cudatrace_gpumem(); + vt_cupticb_trace_gpu_idle = (uint8_t)vt_env_cudatrace_idle(); + vt_cupticb_syncLevel = (uint8_t)vt_env_cudatrace_sync(); + + /* check for CUPTI event capturing */ + if(vt_env_cupti_metrics() == NULL){ + vt_cupticb_trace_events = 0; + }else{ + vt_cupticb_trace_events = 1; + vt_cupticb_event_sampling = (uint8_t)vt_env_cupti_sampling(); + } + + #if (defined(VT_MT) || defined(VT_HYB)) + VTTHRD_LOCK_IDS(); + #endif + + /* initialize GPU common stuff */ + vt_gpu_init(); + + /* get global counter group IDs */ + { + uint32_t cgid_kn = vt_def_counter_group(VT_MASTER_THREAD, "CUDA_KERNEL"); + + vt_cupticb_cid_blocksPerGrid = vt_def_counter(VT_MASTER_THREAD, + "blocks_per_grid", "#", + VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, cgid_kn, 0); + vt_cupticb_cid_threadsPerBlock = vt_def_counter(VT_MASTER_THREAD, + "threads_per_block", "#", + VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, cgid_kn, 0); + vt_cupticb_cid_threadsPerKernel = vt_def_counter(VT_MASTER_THREAD, + "threads_per_kernel", "#", + VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, cgid_kn, 0); + } + + if(vt_cupticb_trace_gpu_mem){ + vt_cupticb_cid_cudaMalloc = vt_def_counter(VT_MASTER_THREAD, + "gpu_mem_usage", "Bytes", + VT_CNTR_ABS | VT_CNTR_NEXT | VT_CNTR_UNSIGNED, + vt_def_counter_group(VT_MASTER_THREAD, "CUDA_MEMORY_USAGE"), + 0); + } + + /* get global region IDs */ + vt_cupticb_rid_sync = vt_def_region(VT_MASTER_THREAD, "cudaSynchronize", + VT_NO_ID, VT_NO_LNO, VT_NO_LNO, "CUDA_SYNC", VT_FUNCTION); + + + + if(vt_cupticb_trace_gpu_idle){ + vt_cupticb_rid_idle = vt_def_region(VT_MASTER_THREAD, "gpu_idle", + VT_NO_ID, VT_NO_LNO, VT_NO_LNO, "CUDA_IDLE", VT_FUNCTION); + } + + #if (defined(VT_MT) || 
defined(VT_HYB)) + VTTHRD_UNLOCK_IDS(); + #endif + + /* set callback for CUDA runtime API functions */ + if(vt_cupticb_trace_cudart){ + vt_cupti_set_callback(&vt_cupticb_cudart_subscriber, + vt_cupticb_cudart_ptr, + CUPTI_CB_DOMAIN_RUNTIME_API, + CUPTI_RUNTIME_TRACE_CBID_INVALID); + } + + /* reset the hash table for CUDA API functions */ + memset(vt_cupticb_cudaApiHtab, VT_NO_ID, + VT_CUPTICB_CUDA_API_HASH_MAX * sizeof(uint32_t)); + + /* TODO: check exit handler problems with CUPTI events */ + if(vt_cupticb_trace_events) vt_gpu_debug = 1; + + /* register the finalize function of VampirTrace CUPTI to be called before + * the program exits */ + atexit(vt_cupti_callback_finalize); + + vt_cupticb_initialized = 1; + CUPTI_CB_UNLOCK(); + } + } +} + +/** + * Finalize the VampirTrace CUPTI callback implementation. + */ +void vt_cupti_callback_finalize() +{ + if(!vt_cupticb_finalized){ + + CUPTI_CB_LOCK(); + if(!vt_cupticb_finalized){ + + vt_cntl_msg(2, "[CUPTI CALLBACK] Finalizing ... "); + + if(vt_cupticb_trace_cudart){ + CHECK_CUPTI_ERROR(cuptiUnsubscribe(vt_cupticb_cudart_subscriber), + "cuptiUnsubscribe"); + } + + /* clean up the VampirTrace CUDA context list */ + while(vt_cupticb_ctxList != NULL){ + vt_cupticb_ctx_t *vtCtx = vt_cupticb_ctxList; + + vt_cupticb_ctxList = vt_cupticb_ctxList->next; + + vt_cupti_callback_finalizeContext(vtCtx); + + free(vtCtx); + } + + if(vt_cupticb_trace_events) vt_cupti_events_finalize(); + + vt_cupticb_hashClear(); + vt_gpu_finalize(); + + vt_cupticb_finalized = 1; + CUPTI_CB_UNLOCK(); + +#if (defined(VT_MT) || defined (VT_HYB)) + VTTHRD_LOCK_ENV(); + VTThrd_deleteMutex(&VTThrdMutexCuptiCB); + VTTHRD_UNLOCK_ENV(); +#endif /* VT_MT || VT_HYB */ + } + } +} diff --git a/ompi/contrib/vt/vt/vtlib/vt_cupti_callback.h b/ompi/contrib/vt/vt/vtlib/vt_cupti_callback.h new file mode 100644 index 0000000000..ec1c5c8844 --- /dev/null +++ b/ompi/contrib/vt/vt/vtlib/vt_cupti_callback.h @@ -0,0 +1,26 @@ +/** + * VampirTrace + * 
http://www.tu-dresden.de/zih/vampirtrace + * + * Copyright (c) 2005-2011, ZIH, TU Dresden, Federal Republic of Germany + * + * Copyright (c) 1998-2005, Forschungszentrum Juelich, Juelich Supercomputing + * Centre, Federal Republic of Germany + * + * See the file COPYING in the package base directory for details + **/ + +#ifndef VT_CUPTI_CALLBACK_H +#define VT_CUPTI_CALLBACK_H + +#ifdef __cplusplus +# define EXTERN extern "C" +#else +# define EXTERN extern +#endif + +EXTERN void vt_cupti_callback_init(void); +EXTERN void vt_cupti_callback_finalize(void); + +#endif /* VT_CUPTI_CALLBACK_H */ + diff --git a/ompi/contrib/vt/vt/vtlib/vt_cudacupti.c b/ompi/contrib/vt/vt/vtlib/vt_cupti_events.c similarity index 52% rename from ompi/contrib/vt/vt/vtlib/vt_cudacupti.c rename to ompi/contrib/vt/vt/vtlib/vt_cupti_events.c index 5e07040a27..6ce75484c7 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_cudacupti.c +++ b/ompi/contrib/vt/vt/vtlib/vt_cupti_events.c @@ -13,105 +13,237 @@ #include "vt_env.h" /* get environment variables */ #include "vt_pform.h" /* VampirTrace time measurement */ #include "vt_trc.h" /* VampirTrace events */ -#include "vt_cudacupti.h" +#include "vt_cupti_events.h" #include "vt_gpu.h" #include -#define CHECK_CUPTI_ERROR(err, cuptifunc) \ - if(err != CUPTI_SUCCESS){ \ - const char *errstr; \ - cuptiGetResultString(err, &errstr); \ - vt_error_msg("[CUPTI] %s:%d:%s:'%s'", \ - __FILE__, __LINE__, cuptifunc, errstr); \ - } - -#define PRINT_CUPTI_ERROR(err, cuptifunc){ \ - const char *errstr; \ - cuptiGetResultString(err, &errstr); \ - vt_warning("[CUPTI] %s:%d:%s:'%s'", \ - __FILE__, __LINE__, cuptifunc, errstr); \ - } - /* Mutex for locking the CUPTI environment */ #if (defined(VT_MT) || defined(VT_HYB)) static VTThrdMutex* VTThrdMutexCupti = NULL; -# define CUPTI_LOCK() VTThrd_lock(&VTThrdMutexCupti) -# define CUPTI_UNLOCK() VTThrd_unlock(&VTThrdMutexCupti) +# define VT_CUPTIEVT_LOCK() VTThrd_lock(&VTThrdMutexCupti) +# define VT_CUPTIEVT_UNLOCK() 
VTThrd_unlock(&VTThrdMutexCupti) #else /* VT_MT || VT_HYB */ -# define CUPTI_LOCK() -# define CUPTI_UNLOCK() +# define VT_CUPTIEVT_LOCK() +# define VT_CUPTIEVT_UNLOCK() #endif /* VT_MT || VT_HYB */ -static uint32_t rid_cupti_init; -static uint8_t vt_cupti_initialized = 0; -static uint8_t vt_cupti_finalized = 0; +/* check return values for CUPTI calls */ +#define PRINT_CUPTI_ERROR(err, _msg){ \ + const char *errstr; \ + cuptiGetResultString(err, &errstr); \ + vt_warning("[CUPTI EVENTS] %s:%d:%s:'%s'", \ + __FILE__, __LINE__, _msg, errstr); \ + } + +#define VT_CUPTI_CALL(_err, _msg) \ + if(_err != CUPTI_SUCCESS){ \ + vt_cupti_handleError(_err, _msg,__FILE__, __LINE__); \ + } + +/* some of CUPTI API functions have changed */ +#if (defined(CUPTI_API_VERSION) && (CUPTI_API_VERSION >= 2)) + +# define VTCUPTIEVENTGETATTRIBUTE(_cuDev, _cuptiEvtID, _cuptiAttr, _valueSize, \ + _value) \ + VT_CUPTI_CALL(\ + cuptiEventGetAttribute(_cuptiEvtID, _cuptiAttr, _valueSize, _value), \ + "cuptiEventGetAttribute") + +# define VTCUPTIEVENTDOMAINGETNUMEVENTS(_cuDev, _cuptiDomain, _numEvts) \ + VT_CUPTI_CALL(\ + cuptiEventDomainGetNumEvents(_cuptiDomain, _numEvts), \ + "cuptiEventDomainGetNumEvents") + +# define VTCUPTIEVENTDOMAINENUMEVENTS(_cuDev, _cuptiDomain, _valueSize, _value)\ + VT_CUPTI_CALL(\ + cuptiEventDomainEnumEvents(_cuptiDomain, _valueSize, _value), \ + "cuptiEventDomainEnumEvents") + +#else + +# define VTCUPTIEVENTGETATTRIBUTE(_cuDev, _cuptiEvtID, _cuptiAttr, _valueSize, \ + _value) \ + VT_CUPTI_CALL(\ + cuptiEventGetAttribute(_cuDev, _cuptiEvtID, _cuptiAttr, _valueSize,_value),\ + "cuptiEventGetAttribute") + +# define VTCUPTIEVENTDOMAINGETNUMEVENTS(_cuDev, _cuptiDomain, _numEvts) \ + VT_CUPTI_CALL(\ + cuptiEventDomainGetNumEvents(_cuDev, _cuptiDomain, _numEvts), \ + "cuptiEventDomainGetNumEvents") + +# define VTCUPTIEVENTDOMAINENUMEVENTS(_cuDev, _cuptiDomain, _valueSize, _value)\ + VT_CUPTI_CALL(\ + cuptiEventDomainEnumEvents(_cuDev, _cuptiDomain, _valueSize, _value), \ + 
"cuptiEventDomainEnumEvents") + +#endif + +static uint32_t vt_cuptievt_rid_init; +static uint8_t vt_cuptievt_initialized = 0; +static uint8_t vt_cuptievt_finalized = 0; /* VampirTrace counter group ID */ -static uint32_t cgid_cupti; +static uint32_t vt_cuptievt_cgid; -static vt_cupti_dev_t *vt_cupti_capList = NULL; -static vt_cupti_ctx_t *vtcuptiCtxlist = NULL; +static vt_cuptievt_dev_t *vtcuptievtCapList = NULL; +static vt_cuptievt_ctx_t *vtcuptievtCtxList = NULL; /***** --- Declaration of internally used functions --- *****/ /* * Enables the recording of CUPTI counters. Either thread if or pointer to the * host thread structure has to be given. + * + * @param vtcuptiCtx pointer to the VampirTrace CUPTI context */ -static void vt_cupti_start(vt_cupti_ctx_t *vtcuptiCtx); +static void vt_cuptievt_start(vt_cuptievt_ctx_t *vtcuptiCtx); /* * Disables recording of CUPTI counters. + * + * @param vtcuptiCtx pointer to the VampirTrace CUPTI context */ -static void vt_cupti_stop(vt_cupti_ctx_t *vtcuptiCtx); +static void vt_cuptievt_stop(vt_cuptievt_ctx_t *vtcuptiCtx); + +/* + * Initialize a VampirTrace CUPTI context. + * + * @param ptid the VampirTrace process/thread id + * @param cuCtx the CUDA context + * + * @return pointer to the created VampirTrace CUPTI context + */ +static vt_cuptievt_ctx_t* vt_cuptievt_initCtx(uint32_t ptid, CUcontext cuCtx); /* * Get to current VampirTrace CUPTI context or create a new one, if CUDA context * is not registered yet. * + * @param cuCtx the CUDA context to lookup the VampirTrace CUPTI context * @param ptid the VampirTrace thread id of current running thread * * @return the corresponding VampirTrace host thread structure. 
*/ -static vt_cupti_ctx_t* vt_cupti_getCtx(CUcontext cuCtx, uint32_t ptid); +static vt_cuptievt_ctx_t* vt_cuptievt_getCtx(CUcontext cuCtx, uint32_t ptid); /* - * Free CUPTI event group and internally allocated memory for active host thread - * - * @param ptid VampirTrace thread id of current host thread + * Free the memory allocated for the given VampirTrace CUPTI context. + * + * @param vtcuptiCtx pointer to the VampirTrace CUPTI context */ -static void vt_cupti_finish(vt_cupti_ctx_t *vtcuptiCtx); +static void vt_cuptievt_freeCtx(vt_cuptievt_ctx_t *vtcuptiCtx); -static vt_cupti_grp_t* vt_cupti_createEvtGrp(vt_cupti_ctx_t *vtcuptiCtx); +/* + * Remove the given CUDA context from the global VampirTrace CUPTI context list. + * + * @param cuCtx pointer to the CUDA context + * + * @return the removed VampirTrace CUPTI context entry + */ +static vt_cuptievt_ctx_t* vt_cupti_takeCtxFromList(CUcontext *cuCtx); -static vt_cupti_ctx_t* vt_cupti_initCtx(uint32_t ptid, CUcontext cuCtx); -static void vt_cupti_freeCtx(vt_cupti_ctx_t *vtcuptiCtx); +/* + * De-initialize the VampirTrace CUPTI context without destroying it. + * + * @param vtcuptiCtx pointer to the VampirTrace CUPTI context + */ +static void vt_cuptievt_finish(vt_cuptievt_ctx_t *vtcuptiCtx); -static vt_cupti_dev_t* vt_cupti_setupMetricList(void); -static void vt_cupti_fillMetricList(vt_cupti_dev_t *capList); -static vt_cupti_dev_t* vt_cupti_checkMetricList(vt_cupti_dev_t *capList, - int major, int minor); -static void vt_cupti_showAllCounters(CUdevice cuDev); -static vt_cupti_ctx_t* vt_cupti_takeCtxFromList(CUcontext cuCtx); -static void enumEvents(CUdevice cuDev, CUpti_EventDomainID domainId); +/* + * Create a VampirTrace CUPTI event group. 
+ * + * @param vtcuptiCtx pointer to the VampirTrace CUPTI context + * + * @return the created VampirTrace CUPTI event group + */ +static vt_cuptievt_grp_t* vt_cuptievt_createEvtGrp(vt_cuptievt_ctx_t *vtcuptiCtx); + +/* + * Setup a list of devices with different device capabilities and add the + * metrics, which are specified by the user. + * + * @return a list of CUDA devices with different device capabilities + */ +static vt_cuptievt_dev_t* vt_cuptievt_setupMetricList(void); + +/* + * Parse the environment variable for CUPTI metrics (including CUDA device + * capabilities) and fill the capability metric list. + * + * @param capList points to the first element of the capability metric list + */ +static void vt_cupti_fillMetricList(vt_cuptievt_dev_t *capList); + +/* + * Check whether the CUDA device capability is already listed. + * + * @param capList IN: list containing the CUDA device capabilities + * @param major the major CUDA device capability + * @param minor the minor CUDA device capability + * + * @return pointer to the list entry (NULL if not found) + */ +static vt_cuptievt_dev_t* vt_cupti_checkMetricList(vt_cuptievt_dev_t *capList, + int major, int minor); + +/* + * Print all available counters to stdout. + * + * @param capList list of CUDA devices with different capabilities + */ +static void vt_cupti_showAllCounters(vt_cuptievt_dev_t *capList); + +/* + * Print all events for a given CUDA device and CUPTI event domain with name + * and ID. + * + * @param cuDev the CUDA device + * @param domainId the CUPTI event domain ID + */ +static void vt_cuptievt_enumEvents(CUdevice cuDev, CUpti_EventDomainID domainId); /* ------ */ /* ----------------------- internally used functions ----------------------- */ -static vt_cupti_grp_t* vt_cupti_createEvtGrp(vt_cupti_ctx_t *vtcuptiCtx) +/* + * Handles errors returned from CUPTI function calls. 
+ * + * @param ecode the CUDA driver API error code + * @param msg a message to get more detailed information about the error + * @param the corresponding file + * @param the line the error occurred + */ +static void vt_cupti_handleError(CUptiResult err, const char* msg, + const char *file, const int line) +{ + const char *errstr; + + if(msg != NULL) vt_cntl_msg(1, msg); + + cuptiGetResultString(err, &errstr); + + if(vt_gpu_error){ + vt_error_msg("[CUPTI EVENTS] %s:%d:'%s'", file, line, errstr); + }else{ + vt_warning("[CUPTI EVENTS] %s:%d:'%s'", file, line, errstr); + } +} + +static vt_cuptievt_grp_t* vt_cuptievt_createEvtGrp(vt_cuptievt_ctx_t *vtcuptiCtx) { CUptiResult cuptiErr = CUPTI_SUCCESS; - vt_cupti_grp_t *vtcuptiGrp = NULL; + vt_cuptievt_grp_t *vtcuptiGrp = NULL; - vtcuptiGrp = (vt_cupti_grp_t*)malloc(sizeof(vt_cupti_grp_t)); + vtcuptiGrp = (vt_cuptievt_grp_t*)malloc(sizeof(vt_cuptievt_grp_t)); vtcuptiGrp->evtNum = 0; vtcuptiGrp->enabled = 0; vtcuptiGrp->next = NULL; /* create initial CUPTI counter group */ cuptiErr = cuptiEventGroupCreate(vtcuptiCtx->cuCtx, &(vtcuptiGrp->evtGrp), 0); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupCreate"); + VT_CUPTI_CALL(cuptiErr, "cuptiEventGroupCreate"); vtcuptiGrp->cuptiEvtIDs = (CUpti_EventID *)malloc( vtcuptiCtx->vtDevCap->evtNum*sizeof(CUpti_EventID)); @@ -121,11 +253,11 @@ static vt_cupti_grp_t* vt_cupti_createEvtGrp(vt_cupti_ctx_t *vtcuptiCtx) return vtcuptiGrp; } -static void vt_cupti_addEvtGrpsToCtx(vt_cupti_ctx_t *vtcuptiCtx) +static void vt_cupti_addEvtGrpsToCtx(vt_cuptievt_ctx_t *vtcuptiCtx) { CUptiResult cuptiErr = CUPTI_SUCCESS; - vt_cupti_grp_t *vtcuptiGrp = vt_cupti_createEvtGrp(vtcuptiCtx); - vt_cupti_evt_t *vtcuptiEvt = vtcuptiCtx->vtDevCap->vtcuptiEvtList; + vt_cuptievt_grp_t *vtcuptiGrp = vt_cuptievt_createEvtGrp(vtcuptiCtx); + vt_cuptievt_evt_t *vtcuptiEvt = vtcuptiCtx->vtDevCap->vtcuptiEvtList; /* try to add all events for current context/device */ while(vtcuptiEvt != NULL && vtcuptiGrp->evtNum < 
vtcuptiCtx->vtDevCap->evtNum){ @@ -134,8 +266,8 @@ static void vt_cupti_addEvtGrpsToCtx(vt_cupti_ctx_t *vtcuptiCtx) /* everything is fine */ if(cuptiErr == CUPTI_SUCCESS){ - *(vtcuptiGrp->cuptiEvtIDs) = vtcuptiEvt->cuptiEvtID; - *(vtcuptiGrp->vtCIDs) = vtcuptiEvt->vtCID; + vtcuptiGrp->cuptiEvtIDs[vtcuptiGrp->evtNum] = vtcuptiEvt->cuptiEvtID; + vtcuptiGrp->vtCIDs[vtcuptiGrp->evtNum] = vtcuptiEvt->vtCID; vtcuptiGrp->evtNum++; }else{ /* we can at least try to put the event in another group */ @@ -145,7 +277,7 @@ static void vt_cupti_addEvtGrpsToCtx(vt_cupti_ctx_t *vtcuptiCtx) if(cuptiErr == CUPTI_ERROR_MAX_LIMIT_REACHED || cuptiErr == CUPTI_ERROR_NOT_COMPATIBLE){ - vt_cntl_msg(2, "[CUPTI] Create another event group for event %d", + vt_cntl_msg(2, "[CUPTI EVENTS] Create new event group for event %d", vtcuptiEvt->cuptiEvtID); /* prepend last group to list, if it is not empty */ @@ -155,7 +287,7 @@ static void vt_cupti_addEvtGrpsToCtx(vt_cupti_ctx_t *vtcuptiCtx) } /* create new VampirTrace CUPTI event group */ - vtcuptiGrp = vt_cupti_createEvtGrp(vtcuptiCtx); + vtcuptiGrp = vt_cuptievt_createEvtGrp(vtcuptiCtx); /* try to add the same event to the just created group */ continue; @@ -182,16 +314,16 @@ static void vt_cupti_addEvtGrpsToCtx(vt_cupti_ctx_t *vtcuptiCtx) * * @return the created VampirTrace CUPTI host thread structure */ -static vt_cupti_ctx_t* vt_cupti_initCtx(uint32_t ptid, CUcontext cuCtx) +static vt_cuptievt_ctx_t* vt_cuptievt_initCtx(uint32_t ptid, CUcontext cuCtx) { - vt_cupti_ctx_t *vtcuptiCtx = NULL; + vt_cuptievt_ctx_t *vtcuptiCtx = NULL; uint64_t time; - vt_cntl_msg(2, "[CUPTI] Initializing VampirTrace CUPTI context (ptid=%d)", + vt_cntl_msg(2, "[CUPTI EVENTS] Initializing VampirTrace CUPTI context (ptid=%d)", ptid); time = vt_pform_wtime(); - vt_enter(ptid, &time, rid_cupti_init); + vt_enter(ptid, &time, vt_cuptievt_rid_init); /* do not trace CUDA functions invoked here */ VT_SUSPEND_CUDA_TRACING(ptid); @@ -211,7 +343,7 @@ static vt_cupti_ctx_t* 
vt_cupti_initCtx(uint32_t ptid, CUcontext cuCtx) CUresult cuErr = CUDA_SUCCESS; int dev_major, dev_minor; CUdevice cuDev = 0; - vt_cupti_dev_t *cuptiDev; + vt_cuptievt_dev_t *cuptiDev; CHECK_CU_ERROR(cuCtxGetDevice(&cuDev), "cuCtxGetDevice"); @@ -219,13 +351,13 @@ static vt_cupti_ctx_t* vt_cupti_initCtx(uint32_t ptid, CUcontext cuCtx) CHECK_CU_ERROR(cuErr, "cuDeviceComputeCapability"); /* check if device capability already listed */ - CUPTI_LOCK(); - cuptiDev = vt_cupti_capList; - CUPTI_UNLOCK(); + VT_CUPTIEVT_LOCK(); + cuptiDev = vtcuptievtCapList; + VT_CUPTIEVT_UNLOCK(); cuptiDev = vt_cupti_checkMetricList(cuptiDev, dev_major, dev_minor); if(cuptiDev){ - vtcuptiCtx = (vt_cupti_ctx_t*)malloc(sizeof(vt_cupti_ctx_t)); + vtcuptiCtx = (vt_cuptievt_ctx_t*)malloc(sizeof(vt_cuptievt_ctx_t)); if(vtcuptiCtx == NULL) vt_error_msg("malloc(sizeof(VTCUPTIhostThrd)) failed!"); vtcuptiCtx->cuCtx = cuCtx; @@ -256,10 +388,10 @@ static vt_cupti_ctx_t* vt_cupti_initCtx(uint32_t ptid, CUcontext cuCtx) } /* add VampirTrace CUPTI context entry to list (as first element) */ - CUPTI_LOCK(); - vtcuptiCtx->next = vtcuptiCtxlist; - vtcuptiCtxlist = vtcuptiCtx; - CUPTI_UNLOCK(); + VT_CUPTIEVT_LOCK(); + vtcuptiCtx->next = vtcuptievtCtxList; + vtcuptievtCtxList = vtcuptiCtx; + VT_CUPTIEVT_UNLOCK(); time = vt_pform_wtime(); vt_exit(ptid, &time); @@ -267,9 +399,9 @@ static vt_cupti_ctx_t* vt_cupti_initCtx(uint32_t ptid, CUcontext cuCtx) return vtcuptiCtx; } -static void vt_cupti_freeCtx(vt_cupti_ctx_t *vtcuptiCtx) +static void vt_cuptievt_freeCtx(vt_cuptievt_ctx_t *vtcuptiCtx) { - vt_cupti_grp_t *vtcuptiGrp = vtcuptiCtx->vtGrpList; + vt_cuptievt_grp_t *vtcuptiGrp = vtcuptiCtx->vtGrpList; while(vtcuptiGrp != NULL){ free(vtcuptiGrp->cuptiEvtIDs); @@ -292,39 +424,40 @@ static void vt_cupti_freeCtx(vt_cupti_ctx_t *vtcuptiCtx) * @param cuCtx the CUDA context * @param ptid the active VampirTrace thread id */ -static vt_cupti_ctx_t* vt_cupti_getCtx(CUcontext cuCtx, uint32_t ptid) +static 
vt_cuptievt_ctx_t* vt_cuptievt_getCtx(CUcontext cuCtx, uint32_t ptid) { - vt_cupti_ctx_t *vtcuptiCtx = NULL; + vt_cuptievt_ctx_t *vtcuptiCtx = NULL; /* check, if there has been at least one VampirTrace CUPTI context created */ - if(vtcuptiCtxlist == NULL) vt_cupti_init(); + if(vtcuptievtCtxList == NULL) vt_cupti_events_init(); /* check, if the current VampirTrace thread is enabled for GPU counters */ if((vt_gpu_prop[ptid] & VTGPU_NO_PC) == VTGPU_NO_PC) return NULL; /* check if CUDA context is listed (linear search) */ - CUPTI_LOCK(); - vtcuptiCtx = vtcuptiCtxlist; + VT_CUPTIEVT_LOCK(); + vtcuptiCtx = vtcuptievtCtxList; while(vtcuptiCtx != NULL){ if(vtcuptiCtx->cuCtx == cuCtx){ - CUPTI_UNLOCK(); - /*vt_cntl_msg(1, "[CUPTI] host thread %d (MPI rank %d)", ptid, vt_my_trace);*/ + VT_CUPTIEVT_UNLOCK(); + /*vt_cntl_msg(1, "[CUPTI EVENTS] host thread %d (MPI rank %d)", ptid, vt_my_trace);*/ return vtcuptiCtx; } vtcuptiCtx = vtcuptiCtx->next; } - CUPTI_UNLOCK(); + VT_CUPTIEVT_UNLOCK(); - vt_cntl_msg(2, "[CUPTI] Context for VT tid %d unknown! Creating ... ", ptid); + vt_cntl_msg(2, "[CUPTI EVENTS] Context for VT tid=%d unknown! Creating ... 
", + ptid); - vtcuptiCtx = vt_cupti_initCtx(ptid, cuCtx); + vtcuptiCtx = vt_cuptievt_initCtx(ptid, NULL); if(vtcuptiCtx != NULL){ - vt_cupti_start(vtcuptiCtx); + vt_cuptievt_start(vtcuptiCtx); }else{ /* no performance counters for this thread available */ vt_gpu_prop[ptid] |= VTGPU_NO_PC; - vt_cntl_msg(2, "[CUPTI] Could not initialize!"); + vt_cntl_msg(2, "[CUPTI EVENTS] Could not initialize!"); } return vtcuptiCtx; @@ -336,7 +469,7 @@ static vt_cupti_ctx_t* vt_cupti_getCtx(CUcontext cuCtx, uint32_t ptid) * * @param capList points to the first element of the capability metric list */ -static void vt_cupti_fillMetricList(vt_cupti_dev_t *capList) +static void vt_cupti_fillMetricList(vt_cuptievt_dev_t *capList) { char *metricString = vt_env_cupti_metrics(); char *metric_sep = vt_env_metrics_sep(); @@ -346,8 +479,8 @@ static void vt_cupti_fillMetricList(vt_cupti_dev_t *capList) while (metric != NULL){ CUptiResult cuptiErr = CUPTI_SUCCESS; - vt_cupti_dev_t *cuptiDev = NULL; - vt_cupti_evt_t *vtcuptiEvt = NULL; + vt_cuptievt_dev_t *cuptiDev = NULL; + vt_cuptievt_evt_t *vtcuptiEvt = NULL; int metr_major = 0; int metr_minor = 0; @@ -358,8 +491,8 @@ static void vt_cupti_fillMetricList(vt_cupti_dev_t *capList) metr_minor = atoi(metric_cap+1); metric_cap = strchr(metric_cap+1, '_'); } - - /* check wether device capability is given or not */ + + /* check whether device capability is given or not */ if(metric_cap){ metric = metric_cap + 1; @@ -371,12 +504,13 @@ static void vt_cupti_fillMetricList(vt_cupti_dev_t *capList) continue; } - vtcuptiEvt = (vt_cupti_evt_t*)malloc(sizeof(vt_cupti_evt_t)); + vtcuptiEvt = (vt_cuptievt_evt_t*)malloc(sizeof(vt_cuptievt_evt_t)); cuptiErr = cuptiEventGetIdFromName(cuptiDev->cuDev, metric, &vtcuptiEvt->cuptiEvtID); if(cuptiErr != CUPTI_SUCCESS){ - vt_warning("Skipping invalid event name: %s", metric); - vt_cupti_showAllCounters(cuptiDev->cuDev); + if(!strncmp(metric, "help", 4)) vt_cupti_showAllCounters(capList); + vt_warning("[CUPTI EVENTS] 
Skipping invalid event '%s' for device %d", + metric, cuptiDev->cuDev); metric = strtok(NULL, metric_sep); continue; } @@ -385,8 +519,8 @@ static void vt_cupti_fillMetricList(vt_cupti_dev_t *capList) #if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_LOCK_IDS(); #endif - vtcuptiEvt->vtCID = vt_def_counter(VT_MASTER_THREAD, metric, - VT_CNTR_ABS | VT_CNTR_LAST | VT_CNTR_UNSIGNED, cgid_cupti, ""); + vtcuptiEvt->vtCID = vt_def_counter(VT_MASTER_THREAD, metric, "#", + VT_CNTR_ABS | VT_CNTR_LAST | VT_CNTR_UNSIGNED, vt_cuptievt_cgid, 0); #if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_UNLOCK_IDS(); #endif @@ -394,34 +528,37 @@ static void vt_cupti_fillMetricList(vt_cupti_dev_t *capList) cuptiDev->evtNum++; vtcuptiEvt->next = cuptiDev->vtcuptiEvtList; cuptiDev->vtcuptiEvtList = vtcuptiEvt; - }else{ /* try to add metric to all devices */ - uint32_t cid_metric; - - /* create VampirTrace counter ID */ -#if (defined(VT_MT) || defined(VT_HYB)) - VTTHRD_LOCK_IDS(); -#endif - cid_metric = vt_def_counter(VT_MASTER_THREAD, - metric, VT_CNTR_ABS | VT_CNTR_LAST | VT_CNTR_UNSIGNED, cgid_cupti, ""); -#if (defined(VT_MT) || defined(VT_HYB)) - VTTHRD_UNLOCK_IDS(); -#endif + }else{ + /* device capability is not given. 
Try to add metric to all devices */ + uint32_t cid_metric = VT_NO_ID; cuptiDev = capList; while(cuptiDev != NULL){ - vtcuptiEvt = (vt_cupti_evt_t*)malloc(sizeof(vt_cupti_evt_t)); + vtcuptiEvt = (vt_cuptievt_evt_t*)malloc(sizeof(vt_cuptievt_evt_t)); cuptiErr = cuptiEventGetIdFromName(cuptiDev->cuDev, metric, &vtcuptiEvt->cuptiEvtID); if(cuptiErr != CUPTI_SUCCESS){ - vt_cntl_msg(2, "[CUPTI] Skipping event '%s' for device %d", - metric, cuptiDev->cuDev); + if(!strncmp(metric, "help", 4)) vt_cupti_showAllCounters(capList); + vt_warning("[CUPTI EVENTS] Skipping invalid event '%s' for device %d", + metric, cuptiDev->cuDev); }else{ + /* create VampirTrace counter ID, if not yet done for other device */ + if(cid_metric == VT_NO_ID){ +#if (defined(VT_MT) || defined(VT_HYB)) + VTTHRD_LOCK_IDS(); +#endif + cid_metric = vt_def_counter(VT_MASTER_THREAD, metric, "#", + VT_CNTR_ABS | VT_CNTR_LAST | VT_CNTR_UNSIGNED, vt_cuptievt_cgid, 0); +#if (defined(VT_MT) || defined(VT_HYB)) + VTTHRD_UNLOCK_IDS(); +#endif + } + cuptiDev->evtNum++; vtcuptiEvt->vtCID = cid_metric; vtcuptiEvt->next = cuptiDev->vtcuptiEvtList; cuptiDev->vtcuptiEvtList = vtcuptiEvt; - } cuptiDev = cuptiDev->next; @@ -433,7 +570,7 @@ static void vt_cupti_fillMetricList(vt_cupti_dev_t *capList) } /* - * Check wether the CUDA device capability is already listed. + * Check whether the CUDA device capability is already listed. 
* * @param capList IN: list containing the CUDA device capabilities * @param major the major CUDA device capability @@ -441,10 +578,10 @@ static void vt_cupti_fillMetricList(vt_cupti_dev_t *capList) * * @return pointer to the list entry (NULL if not found) */ -static vt_cupti_dev_t* vt_cupti_checkMetricList(vt_cupti_dev_t *capList, +static vt_cuptievt_dev_t* vt_cupti_checkMetricList(vt_cuptievt_dev_t *capList, int major, int minor) { - vt_cupti_dev_t *cuptiDev; + vt_cuptievt_dev_t *cuptiDev; /* check if device capability is already listed and return it if found */ cuptiDev = capList; @@ -459,33 +596,31 @@ static vt_cupti_dev_t* vt_cupti_checkMetricList(vt_cupti_dev_t *capList, } /* + * Setup a list of devices with different device capabilities and add the + * metrics, which are specified by the user. * + * @return a list of CUDA devices with different device capabilities */ -static vt_cupti_dev_t* vt_cupti_setupMetricList(void) +static vt_cuptievt_dev_t* vt_cuptievt_setupMetricList(void) { CUresult err; int deviceCount, id; - vt_cupti_dev_t *capList = NULL; + vt_cuptievt_dev_t *capList = NULL; /* CUDA initialization */ - err = cuInit( 0 ); - if ( err != CUDA_SUCCESS ) { - printf( "Initialization of CUDA library failed.\n" ); - exit( EXIT_FAILURE ); - } + CHECK_CU_ERROR(cuInit(0), "cuInit"); - /* How many gpgpu devices do we have? */ + /* How many GPGPU devices do we have? 
*/ err = cuDeviceGetCount( &deviceCount ); CHECK_CU_ERROR(err, "cuDeviceGetCount"); if(deviceCount == 0){ - printf("[CUPTI]There is no device supporting CUDA.\n"); - exit(EXIT_FAILURE); + vt_error_msg("[CUPTI EVENTS] There is no device supporting CUDA."); } /* create list with available compute capabilities */ for(id = 0; id < deviceCount; id++){ CUdevice cuDev; - vt_cupti_dev_t *cuptiDev; + vt_cuptievt_dev_t *cuptiDev; int dev_major, dev_minor; err = cuDeviceGet(&cuDev, id); @@ -499,7 +634,7 @@ static vt_cupti_dev_t* vt_cupti_setupMetricList(void) if(cuptiDev == NULL){ /* allocate memory for device list entry */ - cuptiDev = (vt_cupti_dev_t *)malloc(sizeof(vt_cupti_dev_t)); + cuptiDev = (vt_cuptievt_dev_t *)malloc(sizeof(vt_cuptievt_dev_t)); cuptiDev->dev_major = dev_major; cuptiDev->dev_minor = dev_minor; cuptiDev->cuDev = cuDev; @@ -517,11 +652,11 @@ static vt_cupti_dev_t* vt_cupti_setupMetricList(void) /* cleanup list: remove entries, which don't have metrics */ { - vt_cupti_dev_t *curr = capList; - vt_cupti_dev_t *last = capList; + vt_cuptievt_dev_t *curr = capList; + vt_cuptievt_dev_t *last = capList; while(curr != NULL){ - vt_cupti_dev_t *freeDev = curr; + vt_cuptievt_dev_t *freeDev = curr; curr = curr->next; if(freeDev->evtNum == 0){ @@ -540,118 +675,143 @@ static vt_cupti_dev_t* vt_cupti_setupMetricList(void) } /* - * Enumerate/Print the available CUPTI events for a given CUDA device and - * domain. - * + * Print all events for a given CUDA device and CUPTI event domain with name + * and ID. 
+ * * @param cuDev the CUDA device - * @param domainId the CUPTI event domain + * @param domainId the CUPTI event domain ID */ -static void enumEvents(CUdevice cuDev, CUpti_EventDomainID domainId) +static void vt_cuptievt_enumEvents(CUdevice cuDev, CUpti_EventDomainID domainId) { - CUptiResult cuptiErr = CUPTI_SUCCESS; - /* size_t DESC_SHORT = 512; */ CUpti_EventID *eventId = NULL; uint32_t maxEvents = 0; uint32_t i = 0; size_t size = 0; + uint8_t desc_on = 0; + char *help = vt_env_cupti_metrics(); + + if(!strncmp(&help[4], "_l", 2)) desc_on = 1; + + /*vt_cntl_msg(1, "############ %s", &help[5]);*/ /* query num of events available in the domain */ - cuptiErr = cuptiEventDomainGetNumEvents(cuDev, - (CUpti_EventDomainID)domainId, - &maxEvents); - if(cuptiErr == CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID){ - vt_error_msg("Domain Id %d is not supported by device", domainId); - }else{ - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventDomainGetNumEvents"); - } + VTCUPTIEVENTDOMAINGETNUMEVENTS(cuDev, + (CUpti_EventDomainID)domainId, + &maxEvents); size = sizeof(CUpti_EventID) * maxEvents; eventId = (CUpti_EventID*)malloc(size); - if(eventId == NULL) vt_error_msg("Failed to allocate memory to event ID"); + if(eventId == NULL) vt_error_msg("Failed to allocate memory for event ID"); memset(eventId, 0, size); - cuptiErr = cuptiEventDomainEnumEvents(cuDev, - (CUpti_EventDomainID)domainId, - &size, - eventId); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventDomainEnumEvents"); + VTCUPTIEVENTDOMAINENUMEVENTS(cuDev, + (CUpti_EventDomainID)domainId, + &size, + eventId); /* query event info */ { size_t NAME_SHORT = 32; + size_t DESC_SHORT = 2048; char *eventname = (char*)malloc(NAME_SHORT*sizeof(char)); /* event name */ - /*char *shortdesc = malloc(DESC_SHORT*sizeof(char)); short desc of the event */ - + char *shortdesc = NULL; /* short desc of the event */ + + if(desc_on) shortdesc = malloc(DESC_SHORT*sizeof(char)); + for(i = 0; i < maxEvents; i++){ NAME_SHORT = 32; - cuptiErr = 
cuptiEventGetAttribute(cuDev, - eventId[i], - CUPTI_EVENT_ATTR_NAME, - &NAME_SHORT, - eventname); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGetAttribute"); + DESC_SHORT = 2048; + VTCUPTIEVENTGETATTRIBUTE(cuDev, + eventId[i], + CUPTI_EVENT_ATTR_NAME, + &NAME_SHORT, + eventname); - /*cuptiErr = cuptiEventGetAttribute(cuDev, - eventId[i], - CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, - &DESC_SHORT, - (uint8_t*)shortdesc); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGetAttribute");*/ + if(desc_on){ + VTCUPTIEVENTGETATTRIBUTE(cuDev, + eventId[i], + CUPTI_EVENT_ATTR_LONG_DESCRIPTION, + &DESC_SHORT, + (uint8_t*)shortdesc); + } - vt_cntl_msg(1, "Id:Name = %d: %s", eventId[i], eventname); - /*vt_cntl_msg(1, "Shortdesc = %s\n", shortdesc);*/ + vt_cntl_msg(1, "%d:%s", eventId[i], eventname); + if(desc_on) vt_cntl_msg(1, "%s\n", shortdesc); } free(eventname); + if(desc_on) free(shortdesc); } free(eventId); } /* - * Print all available counters for a given CUDA device to stdout. + * Print all available counters to stdout. 
* - * @param cuDev the CUDA device + * @param capList list of CUDA devices with different capabilities */ -static void vt_cupti_showAllCounters(CUdevice cuDev) +static void vt_cupti_showAllCounters(vt_cuptievt_dev_t *capList) { CUptiResult cuptiErr = CUPTI_SUCCESS; CUpti_EventDomainID *domainId = NULL; uint32_t maxDomains = 0; uint32_t i; size_t size = 0; + + while(capList != NULL){ + CUdevice cuDev = capList->cuDev; + vt_cntl_msg(1, "[CUPTI EVENTS] Available events for device %d (SM %d.%d):", + cuDev, capList->dev_major, capList->dev_minor); + vt_cntl_msg(1, "Id:Name"); + vt_cntl_msg(1, "Description\n" + "-------------------------------------------------------------------"); + + cuptiErr = cuptiDeviceGetNumEventDomains(cuDev, &maxDomains); + VT_CUPTI_CALL(cuptiErr, "cuptiDeviceGetNumEventDomains"); - cuptiErr = cuptiDeviceGetNumEventDomains(cuDev, &maxDomains); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiDeviceGetNumEventDomains"); + if(maxDomains == 0){ + vt_cntl_msg(1, "[CUPTI EVENTS] No domain is exposed by dev = %d\n", cuDev); + return; + } - if(maxDomains == 0){ - vt_cntl_msg(1, "[CUPTI] No domain is exposed by dev = %d\n", cuDev); - return; + size = sizeof(CUpti_EventDomainID) * maxDomains; + domainId = (CUpti_EventDomainID*)malloc(size); + if(domainId == NULL){ + vt_cntl_msg(1, "[CUPTI EVENTS] Failed to allocate memory to domain ID"); + return; + } + memset(domainId, 0, size); + + cuptiErr = cuptiDeviceEnumEventDomains(cuDev, &size, domainId); + VT_CUPTI_CALL(cuptiErr, "cuptiDeviceEnumEventDomains"); + + /* enum domains */ + for(i = 0; i < maxDomains; i++) vt_cuptievt_enumEvents(cuDev, domainId[i]); + + vt_cntl_msg(1, "------------------------------------------------------"); + + free(domainId); + + capList = capList->next; } - - size = sizeof(CUpti_EventDomainID) * maxDomains; - domainId = (CUpti_EventDomainID*)malloc(size); - if(domainId == NULL){ - vt_cntl_msg(1, "[CUPTI] Failed to allocate memory to domain ID"); - return; - } - memset(domainId, 0, size); - - 
cuptiErr = cuptiDeviceEnumEventDomains(cuDev, &size, domainId); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiDeviceEnumEventDomains"); - - /* enum domains */ - for(i = 0; i < maxDomains; i++) enumEvents(cuDev, domainId[i]); - - free(domainId); + + /* as this function is in the call-path of the initialize functions + * -> vt_cupti_setupMetrics + * -> vt_cupti_fillMetricList + * -> vt_cupti_showAllCounters + */ + vt_cuptievt_initialized = 1; + VT_CUPTIEVT_UNLOCK(); + exit(0); } -static void vt_cupti_start(vt_cupti_ctx_t *vtcuptiCtx) +static void vt_cuptievt_start(vt_cuptievt_ctx_t *vtcuptiCtx) { CUptiResult cuptiErr = CUPTI_SUCCESS; - vt_cupti_grp_t *vtcuptiGrp = NULL; - vt_cupti_grp_t *lastGrp = NULL; + vt_cuptievt_grp_t *vtcuptiGrp = NULL; + vt_cuptievt_grp_t *lastGrp = NULL; if(vtcuptiCtx == NULL) return; @@ -664,7 +824,7 @@ static void vt_cupti_start(vt_cupti_ctx_t *vtcuptiCtx) /* if the event group could not be enabled, remove it */ if(cuptiErr != CUPTI_SUCCESS){ size_t i; - vt_cupti_grp_t *freeGrp = vtcuptiGrp; + vt_cuptievt_grp_t *freeGrp = vtcuptiGrp; size_t valueSize = 32; char name[32]; @@ -672,11 +832,11 @@ static void vt_cupti_start(vt_cupti_ctx_t *vtcuptiCtx) /* give user information about the group, which cannot be enabled */ for(i = 0; i < freeGrp->evtNum; i++){ - cuptiEventGetAttribute(vtcuptiCtx->vtDevCap->cuDev, - *(freeGrp->cuptiEvtIDs)+i, - CUPTI_EVENT_ATTR_NAME, - &valueSize, (char*)name); - vt_warning("[CUPTI] Event '%s' (%d) cannot be enabled", + VTCUPTIEVENTGETATTRIBUTE(vtcuptiCtx->vtDevCap->cuDev, + *(freeGrp->cuptiEvtIDs)+i, + CUPTI_EVENT_ATTR_NAME, + &valueSize, (char*)name); + vt_warning("[CUPTI EVENTS] Event '%s' (%d) cannot be enabled", name, *(freeGrp->cuptiEvtIDs)+i); } @@ -703,10 +863,10 @@ static void vt_cupti_start(vt_cupti_ctx_t *vtcuptiCtx) * * @param vtcuptiCtx pointer to the VampirTrace CUPTI context */ -static void vt_cupti_stop(vt_cupti_ctx_t *vtcuptiCtx) +static void vt_cuptievt_stop(vt_cuptievt_ctx_t *vtcuptiCtx) { - vt_cupti_grp_t 
*vtcuptiGrp = NULL; - /*vt_cntl_msg(1, "[CUPTI] vt_cupti_stop() ... ");*/ + vt_cuptievt_grp_t *vtcuptiGrp = NULL; + /*vt_cntl_msg(1, "[CUPTI EVENTS] vt_cupti_stop() ... ");*/ if(vtcuptiCtx == NULL || vt_gpu_debug) return; @@ -715,9 +875,9 @@ static void vt_cupti_stop(vt_cupti_ctx_t *vtcuptiCtx) while(vtcuptiGrp != NULL){ if(vtcuptiGrp->enabled){ CUptiResult cuptiErr = CUPTI_SUCCESS; - + cuptiErr = cuptiEventGroupDisable(vtcuptiGrp->evtGrp); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupDisable"); + VT_CUPTI_CALL(cuptiErr, "cuptiEventGroupDisable"); vtcuptiGrp->enabled = 0; } @@ -731,7 +891,7 @@ static void vt_cupti_stop(vt_cupti_ctx_t *vtcuptiCtx) * * @param vtcuptiCtx pointer to the VampirTrace CUPTI context */ -static void vt_cupti_finish(vt_cupti_ctx_t *vtcuptiCtx) +static void vt_cuptievt_finish(vt_cuptievt_ctx_t *vtcuptiCtx) { CUptiResult cuptiErr = CUPTI_SUCCESS; @@ -741,18 +901,18 @@ static void vt_cupti_finish(vt_cupti_ctx_t *vtcuptiCtx) vt_cupti_resetCounter(vtcuptiCtx, 0, &time);*/ /* stop CUPTI counter capturing */ - vt_cupti_stop(vtcuptiCtx); + vt_cuptievt_stop(vtcuptiCtx); /* destroy all CUPTI event groups, which have been created */ { - vt_cupti_grp_t *vtcuptiGrp = vtcuptiCtx->vtGrpList; + vt_cuptievt_grp_t *vtcuptiGrp = vtcuptiCtx->vtGrpList; while(vtcuptiGrp != NULL){ cuptiErr = cuptiEventGroupRemoveAllEvents(vtcuptiGrp->evtGrp); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupRemoveAllEvents"); + VT_CUPTI_CALL(cuptiErr, "cuptiEventGroupRemoveAllEvents"); cuptiErr = cuptiEventGroupDestroy(vtcuptiGrp->evtGrp); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupDestroy"); + VT_CUPTI_CALL(cuptiErr, "cuptiEventGroupDestroy"); vtcuptiGrp = vtcuptiGrp->next; } @@ -760,78 +920,78 @@ static void vt_cupti_finish(vt_cupti_ctx_t *vtcuptiCtx) } /* - * Searches the requested host thread by its CUDA context and returns - * the corresponding VampirTrace CUPTI context structure. - * + * Remove the given CUDA context from the global VampirTrace CUPTI context list. 
+ * * @param cuCtx pointer to the CUDA context - * - * @return VampirTrace CUPTI context structure + * + * @return the removed VampirTrace CUPTI context entry */ -static vt_cupti_ctx_t* vt_cupti_takeCtxFromList(CUcontext cuCtx) +static vt_cuptievt_ctx_t* vt_cupti_takeCtxFromList(CUcontext *cuCtx) { - vt_cupti_ctx_t *currCtx = NULL; - vt_cupti_ctx_t *lastCtx = NULL; + vt_cuptievt_ctx_t *currCtx = NULL; + vt_cuptievt_ctx_t *lastCtx = NULL; - CUPTI_LOCK(); - currCtx = vtcuptiCtxlist; - lastCtx = vtcuptiCtxlist; + VT_CUPTIEVT_LOCK(); + currCtx = vtcuptievtCtxList; + lastCtx = vtcuptievtCtxList; while(currCtx != NULL){ - if(currCtx->cuCtx == cuCtx){ + if(currCtx->cuCtx == *cuCtx){ /* if first element in list */ - if(currCtx == vtcuptiCtxlist){ - vtcuptiCtxlist = vtcuptiCtxlist->next; + if(currCtx == vtcuptievtCtxList){ + vtcuptievtCtxList = vtcuptievtCtxList->next; }else{ lastCtx->next = currCtx->next; } - CUPTI_UNLOCK(); + VT_CUPTIEVT_UNLOCK(); return currCtx; } lastCtx = currCtx; currCtx = currCtx->next; } - CUPTI_UNLOCK(); + VT_CUPTIEVT_UNLOCK(); - vt_cntl_msg(2, "[CUPTI] Context structure not found!"); + vt_cntl_msg(2, "[CUPTI EVENTS] Context structure not found!"); return NULL; } -/* ------------------ Implementation of public functions ------------------ */ +/* -------------START: Implementation of public functions ------------------ */ +/* ------------------------------------------------------------------------- */ /* - * Initialize Mutex, VampirTrace ids and registers the finalize function. + * Initialize Mutex, VampirTrace IDs and registers the finalize function. * This may be done implicitly by vt_cupti_count(). */ -void vt_cupti_init() +void vt_cupti_events_init() { - if(!vt_cupti_initialized){ + if(!vt_cuptievt_initialized){ #if (defined(VT_MT) || defined(VT_HYB)) VTThrd_createMutex(&VTThrdMutexCupti); #endif - CUPTI_LOCK(); - if(!vt_cupti_initialized){ - vt_cntl_msg(2, "[CUPTI] Initializing ... 
"); + VT_CUPTIEVT_LOCK(); + if(!vt_cuptievt_initialized){ + vt_cntl_msg(2, "[CUPTI EVENTS] Initializing ... "); /* create VampirTrace counter group ID only once */ #if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_LOCK_IDS(); #endif - rid_cupti_init = vt_def_region(VT_MASTER_THREAD, "vtcuptiHostThreadInit", + vt_cuptievt_rid_init = vt_def_region(VT_MASTER_THREAD, "vtcuptiHostThreadInit", VT_NO_ID, VT_NO_LNO, VT_NO_LNO, "VT_CUPTI", VT_FUNCTION); - cgid_cupti = vt_def_counter_group(VT_MASTER_THREAD, "CUPTI"); + vt_cuptievt_cgid = vt_def_counter_group(VT_MASTER_THREAD, "CUPTI"); #if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_UNLOCK_IDS(); #endif - vt_cupti_capList = vt_cupti_setupMetricList(); + vtcuptievtCapList = vt_cuptievt_setupMetricList(); - /* register the finalize function of the CUDA wrapper to be called before - * the program exits and CUDA has done its implicit clean-up */ - atexit(vt_cupti_finalize); + /* register the finalize function of VampirTrace CUPTI to be called before + * the program exits */ + atexit(vt_cupti_events_finalize); - vt_cupti_initialized = 1; - CUPTI_UNLOCK(); + vt_cuptievt_initialized = 1; + VT_CUPTIEVT_UNLOCK(); } } } @@ -839,35 +999,35 @@ void vt_cupti_init() /* * Finalizes the VampirTrace CUPTI implementation. 
*/ -void vt_cupti_finalize() +void vt_cupti_events_finalize() { - if(!vt_cupti_finalized){ + if(!vt_cuptievt_finalized){ - CUPTI_LOCK(); - if(!vt_cupti_finalized){ + VT_CUPTIEVT_LOCK(); + if(!vt_cuptievt_finalized){ - vt_cntl_msg(2, "[CUPTI] Finalizing ..."); + vt_cntl_msg(2, "[CUPTI EVENTS] Finalizing ..."); /* free VampirTrace CUPTI context structures (should already be freed) */ - while(vtcuptiCtxlist != NULL){ - vt_cupti_ctx_t *tmp = vtcuptiCtxlist; + while(vtcuptievtCtxList != NULL){ + vt_cuptievt_ctx_t *tmp = vtcuptievtCtxList; - vt_cupti_finish(vtcuptiCtxlist); + vt_cuptievt_finish(vtcuptievtCtxList); - vtcuptiCtxlist = vtcuptiCtxlist->next; + vtcuptievtCtxList = vtcuptievtCtxList->next; free(tmp); tmp = NULL; } /* free capability metric list */ - while(vt_cupti_capList != NULL){ - vt_cupti_dev_t *tmp = vt_cupti_capList; - vt_cupti_capList = vt_cupti_capList->next; + while(vtcuptievtCapList != NULL){ + vt_cuptievt_dev_t *tmp = vtcuptievtCapList; + vtcuptievtCapList = vtcuptievtCapList->next; /* free VampirTrace CUPTI events */ while(tmp->vtcuptiEvtList != NULL){ - vt_cupti_evt_t *tmpEvt = tmp->vtcuptiEvtList; + vt_cuptievt_evt_t *tmpEvt = tmp->vtcuptiEvtList; tmp->vtcuptiEvtList = tmp->vtcuptiEvtList->next; free(tmpEvt); tmpEvt = NULL; @@ -877,8 +1037,8 @@ void vt_cupti_finalize() tmp = NULL; } - vt_cupti_finalized = 1; - CUPTI_UNLOCK(); + vt_cuptievt_finalized = 1; + VT_CUPTIEVT_UNLOCK(); #if (defined(VT_MT) || defined (VT_HYB)) VTTHRD_LOCK_ENV(); @@ -895,11 +1055,11 @@ void vt_cupti_finalize() * * @param ptid the VampirTrace thread id of the calling host thread */ -vt_cupti_ctx_t* vt_cupti_getCurrentContext(uint32_t ptid) +vt_cuptievt_ctx_t* vt_cuptievt_getCurrentContext(uint32_t ptid) { CUcontext cuCtx = NULL; - if(!vt_cupti_initialized) vt_cupti_init(); + if(!vt_cuptievt_initialized) vt_cupti_events_init(); VT_SUSPEND_CUDA_TRACING(ptid); @@ -912,12 +1072,12 @@ vt_cupti_ctx_t* vt_cupti_getCurrentContext(uint32_t ptid) VT_RESUME_CUDA_TRACING(ptid); - 
if(cuCtx == NULL) { - vt_cntl_msg(2, "[CUPTI] No context is bound to the calling CPU thread", cuCtx); + if(cuCtx == NULL){ + vt_cntl_msg(2, "[CUPTI EVENTS] No context is bound to the calling CPU thread!"); return NULL; } - return vt_cupti_getCtx(cuCtx, ptid); + return vt_cuptievt_getCtx(cuCtx, ptid); } /* @@ -928,11 +1088,11 @@ vt_cupti_ctx_t* vt_cupti_getCurrentContext(uint32_t ptid) * @param strmid the stream id for the counter values * @param time the VampirTrace timestamps */ -void vt_cupti_writeCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid, - uint64_t *time) +void vt_cuptievt_writeCounter(vt_cuptievt_ctx_t *vtcuptiCtx, uint32_t strmid, + uint64_t *time) { CUptiResult cuptiErr = CUPTI_SUCCESS; - vt_cupti_grp_t *vtcuptiGrp = NULL; + vt_cuptievt_grp_t *vtcuptiGrp = NULL; size_t bufferSizeBytes; size_t arraySizeBytes; @@ -940,7 +1100,7 @@ void vt_cupti_writeCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid, if(vtcuptiCtx == NULL){ VT_CHECK_THREAD; - vtcuptiCtx = vt_cupti_getCurrentContext(VT_MY_THREAD); + vtcuptiCtx = vt_cuptievt_getCurrentContext(VT_MY_THREAD); if(vtcuptiCtx == NULL) return; } @@ -958,10 +1118,10 @@ void vt_cupti_writeCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid, &bufferSizeBytes, vtcuptiCtx->counterData, &arraySizeBytes, vtcuptiCtx->cuptiEvtIDs, &numCountersRead); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupReadAllEvents"); - + VT_CUPTI_CALL(cuptiErr, "cuptiEventGroupReadAllEvents"); + if(vtcuptiGrp->evtNum != numCountersRead){ - vt_error_msg("[CUPTI] %d counter reads, %d metrics specified in " + vt_error_msg("[CUPTI EVENTS] %d counter reads, %d metrics specified in " "VT_CUPTI_METRICS!", numCountersRead, vtcuptiGrp->evtNum); } @@ -997,15 +1157,15 @@ void vt_cupti_writeCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid, * @param strmid the stream id for the counter values * @param time the VampirTrace timestamps */ -void vt_cupti_resetCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid, - uint64_t *time) +void 
vt_cuptievt_resetCounter(vt_cuptievt_ctx_t *vtcuptiCtx, uint32_t strmid, + uint64_t *time) { size_t i; - vt_cupti_grp_t *vtcuptiGrp = NULL; + vt_cuptievt_grp_t *vtcuptiGrp = NULL; if(vtcuptiCtx == NULL){ VT_CHECK_THREAD; - vtcuptiCtx = vt_cupti_getCurrentContext(VT_MY_THREAD); + vtcuptiCtx = vt_cuptievt_getCurrentContext(VT_MY_THREAD); if(vtcuptiCtx == NULL) return; } @@ -1016,7 +1176,7 @@ void vt_cupti_resetCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid, } /* reset counter values of this group */ - CHECK_CUPTI_ERROR(cuptiEventGroupResetAllEvents(vtcuptiGrp->evtGrp), + VT_CUPTI_CALL(cuptiEventGroupResetAllEvents(vtcuptiGrp->evtGrp), "cuptiEventGroupResetAllEvents"); vtcuptiGrp = vtcuptiGrp->next; @@ -1026,16 +1186,17 @@ void vt_cupti_resetCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid, /* * Finalizes CUPTI device. * + * @param ptid VampirTrace process/thread id * @param cleanExit 1 to cleanup CUPTI event group, otherwise 0 */ -void vt_cupti_finalize_device(uint32_t ptid, uint8_t cleanExit){ +void vt_cuptievt_finalize_device(uint32_t ptid, uint8_t cleanExit){ CUptiResult cuptiErr = CUPTI_SUCCESS; - vt_cupti_ctx_t *vtcuptiCtx = NULL; + vt_cuptievt_ctx_t *vtcuptiCtx = NULL; - vt_cntl_msg(2, "[CUPTI] Finalize device ... "); + vt_cntl_msg(2, "[CUPTI EVENTS] Finalize device ... 
"); { - CUcontext cuCtx = NULL; + CUcontext cuCtx; VT_SUSPEND_CUDA_TRACING(ptid); @@ -1048,7 +1209,7 @@ void vt_cupti_finalize_device(uint32_t ptid, uint8_t cleanExit){ VT_RESUME_CUDA_TRACING(ptid); - vtcuptiCtx = vt_cupti_takeCtxFromList(cuCtx); + vtcuptiCtx = vt_cupti_takeCtxFromList(&cuCtx); if(vtcuptiCtx == NULL) return; } @@ -1058,18 +1219,18 @@ void vt_cupti_finalize_device(uint32_t ptid, uint8_t cleanExit){ vt_cupti_resetCounter(vtcuptiCtx, 0, &time);*/ /* stop CUPTI counter capturing */ - vt_cupti_stop(vtcuptiCtx); + vt_cuptievt_stop(vtcuptiCtx); /* destroy all CUPTI event groups, which have been created */ { - vt_cupti_grp_t *vtcuptiGrp = vtcuptiCtx->vtGrpList; + vt_cuptievt_grp_t *vtcuptiGrp = vtcuptiCtx->vtGrpList; while(vtcuptiGrp != NULL){ cuptiErr = cuptiEventGroupRemoveAllEvents(vtcuptiGrp->evtGrp); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupRemoveAllEvents"); + VT_CUPTI_CALL(cuptiErr, "cuptiEventGroupRemoveAllEvents"); cuptiErr = cuptiEventGroupDestroy(vtcuptiGrp->evtGrp); - CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupDestroy"); + VT_CUPTI_CALL(cuptiErr, "cuptiEventGroupDestroy"); vtcuptiGrp = vtcuptiGrp->next; } @@ -1077,5 +1238,8 @@ void vt_cupti_finalize_device(uint32_t ptid, uint8_t cleanExit){ } /* free VampirTrace CUPTI context */ - vt_cupti_freeCtx(vtcuptiCtx); + vt_cuptievt_freeCtx(vtcuptiCtx); } + +/* ------------------------------------------------------------------------- */ +/* -------------- END: Implementation of public functions ------------------ */ diff --git a/ompi/contrib/vt/vt/vtlib/vt_cupti_events.h b/ompi/contrib/vt/vt/vtlib/vt_cupti_events.h index 70099fa313..5c1db6dbf6 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_cupti_events.h +++ b/ompi/contrib/vt/vt/vtlib/vt_cupti_events.h @@ -13,12 +13,113 @@ #ifndef VT_CUPTI_EVENTS_H #define VT_CUPTI_EVENTS_H -/* Disable all compiler warnings before including the actual - CUPTI events header file. 
*/ -#ifdef __GNUC__ -# pragma GCC system_header -#endif /* __GNUC__ */ -#include "cupti_events.h" +#if (defined(VT_CUPTI)) + +#include "vt_cupti.h" +#include "vt_inttypes.h" /* VampirTrace integer types */ + +/* + * VampirTrace CUPTI event (single linked list element) + */ +typedef struct vtcuptievtevt_st +{ + CUpti_EventID cuptiEvtID; /**< CUPTI event ID */ + uint32_t vtCID; /**< VampirTrace counter ID */ + /*CUpti_EventDomainID cuptiDomainID; *< CUPTI domain ID */ + struct vtcuptievtevt_st *next; +}vt_cuptievt_evt_t; + +/* + * Structure that stores events to be trace for specific device capability + * (single linked list element) + */ +typedef struct vtcuptievtdev_st +{ + int dev_major; /**< Major CUDA device capability */ + int dev_minor; /**< Minor CUDA device capability */ + CUdevice cuDev; /**< CUDA device */ + vt_cuptievt_evt_t *vtcuptiEvtList; /**< list of events to be traced for this device*/ + size_t evtNum; /**< Number of tracable CUPTI events */ + struct vtcuptievtdev_st *next; +}vt_cuptievt_dev_t; + +/* + * VampirTrace CUPTI event group and its counters and properties. + */ +typedef struct vtcuptievtgrp_st +{ + CUpti_EventGroup evtGrp; /**< CUPTI event group, created for this context */ + CUpti_EventID *cuptiEvtIDs; /**< CUPTI event IDs to be traced */ + uint32_t *vtCIDs; /**< VampirTrace counter ids */ + size_t evtNum; /**< number of CUPTI events in this group */ + uint8_t enabled; /**< is the threads CUPTI capturing enabled */ + struct vtcuptievtgrp_st *next; +}vt_cuptievt_grp_t; + +/* + * The VampirTrace CUPTI context has the CUDA context as key and contains + * further information about its device and counters. + */ +typedef struct vtcuptievtctx_st +{ + CUcontext cuCtx; /**< CUDA context (primary key) */ + vt_cuptievt_dev_t *vtDevCap; /**< pointer to device capability (events, ...) 
*/ + vt_cuptievt_grp_t *vtGrpList; /**< list of VT CUPTI event groups */ + uint64_t *counterData; /**< preallocated buffer for counter data */ + CUpti_EventID *cuptiEvtIDs; /**< preallocated buffer for CUPTI event IDs*/ + struct vtcuptievtctx_st *next; +}vt_cuptievt_ctx_t; + +/* + * Initialize Mutex, VampirTrace IDs and registers the finalize function. + * This may be done implicitly by vt_cuptievt_count(). + */ +void vt_cupti_events_init(void); + +/* + * Finalizes the VampirTrace CUPTI implementation. + */ +void vt_cupti_events_finalize(void); + +/* + * Finalizes CUPTI device. + * + * @param ptid the VampirTrace process/thread id + * @param cleanExit 1 to cleanup CUPTI event group, otherwise 0 + */ +void vt_cuptievt_finalize_device(uint32_t ptid, uint8_t cleanExit); + + +/* + * Returns the VampirTrace CUPTI context for the CUDA context associated with + * the calling host thread. + * + * @param ptid the VampirTrace thread id of the calling host thread + */ +vt_cuptievt_ctx_t* vt_cuptievt_getCurrentContext(uint32_t ptid); + +/* + * Request the CUTPI counter values and write it to the given VampirTrace + * stream with the given timestamps. + * + * @param vtcuptiCtx pointer to the VampirTrace CUPTI context + * @param strmid the stream id for the counter values + * @param time the VampirTrace timestamps + */ +void vt_cuptievt_writeCounter(vt_cuptievt_ctx_t *vtcuptiCtx, uint32_t strmid, + uint64_t *time); + +/* + * Reset the VampirTrace counter values (to zero) for active CUPTI counters. 
+ * + * @param vtcuptiCtx pointer to the VampirTrace CUPTI context + * @param strmid the stream id for the counter values + * @param time the VampirTrace timestamps + */ +void vt_cuptievt_resetCounter(vt_cuptievt_ctx_t *vtcuptiCtx, uint32_t strmid, + uint64_t *time); + +#endif /* VT_CUPTI_EVENTS */ #endif /* VT_CUPTI_EVENTS_H */ diff --git a/ompi/contrib/vt/vt/vtlib/vt_defs.h b/ompi/contrib/vt/vt/vtlib/vt_defs.h index cc44190bc6..6a78321c17 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_defs.h +++ b/ompi/contrib/vt/vt/vtlib/vt_defs.h @@ -49,11 +49,20 @@ typedef unsigned char* buffer_t; *----------------------------------------------------------------------------- */ +/* maximum number of threads */ +#define VT_MAX_THREADS \ + 1<<(VT_PROCESS_ID_BITNESS - VT_PROCESS_ID_SPLITTING) +/* maximum string lengths */ #define VT_MAX_COMMENT_LEN 4096 #define VT_MAX_MARKER_LEN 4096 #define VT_MAX_THREAD_NAME_LEN 100 -#define VT_MAX_THREADS \ - 1<<(VT_PROCESS_ID_BITNESS - VT_PROCESS_ID_SPLITTING) +/* maximum number of certain MPI handles defined per process + (initial maximums; raised as needed) */ +#define VT_MAX_MPI_COMMS_INIT 100 +#define VT_MAX_MPI_GROUPS_INIT 100 +#define VT_MAX_MPI_WINS_INIT 100 +/* maximum number of regions to be instrumented by Dyninst */ +#define VT_MAX_DYNINST_REGIONS 100000 /* *----------------------------------------------------------------------------- @@ -147,7 +156,6 @@ typedef unsigned char* buffer_t; #define VT_MPI_COLL_ONE2ALL 10 #define VT_MPI_COLL_ALL2ONE 11 #define VT_MPI_COLL_ALL2ALL 12 -#define VT_MPI_COLL_OTHER 13 #define VT_OMP_FUNCTION 14 #define VT_OMP_PARALLEL 15 @@ -181,17 +189,27 @@ typedef unsigned char* buffer_t; /* *----------------------------------------------------------------------------- - * MPI communicators + * MPI communicators/groups *----------------------------------------------------------------------------- */ #define VT_MPI_COMM_WORLD 0 #define VT_MPI_COMM_SELF 1 #define VT_MPI_COMM_OTHER 2 +#define VT_MPI_GROUP 3 /* 
*----------------------------------------------------------------------------- - * Counter flags + * Process group attributes + *----------------------------------------------------------------------------- + */ + +#define VT_PROCGRP_ISCOMMUNICATOR 1<<0 +#define VT_PROCGRP_HASCOUNTERS 1<<1 + +/* + *----------------------------------------------------------------------------- + * Counter properties *----------------------------------------------------------------------------- */ @@ -272,13 +290,12 @@ typedef unsigned char* buffer_t; #define VT_UNIFY_STRID_USRCOM_RECV_COMMENT "__USRCOM_R__" #define VT_UNIFY_STRID_ETIMESYNC_COMMENT "__ETIMESYNC__" +#define VT_UNIFY_STRID_ALL_PROCGRP "__ALL__" #define VT_UNIFY_STRID_NODE_PROCGRP "__NODE__" #define VT_UNIFY_STRID_MPI_COMM_WORLD_PROCGRP "__MPI_COMM_WORLD__" #define VT_UNIFY_STRID_MPI_COMM_SELF_PROCGRP "__MPI_COMM_SELF__" #define VT_UNIFY_STRID_MPI_COMM_OTHER_PROCGRP "__MPI_COMM_OTHER__" -#define VT_UNIFY_STRID_OMP_TEAM_PROCGRP "__OMP_TEAM__" -#define VT_UNIFY_STRID_GPU_COMM_PROCGRP "__GPU_COMM__" -#define VT_UNIFY_STRID_GPU_GROUP_PROCGRP "__GPU_GROUP__" +#define VT_UNIFY_STRID_MPI_GROUP_PROCGRP "__MPI_GROUP__" #define VT_UNIFY_STRID_USER_COMM_PROCGRP "__USER_COMM__" #define VT_UNIFY_STRID_ASYNC_SOURCE_KEY "__ASYNC_SOURCE__" diff --git a/ompi/contrib/vt/vt/vtlib/vt_dyninst.c b/ompi/contrib/vt/vt/vtlib/vt_dyninst.c index b27c93990f..280b559188 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_dyninst.c +++ b/ompi/contrib/vt/vt/vtlib/vt_dyninst.c @@ -31,62 +31,18 @@ static int dyn_init = 1; /* is initialization needed? 
*/ -/* - *----------------------------------------------------------------------------- - * Simple hash table to map function addresses to region names/identifier - *----------------------------------------------------------------------------- - */ - -typedef struct HN { - size_t id; /* hash code (address of function) */ - uint32_t vtid; /* associated region identifier */ - struct HN* next; -} HashNode; - -#define HASH_MAX 1021 - -static HashNode* htab[HASH_MAX]; - -/* - * Stores region identifier `e' under hash code `h' - */ - -static HashNode* hash_put(size_t h, uint32_t e) { - size_t id = h % HASH_MAX; - HashNode *add = (HashNode*)malloc(sizeof(HashNode)); - add->id = h; - add->vtid = e; - add->next = htab[id]; - htab[id] = add; - return add; -} - -/* - * Lookup hash code `h' - * Returns hash table entry if already stored, otherwise NULL - */ - -static HashNode* hash_get(size_t h) { - size_t id = h % HASH_MAX; - HashNode *curr = htab[id]; - while ( curr ) { - if ( curr->id == h ) { - return curr; - } - curr = curr->next; - } - return NULL; -} +static uint32_t* rtab = NULL; /* region id lookup table */ /* * Register new region */ -static HashNode *register_region(size_t addr, char* func, char* file, int lno) { - uint32_t rid; +static void register_region(uint32_t* rid, char* func, char* file, int lno) +{ uint32_t fid; - /* -- register file if available -- */ + /* Register file if available + */ if( file[0] ) { fid = vt_def_scl_file(VT_CURRENT_THREAD, file); @@ -97,15 +53,13 @@ static HashNode *register_region(size_t addr, char* func, char* file, int lno) { lno = VT_NO_LNO; } - /* -- register region and store region identifier -- */ - rid = vt_def_region(VT_CURRENT_THREAD, func, fid, lno, VT_NO_LNO, NULL, - VT_FUNCTION); - - return hash_put(addr, rid); + /* Register region and store region identifier */ + *rid = vt_def_region(VT_CURRENT_THREAD, func, fid, lno, VT_NO_LNO, NULL, + VT_FUNCTION); } -void VT_Dyn_start(void* addr, char* name, char* fname, int lno); 
-void VT_Dyn_end(void* addr); +void VT_Dyn_start(uint32_t index, char* name, char* fname, int lno); +void VT_Dyn_end(uint32_t index); void VT_Dyn_attach(void); void VT_Dyn_finalize(void); @@ -113,45 +67,55 @@ void VT_Dyn_finalize(void); * This function is called at the entry of each function */ -void VT_Dyn_start(void* addr, char* name, char* fname, int lno) +void VT_Dyn_start(uint32_t index, char* name, char* fname, int lno) { - HashNode *hn; uint64_t time; + uint32_t* rid; - /* -- ignore events if VT is initializing -- */ + vt_assert(index < VT_MAX_DYNINST_REGIONS); + + /* Ignore events if VT is initializing */ if( !dyn_init && !vt_is_alive ) return; - /* -- if not yet initialized, initialize VampirTrace -- */ - if ( dyn_init ) { + /* If not yet initialized, initialize VampirTrace */ + if ( dyn_init ) + { VT_MEMHOOKS_OFF(); dyn_init = 0; + rtab = (uint32_t*)calloc(VT_MAX_DYNINST_REGIONS, sizeof(uint32_t)); + if ( rtab == NULL ) + vt_error(); vt_open(); vt_comp_finalize = VT_Dyn_finalize; VT_MEMHOOKS_ON(); } - /* -- if VampirTrace already finalized, return -- */ + /* If VampirTrace already finalized, return */ if ( !vt_is_alive ) return; VT_MEMHOOKS_OFF(); time = vt_pform_wtime(); - /* -- get region identifier -- */ - if ( (hn = hash_get((size_t)addr)) == 0 ) { - /* -- region entered the first time, register region -- */ + /* Get region identifier + */ + rid = &(rtab[index]); + if ( *rid == 0 ) + { + /* If region entered the first time, register region + */ #if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_LOCK_IDS(); - if ( (hn = hash_get((size_t) addr)) == 0 ) - hn = register_region((size_t) addr, name, fname, lno); + if ( *rid == 0 ) + register_region(rid, name, fname, lno); VTTHRD_UNLOCK_IDS(); #else /* VT_MT || VT_HYB */ - hn = register_region((size_t) addr, name, fname, lno); + register_region(rid, name, fname, lno); #endif /* VT_MT || VT_HYB */ } - /* -- write enter record -- */ - vt_enter(VT_CURRENT_THREAD, &time, hn->vtid); + /* Write enter record */ + 
vt_enter(VT_CURRENT_THREAD, &time, *rid); VT_MEMHOOKS_ON(); } @@ -160,20 +124,24 @@ void VT_Dyn_start(void* addr, char* name, char* fname, int lno) * This function is called at the exit of each function */ -void VT_Dyn_end(void* addr) +void VT_Dyn_end(uint32_t index) { uint64_t time; - /* -- if VampirTrace already finalized, return -- */ + vt_assert(index < VT_MAX_DYNINST_REGIONS); + + /* If VampirTrace already finalized, return */ if ( !vt_is_alive ) return; + /* If region id isn't present, return */ + if ( rtab[index] == 0 ) return; + VT_MEMHOOKS_OFF(); time = vt_pform_wtime(); - /* -- write exit record -- */ - if ( hash_get((size_t) addr) ) - vt_exit(VT_CURRENT_THREAD, &time); + /* Write exit record */ + vt_exit(VT_CURRENT_THREAD, &time); VT_MEMHOOKS_ON(); } @@ -201,14 +169,14 @@ void sig_usr2_handler(int signum) } /* - * This function is called by the shared dyninst attach library (libvt-dynatt) + * This function is called by the shared Dyninst attach library (libvt-dynatt) */ void VT_Dyn_attach() { int mutatee_pid = getpid(); - vt_cntl_msg(1, "Attaching instrumentor to PID %i ...", mutatee_pid); + vt_cntl_msg(1, "[%i]: Attaching instrumentor", mutatee_pid); /* Install signal handler for continue execution (SIGUSR1) and abort execution (SIGUSR2) @@ -219,18 +187,18 @@ void VT_Dyn_attach() if( signal(SIGUSR2, sig_usr2_handler) == SIG_ERR ) vt_error_msg("Could not install handler for signal SIGUSR2"); - /* The dyninst attach library (libvt-dynatt) could be set by LD_PRELOAD. + /* The Dyninst attach library (libvt-dynatt) could be set by LD_PRELOAD. Unset this environment variable to avoid recursion. 
*/ putenv((char*)"LD_PRELOAD="); putenv((char*)"DYLD_INSERT_LIBRARIES="); /* equivalent on MacOS */ - /* Attach dyninst instrumentor on running executable + /* Attach Dyninst instrumentor on running executable */ switch( fork() ) { case -1: { - vt_error_msg("Could not attach dyninst instrumentor"); + vt_error_msg("Could not attach Dyninst instrumentor"); break; } case 0: @@ -267,13 +235,14 @@ void VT_Dyn_attach() shlibs_arg[strlen(shlibs_arg)-1] = '\0'; } - snprintf(cmd, sizeof(cmd)-1, "%s/vtdyn %s %s %s %s %s %s %s -p %i %s", + snprintf(cmd, sizeof(cmd)-1, "%s/vtdyn %s %s %s %s %s %s %s %s -p %i %s", vt_installdirs_get(VT_INSTALLDIR_BINDIR), (vt_env_verbose() == 0) ? "-q" : "", (vt_env_verbose() >= 2) ? "-v" : "", filter ? "-f" : "", filter ? filter : "", shlibs_arg ? "-s" : "", shlibs_arg ? shlibs_arg : "", (vt_env_dyn_ignore_nodbg()) ? "--ignore-nodbg" : "", + (vt_env_dyn_detach()) ? "" : "--nodetach", mutatee_pid, mutatee_path ? mutatee_path : ""); @@ -281,7 +250,7 @@ void VT_Dyn_attach() free(shlibs_arg); /* Start mutator (instrumentor) */ - vt_cntl_msg(2, "Executing %s", cmd); + vt_cntl_msg(2, "[%i]: Executing %s", mutatee_pid, cmd); rc = system(cmd); /* Kill mutatee, if an error occurred during attaching @@ -297,7 +266,7 @@ void VT_Dyn_attach() { /* Wait until mutator send signal to continue execution */ - vt_cntl_msg(1, "[%i]: Wait until instrumentation is done ...", + vt_cntl_msg(1, "[%i]: Waiting until instrumentation is done", mutatee_pid); do { sleep(1); } while(mutatee_cont == 0); @@ -325,14 +294,12 @@ void VT_Dyn_attach() void VT_Dyn_finalize() { - int i; + if ( dyn_init ) return; + + /* Free region id table + */ + free( rtab ); + rtab = NULL; - for ( i = 0; i < HASH_MAX; i++ ) - { - if ( htab[i] ) { - free(htab[i]); - htab[i] = NULL; - } - } dyn_init = 1; } diff --git a/ompi/contrib/vt/vt/vtlib/vt_env.c b/ompi/contrib/vt/vt/vtlib/vt_env.c index 3fa3e587e5..eeaaf62cd0 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_env.c +++ 
b/ompi/contrib/vt/vt/vtlib/vt_env.c @@ -207,22 +207,44 @@ int vt_env_dyn_ignore_nodbg() char* tmp; if (dyn_ignore_nodbg == -1) - { - tmp = getenv("VT_DYN_IGNORE_NODBG"); - if (tmp != NULL && strlen(tmp) > 0) { - vt_cntl_msg(2, "VT_DYN_IGNORE_NODBG=%s", tmp); + tmp = getenv("VT_DYN_IGNORE_NODBG"); + if (tmp != NULL && strlen(tmp) > 0) + { + vt_cntl_msg(2, "VT_DYN_IGNORE_NODBG=%s", tmp); - dyn_ignore_nodbg = parse_bool(tmp); + dyn_ignore_nodbg = parse_bool(tmp); + } + else + { + dyn_ignore_nodbg = 0; + } } - else - { - dyn_ignore_nodbg = 0; - } - } return dyn_ignore_nodbg; } +int vt_env_dyn_detach() +{ + static int dyn_detach = -1; + char* tmp; + + if (dyn_detach == -1) + { + tmp = getenv("VT_DYN_DETACH"); + if (tmp != NULL && strlen(tmp) > 0) + { + vt_cntl_msg(2, "VT_DYN_DETACH=%s", tmp); + + dyn_detach = parse_bool(tmp); + } + else + { + dyn_detach = 1; + } + } + return dyn_detach; +} + char* vt_env_gnu_nm() { static int read = 1; @@ -1057,54 +1079,6 @@ int vt_env_mpicheck() return mpicheck; } -int vt_env_max_mpi_comms() -{ - static int max_mpi_comms = -1; - char* tmp; - - if (max_mpi_comms == -1) - { - tmp = getenv("VT_MAX_MPI_COMMS"); - if (tmp != NULL && strlen(tmp) > 0) - { - vt_cntl_msg(2, "VT_MAX_MPI_COMMS=%s", tmp); - - max_mpi_comms = atoi(tmp); - if (max_mpi_comms < 2) - vt_error_msg("VT_MAX_MPI_COMMS not properly set"); - } - else - { - max_mpi_comms = 100; - } - } - return max_mpi_comms; -} - -int vt_env_max_mpi_wins() -{ - static int max_mpi_wins = -1; - char* tmp; - - if (max_mpi_wins == -1) - { - tmp = getenv("VT_MAX_MPI_WINS"); - if (tmp != NULL && strlen(tmp) > 0) - { - vt_cntl_msg(2, "VT_MAX_MPI_WINS=%s", tmp); - - max_mpi_wins = atoi(tmp); - if (max_mpi_wins < 1) - vt_error_msg("VT_MAX_MPI_WINS not properly set"); - } - else - { - max_mpi_wins = 100; - } - } - return max_mpi_wins; -} - int vt_env_mpicheck_errexit() { static int mpicheck_errexit = -1; @@ -1267,6 +1241,27 @@ int vt_env_sync_flush() return sync_flush; } +int 
vt_env_sync_flush_skip(void) +{ + static int skip = -1; + if (skip == -1) + { + char* tmp = getenv("VT_SYNC_FLUSH_SKIP"); + if (tmp != NULL && strlen(tmp) > 0) + { + vt_cntl_msg(2, "VT_SYNC_FLUSH_SKIP=%s", tmp); + + skip = atoi(tmp); + if (skip < 0) skip = 0; + } + else + { + skip = 0; + } + } + return skip; +} + int vt_env_sync_flush_level() { static int sync_flush_level = -1; @@ -1291,6 +1286,27 @@ int vt_env_sync_flush_level() return sync_flush_level; } +int vt_env_onoff_check_stack_balance() +{ + static int check_stack_balance = -1; + + if (check_stack_balance == -1) + { + char* tmp = getenv("VT_ONOFF_CHECK_STACK_BALANCE"); + if (tmp != NULL && strlen(tmp) > 0) + { + vt_cntl_msg(2, "VT_ONOFF_CHECK_STACK_BALANCE=%s", tmp); + + check_stack_balance = parse_bool(tmp); + } + else + { + check_stack_balance = 1; + } + } + return check_stack_balance; +} + int vt_env_max_stack_depth() { static int max_stack_depth = -1; @@ -1389,6 +1405,38 @@ int vt_env_compression() #endif /* HAVE_ZLIB */ } +size_t vt_env_compression_bsize(void) +{ + static size_t bsize = 0; + if (bsize == 0) + { + char* tmp = getenv("VT_COMPRESSION_BUFFER_SIZE"); + if (tmp != NULL && strlen(tmp) > 0) + { + vt_cntl_msg(2, "VT_COMPRESSION_BUFFER_SIZE=%s", tmp); + + bsize = parse_size(tmp); + } + } + return bsize; +} + +size_t vt_env_otf_bsize(void) +{ + static size_t bsize = 0; + if (bsize == 0) + { + char* tmp = getenv("VT_OTF_BUFFER_SIZE"); + if (tmp != NULL && strlen(tmp) > 0) + { + vt_cntl_msg(2, "VT_OTF_BUFFER_SIZE=%s", tmp); + + bsize = parse_size(tmp); + } + } + return bsize; +} + int vt_env_java_native() { static int native = -1; @@ -1706,27 +1754,6 @@ int vt_env_cudatrace_gpumem() return cudamem; } -int vt_env_cudatrace_error() -{ - static int error = -1; - - if (error == -1) - { - char* tmp = getenv("VT_CUDATRACE_ERROR"); - if(tmp != NULL && strlen(tmp) > 0) - { - vt_cntl_msg(2, "VT_CUDATRACE_ERROR=%s", tmp); - - error = parse_bool(tmp); - } - else - { - error = 0; - } - } - return error; -} - 
char* vt_env_cupti_metrics() { static int read = 1; @@ -1768,6 +1795,27 @@ int vt_env_cupti_sampling() return cuptisampling; } +int vt_env_cupti_api_callback() +{ + static int cupti_cb = -1; + + if (cupti_cb == -1) + { + char* tmp = getenv("VT_CUPTI_API_CALLBACK"); + if (tmp != NULL && strlen(tmp) > 0) + { + vt_cntl_msg(2, "VT_CUPTI_API_CALLBACK=%s", tmp); + + cupti_cb = parse_bool(tmp); + } + else + { + cupti_cb = 0; + } + } + return cupti_cb; +} + int vt_env_gputrace_debug() { static int debug = -1; @@ -1790,3 +1838,24 @@ int vt_env_gputrace_debug() } return debug; } + +int vt_env_gputrace_error() +{ + static int error = -1; + + if (error == -1) + { + char* tmp = getenv("VT_GPUTRACE_ERROR"); + if (tmp != NULL && strlen(tmp) > 0) + { + vt_cntl_msg(2, "VT_GPUTRACE_ERROR=%s", tmp); + + error = parse_bool(tmp); + } + else + { + error = 0; + } + } + return error; +} diff --git a/ompi/contrib/vt/vt/vtlib/vt_env.h b/ompi/contrib/vt/vt/vtlib/vt_env.h index c9017621b5..78e1d08ceb 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_env.h +++ b/ompi/contrib/vt/vt/vtlib/vt_env.h @@ -24,6 +24,7 @@ EXTERN char* vt_env_apppath(void); EXTERN char* vt_env_dyn_shlibs(void); EXTERN int vt_env_dyn_ignore_nodbg(void); +EXTERN int vt_env_dyn_detach(void); EXTERN char* vt_env_gnu_nm(void); EXTERN char* vt_env_gnu_nmfile(void); EXTERN char* vt_env_gdir(void); @@ -55,19 +56,21 @@ EXTERN int vt_env_omptrace(void); EXTERN int vt_env_mpitrace(void); EXTERN int vt_env_mpicheck(void); EXTERN int vt_env_mpicheck_errexit(void); -EXTERN int vt_env_max_mpi_comms(void); -EXTERN int vt_env_max_mpi_wins(void); EXTERN char* vt_env_rusage(void); EXTERN int vt_env_rusage_intv(void); EXTERN char* vt_env_metrics(void); EXTERN char* vt_env_metrics_sep(void); EXTERN char* vt_env_metrics_spec(void); EXTERN int vt_env_sync_flush(void); +EXTERN int vt_env_sync_flush_skip(void); EXTERN int vt_env_sync_flush_level(void); +EXTERN int vt_env_onoff_check_stack_balance(void); EXTERN int vt_env_max_stack_depth(void); EXTERN 
int vt_env_max_flushes(void); EXTERN int vt_env_max_threads(void); EXTERN int vt_env_compression(void); +EXTERN size_t vt_env_otf_bsize(void); +EXTERN size_t vt_env_compression_bsize(void); EXTERN int vt_env_java_native(void); EXTERN int vt_env_java_synthetic(void); EXTERN int vt_env_java_group_classes(void); @@ -83,10 +86,11 @@ EXTERN int vt_env_cudatrace_kernel(void); EXTERN int vt_env_cudatrace_memcpyasync(void); EXTERN int vt_env_cudatrace_sync(void); EXTERN int vt_env_cudatrace_gpumem(void); -EXTERN int vt_env_cudatrace_error(void); EXTERN char* vt_env_cupti_metrics(void); EXTERN int vt_env_cupti_sampling(void); +EXTERN int vt_env_cupti_api_callback(void); EXTERN int vt_env_gputrace_debug(void); +EXTERN int vt_env_gputrace_error(void); #endif /* _VT_ENV_H */ diff --git a/ompi/contrib/vt/vt/vtlib/vt_getcpu.c b/ompi/contrib/vt/vt/vtlib/vt_getcpu.c index a2ed5360ae..d3a2a8b7c6 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_getcpu.c +++ b/ompi/contrib/vt/vt/vtlib/vt_getcpu.c @@ -29,9 +29,9 @@ void vt_getcpu_init() vt_assert(vt_misc_cgid != 0); /* write counter definition */ - vt_getcpu_cid = vt_def_counter(VT_CURRENT_THREAD, "CPU_ID", + vt_getcpu_cid = vt_def_counter(VT_CURRENT_THREAD, "CPU_ID", "#", VT_CNTR_ABS | VT_CNTR_NEXT, - vt_misc_cgid, "#"); + vt_misc_cgid, 0); } void vt_getcpu_finalize() diff --git a/ompi/contrib/vt/vt/vtlib/vt_gpu.c b/ompi/contrib/vt/vt/vtlib/vt_gpu.c index 438ec0bde1..b0132cd8b7 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_gpu.c +++ b/ompi/contrib/vt/vt/vtlib/vt_gpu.c @@ -20,6 +20,8 @@ uint8_t *vt_gpu_prop; /* gpu debugging flag is '0' by default */ uint8_t vt_gpu_debug = 0; +uint8_t vt_gpu_error = 0; + static uint8_t finalized = 0; static void vt_gpu_createGroups(void); @@ -41,7 +43,8 @@ void vt_gpu_init(void) vt_gpu_commCID = vt_get_curid(); vt_gpu_groupCID = vt_get_curid(); - vt_gpu_debug = vt_env_gputrace_debug(); + vt_gpu_debug = (uint8_t)vt_env_gputrace_debug(); + vt_gpu_error = (uint8_t)vt_env_gputrace_error(); initflag = 1; } @@ -88,30 
+91,41 @@ static void vt_gpu_createGroups() if(ctrGPUComm > 0){ uint32_t *gpu_comm_array = (uint32_t*)malloc(ctrGPUComm*sizeof(uint32_t)); int j = 0; + for(i = 0; i < VTThrdn; i++){ if((vt_gpu_prop[i] & VTGPU_GPU_COMM) == VTGPU_GPU_COMM){ - gpu_comm_array[j++] = i; + gpu_comm_array[j++] = VT_PROCESS_ID(vt_my_trace, i); } } - vt_def_gpu_comm(ctrGPUComm, gpu_comm_array, - VT_UNIFY_STRID_GPU_COMM_PROCGRP, vt_gpu_commCID); + + vt_def_procgrp(VT_CURRENT_THREAD, "GPU_COMM_GLOBAL", + VT_PROCGRP_ISCOMMUNICATOR, ctrGPUComm, gpu_comm_array, + vt_gpu_commCID); + + free(gpu_comm_array); } /* create array of GPU threads and define group */ if(ctrGPUGroup > 0){ uint32_t *gpu_group_array = (uint32_t*)malloc(ctrGPUGroup*sizeof(uint32_t)); int j = 0; + for(i = 0; i < VTThrdn; i++){ if((vt_gpu_prop[i] & VTGPU_GPU) == VTGPU_GPU){ - gpu_group_array[j++] = i; + gpu_group_array[j++] = VT_PROCESS_ID(vt_my_trace, i); } } - vt_def_gpu_comm(ctrGPUGroup, gpu_group_array, - VT_UNIFY_STRID_GPU_GROUP_PROCGRP, vt_gpu_groupCID); + + vt_def_procgrp(VT_CURRENT_THREAD, "GPU_GROUP", 0, ctrGPUGroup, + gpu_group_array, vt_gpu_groupCID); + + free(gpu_group_array); } } -/* Uses VampirTrace Thread API to create a GPU thread +/* + * Uses VampirTrace Thread API to create a GPU thread. 
+ * * @param tname the name of the thread to be registered * @param the parent thread id * @param vt_tid pointer to the thread id of the thread to be registered @@ -125,8 +139,35 @@ void vt_gpu_registerThread(const char* tname, uint32_t ptid, uint32_t *vt_tid) } /* create new thread object */ - *vt_tid = VTThrd_createNewThreadId(); - VTThrd_create(*vt_tid, ptid, tname, 1); + *vt_tid = VTThrd_create(tname, ptid, 1); + /* open thread associated trace file */ VTThrd_open(*vt_tid); + vt_cntl_msg(2, "[GPU] Created thread '%s' with id: %d", tname, *vt_tid); } + +/****************** common for CUDA driver API and CUPTI **********************/ +#if (defined(VT_CUDAWRAP) || defined(VT_CUPTI)) + +/* + * Handles errors returned from CUDA driver API calls. + * + * @param ecode the CUDA driver API error code + * @param msg a message to get more detailed information about the error + * @param the corresponding file + * @param the line the error occurred + */ +void vt_gpu_handleCuError(CUresult ecode, const char* msg, + const char *file, const int line) +{ + if(msg != NULL) vt_cntl_msg(1, "[CUDA] %s", msg); + VT_CHECK_THREAD; + if(vt_gpu_error){ + vt_error_msg("[CUDA Error %d in <%s>:%i] (ptid %d)", ecode, file, line, VT_MY_THREAD); + }else{ + vt_warning("[CUDA Error %d in <%s>:%i] (ptid %d)", ecode, file, line, VT_MY_THREAD); + } +} + +#endif +/******************************************************************************/ diff --git a/ompi/contrib/vt/vt/vtlib/vt_gpu.h b/ompi/contrib/vt/vt/vtlib/vt_gpu.h index e73720d4da..a18f2ab863 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_gpu.h +++ b/ompi/contrib/vt/vt/vtlib/vt_gpu.h @@ -19,8 +19,6 @@ # define EXTERN extern #endif -/*#if (defined(VT_CUDARTWRAP) || defined(VT_CUDAWRAP) || defined(VT_OPENCLWRAP))*/ - #include "vt_inttypes.h" /* VampirTrace integer types */ #include "vt_thrd.h" /* thread creation for GPU kernels */ #include "vt_trc.h" /* VampirTrace events */ @@ -33,7 +31,7 @@ #define VTGPU_DEFAULT_BSIZE 8192 #define 
VTGPU_MAX_BSIZE 2097152 /* 8192^8 bytes */ -/* defines for GPU GROUP and GPU COMM */ +/* defines for GPU GROUP and GPU COMM (8 bit only!!!) */ #define VTGPU_NO_GPU 0x00 /* thread is no gpu and does no gpu communication */ #define VTGPU_GPU 0x01 /* thread is a GPU thread */ #define VTGPU_GPU_COMM 0x02 /* thread does gpu communication (CPU or GPU) */ @@ -41,15 +39,43 @@ /* performance counter available? */ #define VTGPU_NO_PC 0x04 /* no performance counter for this thread available */ -/*** some specials for the CUDA driver API ***/ -#if (defined(VT_CUDAWRAP)) +/* device/host communication directions (8 bit only!!!) */ +#define VTGPU_DEV2HOST 0x00 /* device to host copy */ +#define VTGPU_HOST2DEV 0x01 /* host to device copy */ +#define VTGPU_DEV2DEV 0x02 /* device to device copy */ +#define VTGPU_HOST2HOST 0x04 /* host to host copy */ + +/****************** common for CUDA driver API and CUPTI **********************/ +#if (defined(VT_CUDAWRAP) || defined(VT_CUPTI)) + #include "vt_cuda_driver_api.h" -#define CHECK_CU_ERROR(err, cufunc) \ - if(err != CUDA_SUCCESS){ \ - vt_error_msg("Error %d for CUDA Driver API function '%s'.", err, cufunc); \ +# define CHECK_CU_ERROR(_err, _msg) \ + if(_err != CUDA_SUCCESS){ \ + vt_gpu_handleCuError(_err, _msg, __FILE__,__LINE__); \ } +/* + * Handles errors returned from CUDA driver API calls. + * + * @param ecode the CUDA driver API error code + * @param msg a message to get more detailed information about the error + * @param the corresponding file + * @param the line the error occurred + */ +EXTERN void vt_gpu_handleCuError(CUresult ecode, const char* msg, + const char *file, const int line); + +#else + +# define CHECK_CU_ERROR(_err, _msg) + +#endif +/******************************************************************************/ + +/****************************** CUDA driver API *******************************/ +#if (defined(VT_CUDAWRAP)) + /* is CUDA driver API tracing suspended? 
*/ # define VTGPU_CUDA_SUSPENDED 0x08 @@ -59,13 +85,12 @@ ((vt_gpu_prop[_tid] & VTGPU_CUDA_SUSPENDED) == VTGPU_CUDA_SUSPENDED) #else -#define CHECK_CU_ERROR(err, cufunc) - # define VT_SUSPEND_CUDA_TRACING(tid) # define VT_RESUME_CUDA_TRACING(tid) # define VT_CUDA_IS_SUSPENDED(tid) #endif +/******************************************************************************/ /* * gobal communicator id for all GPU threads @@ -88,8 +113,13 @@ EXTERN uint8_t *vt_gpu_prop; */ EXTERN uint8_t vt_gpu_debug; +/* + * flag: abort program on GPU error, if enabled + */ +EXTERN uint8_t vt_gpu_error; + /* - * Initializion for all GPU API wrappers. + * Initialization for all GPU API wrappers. * VampirTrace IDS have to be locked, before calling this function. */ EXTERN void vt_gpu_init(void); @@ -104,12 +134,10 @@ EXTERN void vt_gpu_finalize(void); * Uses VampirTrace Thread API to create a GPU thread * * @param tname the name of the thread to be registered - * @param the parent thread id + * @param ptid the parent thread id * @param vt_tid pointer to the thread id of the thread to be registered */ EXTERN void vt_gpu_registerThread(const char* tname, uint32_t ptid, - uint32_t *vt_tid); - -/*#endif VT_CUDARTWRAP || VT_CUDAWRAP || VT_OPENCLWRAP */ + uint32_t *vt_tid); #endif /* _VT_GPU_H_ */ diff --git a/ompi/contrib/vt/vt/vtlib/vt_memhook.c b/ompi/contrib/vt/vt/vtlib/vt_memhook.c index 28b516228c..64444b042e 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_memhook.c +++ b/ompi/contrib/vt/vt/vtlib/vt_memhook.c @@ -97,9 +97,9 @@ void vt_memhook_init() /* define counter */ memalloc_cid = - vt_def_counter(VT_CURRENT_THREAD, "MEM_ALLOC", + vt_def_counter(VT_CURRENT_THREAD, "MEM_ALLOC", "Bytes", VT_CNTR_ABS | VT_CNTR_NEXT, - gid, "Bytes"); + gid, 0); vt_memhook_is_initialized = 1; } diff --git a/ompi/contrib/vt/vt/vtlib/vt_metric_papi.c b/ompi/contrib/vt/vt/vtlib/vt_metric_papi.c index d50dd266e1..f7bccf456b 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_metric_papi.c +++ 
b/ompi/contrib/vt/vt/vtlib/vt_metric_papi.c @@ -302,25 +302,33 @@ static void metric_descriptions(void) if (metricv[i]->descr[j] == '\n') metricv[i]->descr[j]='\0'; j=strlen(metricv[i]->descr)-1; if (metricv[i]->descr[j] != '.') - strncat(metricv[i]->descr, ".", sizeof(metricv[i]->descr)); + strncat(metricv[i]->descr, ".", + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); } if (metricv[i]->papi_code & PAPI_PRESET_MASK) { /* PAPI preset */ char *postfix_chp = info.postfix; char derive_ch = strcmp(info.derived,"DERIVED_SUB")?'+':'-'; - strncat(metricv[i]->descr, " [ ", sizeof(metricv[i]->descr)); - strncat(metricv[i]->descr, info.name[0], sizeof(metricv[i]->descr)); + strncat(metricv[i]->descr, " [ ", + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); + strncat(metricv[i]->descr, info.name[0], + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); for (k=1; k<(int)info.count; k++) { char op[4]; postfix_chp = postfix_chp?strpbrk(++postfix_chp, "+-*/"):NULL; sprintf(op, " %c ", (postfix_chp?*postfix_chp:derive_ch)); - strncat(metricv[i]->descr, op, sizeof(metricv[i]->descr)); - strncat(metricv[i]->descr, info.name[k], sizeof(metricv[i]->descr)); + strncat(metricv[i]->descr, op, + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); + strncat(metricv[i]->descr, info.name[k], + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); } - strncat(metricv[i]->descr, " ]", sizeof(metricv[i]->descr)); + strncat(metricv[i]->descr, " ]", + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); if (strcmp(info.symbol, metricv[i]->name) != 0) { /* add preset name */ - strncat(metricv[i]->descr, " = ", sizeof(metricv[i]->descr)); - strncat(metricv[i]->descr, info.symbol, sizeof(metricv[i]->descr)); + strncat(metricv[i]->descr, " = ", + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); + strncat(metricv[i]->descr, info.symbol, + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); } } diff --git a/ompi/contrib/vt/vt/vtlib/vt_mpicom.c 
b/ompi/contrib/vt/vt/vtlib/vt_mpicom.c index dff12e6735..23e0a2d182 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_mpicom.c +++ b/ompi/contrib/vt/vt/vtlib/vt_mpicom.c @@ -20,6 +20,36 @@ #include "vt_error.h" #include +#include + +/* + *----------------------------------------------------------------------------- + * Macros + *----------------------------------------------------------------------------- + */ + +/* Raise the maximum number of MPI handles (groups, comms., or windows) + defined per process */ +#define RAISE_MAX(handles, max, type) \ +{ \ + uint32_t newmax = \ + (max) + (uint32_t)((double)(max) * 1.5/*raise factor*/ + 0.5/*round*/); \ + /* reallocate handle-array */ \ + handles = (type*)realloc(handles, newmax * sizeof(type)); \ + if ( !handles ) \ + vt_error(); \ + /* initialize new memory */ \ + memset(handles + max, 0, (newmax - max) * sizeof(type)); \ + max = newmax; \ +} + +/* + *----------------------------------------------------------------------------- + * Data structures + *----------------------------------------------------------------------------- + */ + +/* Structure to hold the MPI_COMM_WORLD definition */ struct VTWorld { @@ -29,18 +59,27 @@ struct VTWorld VT_MPI_INT* ranks; }; -/* -- communicator handling -- */ +/* Structure for group tracking */ + +struct VTGroup +{ + MPI_Group group; + uint32_t gid; + uint32_t refcnt; +}; + +/* Structure for communicator tracking */ struct VTComm { MPI_Comm comm; - MPI_Group group; uint32_t cid; }; -/* -- window handling -- */ - #if defined(HAVE_MPI2_1SIDED) && HAVE_MPI2_1SIDED + +/* Structure for window tracking */ + struct VTWin { MPI_Win win; @@ -48,36 +87,80 @@ struct VTWin uint32_t gid; uint32_t wid; }; + #endif /* HAVE_MPI2_1SIDED */ -struct VTWorld world; +/* + *----------------------------------------------------------------------------- + * Static variables + *----------------------------------------------------------------------------- + */ +/* MPI_COMM_WORLD definition */ +static struct VTWorld 
world; + +/* MPI_COMM_WORLD process group id */ static uint32_t world_cid = (uint32_t)-1; -static uint32_t self_cid = (uint32_t)-1; -static uint32_t last_comm = 0; -static uint32_t max_comms = (uint32_t)-1; -static VT_MPI_INT* ranks; -static struct VTComm* comms; -static uint8_t* grpv; -static uint8_t comm_initialized = 0; +/* MPI_COMM_SELF process group id */ +static uint32_t self_cid = (uint32_t)-1; + +/* index to group array */ +static uint32_t last_group = 0; + +/* index to comm array */ +static uint32_t last_comm = 0; + +/* maximum number of groups defined per process */ +static uint32_t max_groups = VT_MAX_MPI_GROUPS_INIT; + +/* maximum number of communicators defined per process */ +static uint32_t max_comms = VT_MAX_MPI_COMMS_INIT; + +/* group tracking data structure */ +static struct VTGroup* groups; + +/* communicator tracking data structure */ +static struct VTComm* comms; #if defined(HAVE_MPI2_1SIDED) && HAVE_MPI2_1SIDED -static uint32_t free_win = (uint32_t)-1; -static uint32_t last_win = 0; -static uint32_t max_wins = (uint32_t)-1; -static struct VTWin* wins; + + static uint32_t free_win = (uint32_t)-1; + + /* index to win array */ + static uint32_t last_win = 0; + + /* maximum number of windows defined per process */ + static uint32_t max_wins = VT_MAX_MPI_WINS_INIT; + + /* window tracking data structure */ + static struct VTWin* wins; + #endif /* HAVE_MPI2_1SIDED */ +/* array of ranks used for rank translation */ +static VT_MPI_INT* ranks; + +/* bitvector used for bitvector creation of new communicators/groups */ +static uint8_t* grpv; + +/* flag to indicate communicator initialization */ +static uint8_t comm_initialized = 0; + +/* + *----------------------------------------------------------------------------- + * Static functions + *----------------------------------------------------------------------------- + */ static uint32_t group_search(MPI_Group group) { uint32_t i = 0; - while ((i < last_comm) && (comms[i].group != group)) + while ((i < 
last_group) && (groups[i].group != group)) i++; - if (i != last_comm) + if (i != last_group) return i; else return (uint32_t)-1; @@ -104,33 +187,31 @@ static uint32_t win_search(MPI_Win win) } #endif /* HAVE_MPI2_1SIDED */ -/* +static void group_to_bitvector(MPI_Group group) +{ + int i; + int size; + + /* determine the world rank of each process in group + (Parameter #3 is world.ranks here, as we need an array of integers + initialized with 0 to n-1, which world.ranks happens to be. */ + PMPI_Group_size(group, &size); + PMPI_Group_translate_ranks(group, size, world.ranks, world.group, ranks); + + /* initialize grpv */ + memset(grpv, 0, world.size_grpv); + + /* set corresponding bit for each process in group */ + for (i = 0; i < size; i++) + grpv[ranks[i] / 8] |= (1 << (ranks[i] % 8)); +} + +/* *----------------------------------------------------------------------------- - * - * Communicator management - * + * Global functions *----------------------------------------------------------------------------- */ -/* -- rank translation -- */ - -uint32_t vt_rank_to_pe(VT_MPI_INT rank, MPI_Comm comm) -{ - MPI_Group group; - VT_MPI_INT global_rank; - VT_MPI_INT inter; - - PMPI_Comm_test_inter(comm, &inter); - if ( inter ) - PMPI_Comm_remote_group(comm, &group); - else - PMPI_Comm_group(comm, &group); - - PMPI_Group_translate_ranks(group, 1, &rank, world.group, &global_rank); - PMPI_Group_free(&group); - return (uint32_t)global_rank; -} - void vt_comm_init() { VT_MPI_INT i; @@ -139,13 +220,15 @@ void vt_comm_init() { comm_initialized = 1; - max_comms = vt_env_max_mpi_comms(); + groups = (struct VTGroup*)calloc(max_groups, sizeof(struct VTGroup)); + if ( !groups ) + vt_error(); + comms = (struct VTComm*)calloc(max_comms, sizeof(struct VTComm)); if ( !comms ) vt_error(); #if defined(HAVE_MPI2_1SIDED) && HAVE_MPI2_1SIDED - max_wins = vt_env_max_mpi_wins(); wins = (struct VTWin*)calloc(max_wins, sizeof(struct VTWin)); if ( !wins ) vt_error(); @@ -174,6 +257,7 @@ void 
vt_comm_finalize() { PMPI_Group_free(&world.group); + free(groups); free(comms); #if defined(HAVE_MPI2_1SIDED) && HAVE_MPI2_1SIDED free(wins); @@ -184,89 +268,103 @@ void vt_comm_finalize() free(grpv); } -void vt_group_to_bitvector(MPI_Group group) +uint32_t vt_rank_to_pe(VT_MPI_INT rank, MPI_Comm comm) { - VT_MPI_INT i; + MPI_Group group; + VT_MPI_INT global_rank; + VT_MPI_INT inter; - /* translate ranks */ - PMPI_Group_translate_ranks(world.group, world.size, world.ranks, group, ranks); + PMPI_Comm_test_inter(comm, &inter); + if ( inter ) + PMPI_Comm_remote_group(comm, &group); + else + PMPI_Comm_group(comm, &group); - /* initialize grpv */ - for (i = 0; i < world.size_grpv; i++) - grpv[i] = 0; + PMPI_Group_translate_ranks(group, 1, &rank, world.group, &global_rank); + PMPI_Group_free(&group); - /* which process in MPI_COMM_WORLD is member of comm */ - for (i = 0; i < world.size; i++) - if (ranks[i] != MPI_UNDEFINED) - grpv[i / 8] |= (1 << (i % 8)); + return (uint32_t)global_rank; } +/* + *----------------------------------------------------------------------------- + * Communicator management + *----------------------------------------------------------------------------- + */ + void vt_comm_create(MPI_Comm comm) { - int i; - MPI_Group group; + MPI_Group group, lgroup, rgroup; + VT_MPI_INT inter; + VT_MPI_INT size_grpv = 0; + uint32_t cid; - if (last_comm >= max_comms) - vt_error_msg("Too many communicators (VT_MAX_MPI_COMMS=%d", max_comms); + /* raise maximum number of communicators, if necessary */ + if (last_comm == max_comms) + RAISE_MAX(comms, max_comms, struct VTComm); /* ask for group of comm */ - PMPI_Comm_group(comm, &group); - /* check if group already exists w/o communicator */ - if ((i = group_search( group ) != (uint32_t)-1) && - (comms[i].comm == MPI_COMM_NULL)) + PMPI_Comm_test_inter(comm, &inter); + + if (inter) { - /* just set communicator to comm */ - comms[i].comm = comm; + PMPI_Comm_remote_group(comm, &rgroup); + PMPI_Comm_group(comm, 
&lgroup); + + PMPI_Group_union(lgroup, rgroup, &group); + + PMPI_Group_free(&lgroup); + PMPI_Group_free(&rgroup); } else { - uint32_t cid; - VT_MPI_INT size_grpv = 0; - - /* create group entry in grpv except for - MPI_COMM_SELF and - MPI_COMM_WORLD (if the current rank isn't the first available one) */ - if ((comm != MPI_COMM_SELF && comm != MPI_COMM_WORLD) || - (comm == MPI_COMM_WORLD && vt_my_trace_is_first_avail)) - { - vt_group_to_bitvector( group ); - size_grpv = world.size_grpv; - } - - /* register mpi communicator definition */ -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) - VTTHRD_LOCK_IDS(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ - cid = vt_def_mpi_comm(VT_CURRENT_THREAD, - comm == MPI_COMM_WORLD ? VT_MPI_COMM_WORLD : - comm == MPI_COMM_SELF ? VT_MPI_COMM_SELF : VT_MPI_COMM_OTHER, - size_grpv, grpv); -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) - VTTHRD_UNLOCK_IDS(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ - - /* save communicator id for fast access in vt_comm_id */ - if (comm == MPI_COMM_WORLD) world_cid = cid; - else if (comm == MPI_COMM_SELF) self_cid = cid; - - /* enter comm in comms[] array */ - comms[last_comm].comm = comm; - comms[last_comm].group = group; - comms[last_comm].cid = cid; - last_comm++; + PMPI_Comm_group(comm, &group); } + /* create group entry in grpv except for + MPI_COMM_SELF and + MPI_COMM_WORLD (if the current rank isn't the first available one) */ + if ((comm != MPI_COMM_SELF && comm != MPI_COMM_WORLD) || + (comm == MPI_COMM_WORLD && vt_my_trace_is_first_avail)) + { + group_to_bitvector( group ); + size_grpv = world.size_grpv; + } + + /* register mpi communicator definition */ +#if (defined(VT_MT) || defined(VT_HYB)) + VTTHRD_LOCK_IDS(); +#endif /* VT_MT || VT_HYB */ + cid = vt_def_mpi_comm(VT_CURRENT_THREAD, + comm == MPI_COMM_WORLD ? VT_MPI_COMM_WORLD : + comm == MPI_COMM_SELF ? 
VT_MPI_COMM_SELF : VT_MPI_COMM_OTHER, + size_grpv, grpv); +#if (defined(VT_MT) || defined(VT_HYB)) + VTTHRD_UNLOCK_IDS(); +#endif /* VT_MT || VT_HYB */ + + /* save communicator id for fast access in vt_comm_id */ + if (comm == MPI_COMM_WORLD) world_cid = cid; + else if (comm == MPI_COMM_SELF) self_cid = cid; + + /* enter comm in comms[] array */ + comms[last_comm].comm = comm; + comms[last_comm].cid = cid; + last_comm++; + /* clean up */ PMPI_Group_free(&group); } void vt_comm_free(MPI_Comm comm) { + /* if only one communicator exists, we just need to decrease last_comm */ if (last_comm == 1 && comms[0].comm == comm) { last_comm = 0; } + /* if more than one communicator exists, we need to search for the entry */ else if (last_comm > 1) { uint32_t i = 0; @@ -275,9 +373,14 @@ void vt_comm_free(MPI_Comm comm) i++; if (i < last_comm--) - comms[i] = comms[last_comm]; + { + /* swap deletion candidate with last entry in the list */ + comms[i] = comms[last_comm]; + } else - vt_error_msg("vt_comm_free1: Cannot find communicator"); + { + vt_error_msg("vt_comm_free1: Cannot find communicator"); + } } else { @@ -296,94 +399,115 @@ uint32_t vt_comm_id(MPI_Comm comm) i++; if (i != last_comm) - return comms[i].cid; + { + return comms[i].cid; + } else { - vt_error_msg("vt_comm_id: Cannot find communicator"); + vt_error_msg("Cannot find communicator"); return (uint32_t)-1; } } -/* +/* *----------------------------------------------------------------------------- - * * Group management - * *----------------------------------------------------------------------------- */ void vt_group_create(MPI_Group group) { - if (last_comm >= max_comms) - vt_error_msg("Too many communicators (VT_MAX_MPI_COMMS=%d", max_comms); + uint32_t i; - /* check if group already exists w/ communicator */ - if (group_search( group ) == (uint32_t)-1) + /* check if group already exists */ + if ((i = group_search( group )) == (uint32_t)-1) { - uint32_t cid; + uint32_t gid; + + /* raise maximum number of groups, 
if necessary */ + if (last_group == max_groups) + RAISE_MAX(groups, max_groups, struct VTGroup); /* create group entry in grpv */ - vt_group_to_bitvector( group ); + group_to_bitvector( group ); - /* register mpi communicator definition */ -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) + /* register mpi group definition (as communicator) */ +#if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_LOCK_IDS(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ - cid = vt_def_mpi_comm(VT_CURRENT_THREAD, VT_MPI_COMM_OTHER, +#endif /* VT_MT || VT_HYB */ + gid = vt_def_mpi_comm(VT_CURRENT_THREAD, VT_MPI_GROUP, world.size_grpv, grpv); -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) +#if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_UNLOCK_IDS(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ +#endif /* VT_MT || VT_HYB */ - /* enter comm in comms[] array */ - comms[last_comm].comm = MPI_COMM_NULL; - comms[last_comm].group = group; - comms[last_comm].cid = cid; - last_comm++; + /* enter group in groups[] array */ + groups[last_group].group = group; + groups[last_group].gid = gid; + groups[last_group].refcnt = 1; + last_group++; + } + else + { + /* count additional reference on group */ + groups[i].refcnt++; } } void vt_group_free(MPI_Group group) { - /* The follow code to "free" the comm/group entry is unusable. A thorough - check of the usage of active communicators is required to verify that - none still require it, then the entire comm/group entry should be deleted - so that it can be subsequently re-used. 
*/ - -#if 0 - uint32_t pos = group_search(group); - if ( pos != (uint32_t)-1 ) + if (last_group == 1 && groups[0].group == group) { - comms[pos].group = MPI_GROUP_EMPTY; - comms[pos].cid = 0; + groups[0].refcnt--; + + if (groups[0].refcnt == 0) + last_group--; + } + else if (last_group > 1) + { + uint32_t i; + + if ((i = group_search(group)) != (uint32_t)-1) + { + /* decrease reference count on entry */ + groups[i].refcnt--; + + /* check if entry can be deleted */ + if (groups[i].refcnt == 0) + groups[i] = groups[--last_group]; + } + else + { + vt_error_msg("vt_group_free1: Cannot find group"); + } } else { - vt_error_msg("Cannot find group"); + vt_error_msg("vt_group_free2: Cannot find group"); } -#endif } uint32_t vt_group_id(MPI_Group group) { - uint32_t pos = group_search(group); - - if ( pos != (uint32_t)-1 ) + uint32_t i = 0; + + while ((i < last_group) && (groups[i].group != group)) + i++; + + if (i != last_group) { - return comms[pos].cid; + return groups[i].gid; } else { vt_error_msg("Cannot find group"); - return (uint32_t)-1; + return (uint32_t)-1; } } -/* +/* *----------------------------------------------------------------------------- - * * Window management - * *----------------------------------------------------------------------------- */ @@ -398,13 +522,13 @@ void vt_win_create( MPI_Win win, MPI_Comm comm ) { uint32_t wid; -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) +#if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_LOCK_IDS(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ +#endif /* VT_MT || VT_HYB */ wid = vt_get_curid(); -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) +#if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_UNLOCK_IDS(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ +#endif /* VT_MT || VT_HYB */ PMPI_Win_get_group(win, &group); @@ -417,30 +541,30 @@ void vt_win_create( MPI_Win win, MPI_Comm comm ) wins[free_win].gid = vt_group_id(group); wins[free_win].wid = wid; } - else if ( last_win < max_wins ) + else { + /* raise 
maximum number of windows, if necessary */ + if (last_win == max_wins) + RAISE_MAX(wins, max_wins, struct VTWin); + wins[last_win].win = win; wins[last_win].comm = comm; wins[last_win].gid = vt_group_id(group); wins[last_win].wid = wid; last_win++; } - else - { - vt_error_msg("Too many windows (VT_MAX_MPI_WINS=%d", max_wins); - } } } void vt_win_free( MPI_Win win ) { - uint32_t pos = win_search(win); - if ( pos != (uint32_t)-1 ) + uint32_t i = win_search(win); + if ( i != (uint32_t)-1 ) { - wins[pos].win = MPI_WIN_NULL; - wins[pos].comm = MPI_COMM_NULL; - wins[pos].gid = 0; - wins[pos].wid = 0; + wins[i].win = MPI_WIN_NULL; + wins[i].comm = MPI_COMM_NULL; + wins[i].gid = 0; + wins[i].wid = 0; } else { @@ -450,13 +574,13 @@ void vt_win_free( MPI_Win win ) void vt_win_id( MPI_Win win, MPI_Comm* comm, uint32_t* gid, uint32_t* wid ) { - uint32_t pos = win_search(win); + uint32_t i = win_search(win); - if ( pos != (uint32_t)-1 ) + if ( i != (uint32_t)-1 ) { - *comm = wins[pos].comm; - *gid = wins[pos].gid; - *wid = wins[pos].wid; + *comm = wins[i].comm; + *gid = wins[i].gid; + *wid = wins[i].wid; } else { @@ -466,11 +590,11 @@ void vt_win_id( MPI_Win win, MPI_Comm* comm, uint32_t* gid, uint32_t* wid ) void vt_win_set_gid( MPI_Win win, uint32_t gid ) { - uint32_t pos = win_search(win); + uint32_t i = win_search(win); - if ( pos != (uint32_t)-1 ) + if ( i != (uint32_t)-1 ) { - wins[pos].gid = gid; + wins[i].gid = gid; } else { diff --git a/ompi/contrib/vt/vt/vtlib/vt_mpicom.h b/ompi/contrib/vt/vt/vtlib/vt_mpicom.h index 584e62c554..f5a08f1f8c 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_mpicom.h +++ b/ompi/contrib/vt/vt/vtlib/vt_mpicom.h @@ -27,20 +27,16 @@ EXTERN void vt_comm_init(void); EXTERN void vt_comm_finalize(void); - -EXTERN void vt_group_to_bitvector(MPI_Group group); +EXTERN uint32_t vt_rank_to_pe(VT_MPI_INT rank, MPI_Comm comm); EXTERN void vt_group_create(MPI_Group group); EXTERN void vt_group_free(MPI_Group group); EXTERN uint32_t vt_group_id(MPI_Group group); 
-EXTERN uint32_t vt_group_search(MPI_Group group); EXTERN void vt_comm_create(MPI_Comm comm); EXTERN void vt_comm_free(MPI_Comm comm); EXTERN uint32_t vt_comm_id(MPI_Comm comm); -EXTERN uint32_t vt_rank_to_pe(VT_MPI_INT rank, MPI_Comm comm); - #if defined(HAVE_MPI2_1SIDED) && HAVE_MPI2_1SIDED EXTERN void vt_win_create(MPI_Win win, MPI_Comm comm); EXTERN void vt_win_free(MPI_Win win); diff --git a/ompi/contrib/vt/vt/vtlib/vt_mpiwrap.c b/ompi/contrib/vt/vt/vtlib/vt_mpiwrap.c index bb90c8b25e..67f24a39e4 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_mpiwrap.c +++ b/ompi/contrib/vt/vt/vtlib/vt_mpiwrap.c @@ -508,6 +508,78 @@ VT_MPI_INT MPI_Comm_create( MPI_Comm comm, return result; } +/* -- MPI_Comm_group -- */ + +VT_MPI_INT MPI_Comm_group( MPI_Comm comm, + MPI_Group* group ) +{ + VT_MPI_INT result; + uint64_t time; + uint8_t was_recorded; + + if (IS_MPI_TRACE_ON) + { + MPI_TRACE_OFF(); + + time = vt_pform_wtime(); + was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_COMM_GROUP]); + + CALL_PMPI_2(MPI_Comm_group, comm, group, + result, was_recorded, &time); + + if ( *group != MPI_GROUP_NULL) + vt_group_create(*group); + + time = vt_pform_wtime(); + vt_exit(VT_CURRENT_THREAD, &time); + + MPI_TRACE_ON(); + } + else + { + CALL_PMPI_2(MPI_Comm_group, comm, group, + result, 0, NULL); + } + + return result; +} + +/* -- MPI_Comm_remote_group -- */ + +VT_MPI_INT MPI_Comm_remote_group( MPI_Comm comm, + MPI_Group* group ) +{ + VT_MPI_INT result; + uint64_t time; + uint8_t was_recorded; + + if (IS_MPI_TRACE_ON) + { + MPI_TRACE_OFF(); + + time = vt_pform_wtime(); + was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_COMM_REMOTE_GROUP]); + + CALL_PMPI_2(MPI_Comm_remote_group, comm, group, + result, was_recorded, &time); + + if ( *group != MPI_GROUP_NULL) + vt_group_create(*group); + + time = vt_pform_wtime(); + vt_exit(VT_CURRENT_THREAD, &time); + + MPI_TRACE_ON(); + } + else + { + CALL_PMPI_2(MPI_Comm_remote_group, comm, group, + result, 0, NULL); + 
} + + return result; +} + /* -- MPI_Comm_split -- */ VT_MPI_INT MPI_Comm_split( MPI_Comm comm, @@ -900,7 +972,7 @@ VT_MPI_INT MPI_Win_create( void* base, /* -- MPI_Win_free -- */ -VT_MPI_INT MPI_Win_free ( MPI_Win* win ) +VT_MPI_INT MPI_Win_free( MPI_Win* win ) { VT_MPI_INT result; uint64_t time; @@ -931,6 +1003,45 @@ VT_MPI_INT MPI_Win_free ( MPI_Win* win ) return result; } +VT_MPI_INT MPI_Win_get_group( MPI_Win win, MPI_Group* group ) +{ + VT_MPI_INT result; + uint64_t time; + uint8_t was_recorded; + + if (IS_MPI_TRACE_ON) + { + MPI_TRACE_OFF(); + + time = vt_pform_wtime(); + was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_WIN_GET_GROUP]); + + /* UNIMCI_check___MPI_Win_get_group not yet available; + call PMPI function directly */ + result = PMPI_Win_get_group(win, group); + /*CALL_PMPI_2(MPI_Win_get_group, win, group, + result, was_recorded, &time);*/ + + if (*group != MPI_GROUP_NULL) + vt_group_create(*group); + + time = vt_pform_wtime(); + vt_exit(VT_CURRENT_THREAD, &time); + + MPI_TRACE_ON(); + } + else + { + /* UNIMCI_check___MPI_Win_get_group not yet available; + call PMPI function directly */ + result = PMPI_Win_get_group(win, group); + /*CALL_PMPI_2(MPI_Win_get_group, win, group, + result, was_recorded, &time);*/ + } + + return result; +} + #endif /* HAVE_MPI2_1SIDED */ /* @@ -965,7 +1076,7 @@ VT_MPI_INT MPI_Cart_create( MPI_Comm comm_old, comm_cart, result, was_recorded, &time); - if ( *comm_cart != MPI_COMM_NULL) + if (*comm_cart != MPI_COMM_NULL) vt_comm_create(*comm_cart); time = vt_pform_wtime(); @@ -1003,7 +1114,7 @@ VT_MPI_INT MPI_Cart_sub( MPI_Comm comm, CALL_PMPI_3(MPI_Cart_sub, comm, rem_dims, newcomm, result, was_recorded, &time); - if ( *newcomm != MPI_COMM_NULL) + if (*newcomm != MPI_COMM_NULL) vt_comm_create(*newcomm); time = vt_pform_wtime(); @@ -1044,7 +1155,7 @@ VT_MPI_INT MPI_Graph_create( MPI_Comm comm_old, comm_graph, result, was_recorded, &time); - if ( *comm_graph != MPI_COMM_NULL) + if (*comm_graph != 
MPI_COMM_NULL) vt_comm_create(*comm_graph); time = vt_pform_wtime(); @@ -1087,7 +1198,7 @@ VT_MPI_INT MPI_Intercomm_create( MPI_Comm local_comm, remote_leader, tag, newintercomm, result, was_recorded, &time); - if ( *newintercomm != MPI_COMM_NULL) + if (*newintercomm != MPI_COMM_NULL) vt_comm_create(*newintercomm); time = vt_pform_wtime(); @@ -1125,7 +1236,7 @@ VT_MPI_INT MPI_Intercomm_merge( MPI_Comm intercomm, CALL_PMPI_3(MPI_Intercomm_merge, intercomm, high, newcomm, result, was_recorded, &time); - if ( *newcomm != MPI_COMM_NULL) + if (*newcomm != MPI_COMM_NULL) vt_comm_create(*newcomm); time = vt_pform_wtime(); @@ -2645,10 +2756,7 @@ VT_MPI_INT MPI_Allreduce ( void* sendbuf, time = vt_pform_wtime(); - if (was_recorded) - { - vt_mpi_collend(VT_CURRENT_THREAD, &time, matchid, &comm); - } + vt_mpi_collend(VT_CURRENT_THREAD, &time, matchid, &comm, was_recorded); vt_exit(VT_CURRENT_THREAD, &time); @@ -2694,10 +2802,7 @@ VT_MPI_INT MPI_Barrier( MPI_Comm comm ) time = vt_pform_wtime(); - if (was_recorded) - { - vt_mpi_collend(VT_CURRENT_THREAD, &time, matchid, &comm); - } + vt_mpi_collend(VT_CURRENT_THREAD, &time, matchid, &comm, was_recorded); vt_exit(VT_CURRENT_THREAD, &time); @@ -2754,10 +2859,7 @@ VT_MPI_INT MPI_Bcast( void* buf, time = vt_pform_wtime(); - if (was_recorded) - { - vt_mpi_collend(VT_CURRENT_THREAD, &time, matchid, &comm); - } + vt_mpi_collend(VT_CURRENT_THREAD, &time, matchid, &comm, was_recorded); vt_exit(VT_CURRENT_THREAD, &time); @@ -2799,6 +2901,14 @@ VT_MPI_INT MPI_Gather( void* sendbuf, { matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTTHRD_MY_VTTHRD); +#if defined(HAVE_DECL_MPI_IN_PLACE) && HAVE_DECL_MPI_IN_PLACE + if (sendbuf == MPI_IN_PLACE) + { + sendtype = recvtype; + sendcount = recvcount; + } +#endif /* HAVE_DECL_MPI_IN_PLACE */ + PMPI_Type_size(sendtype, &ssz); PMPI_Comm_rank(comm, &me); if ( me == root ) { @@ -2820,10 +2930,7 @@ VT_MPI_INT MPI_Gather( void* sendbuf, time = vt_pform_wtime(); - if (was_recorded) - { - 
vt_mpi_collend(VT_CURRENT_THREAD, &time, matchid, &comm); - } + vt_mpi_collend(VT_CURRENT_THREAD, &time, matchid, &comm, was_recorded); vt_exit(VT_CURRENT_THREAD, &time); @@ -2884,10 +2991,7 @@ VT_MPI_INT MPI_Reduce( void* sendbuf, time = vt_pform_wtime(); - if (was_recorded) - { - vt_mpi_collend(VT_CURRENT_THREAD, &time, matchid, &comm); - } + vt_mpi_collend(VT_CURRENT_THREAD, &time, matchid, &comm, was_recorded); vt_exit(VT_CURRENT_THREAD, &time); @@ -2931,16 +3035,32 @@ VT_MPI_INT MPI_Gatherv( void* sendbuf, { matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTTHRD_MY_VTTHRD); + PMPI_Comm_size(comm, &N); + + recvcount = 0; + for(i = 0; ibuf->pos += (bytes) @@ -99,6 +99,7 @@ struct VTGen_struct uint32_t tid; uint32_t flushcntr; uint8_t isfirstflush; + uint8_t hasdata; uint8_t mode; uint8_t sum_props; VTRewindMark rewindmark; @@ -151,6 +152,9 @@ VTGen* VTGen_open(const char* tname, const char* tnamesuffix, /* initialize first flush flag */ gen->isfirstflush = 1; + /* initialize has data flag */ + gen->hasdata = 0; + /* initialize trace mode flags */ gen->mode = (uint8_t)vt_env_mode(); @@ -205,10 +209,10 @@ void VTGen_flush(VTGen* gen, uint8_t lastFlush, buffer_t p; /* intermediate flush and max. buffer flushes reached? 
*/ - if(!lastFlush && gen->flushcntr == 0) return; + if (!lastFlush && gen->flushcntr == 0) return; /* reset buffer, if rank is disabled */ - if(vt_my_trace_is_disabled) + if (vt_my_trace_is_disabled) { gen->buf->pos = gen->buf->mem; return; @@ -218,19 +222,19 @@ void VTGen_flush(VTGen* gen, uint8_t lastFlush, VT_SUSPEND_IO_TRACING(gen->tid); /* mark begin of flush */ - if(!lastFlush) + if (!lastFlush) vt_enter_flush(gen->tid, &flushBTime); /* get process id */ pid = VT_PROCESS_ID(vt_my_trace, gen->tid); - if(gen->isfirstflush) + if (gen->isfirstflush) { /* set base name of the temporary files (basename includes local path but neither thread identifier nor suffix) */ gen->fileprefix = (char*)calloc(VT_PATH_MAX + 1, sizeof(char)); - if(gen->fileprefix == NULL) + if (gen->fileprefix == NULL) vt_error(); snprintf(gen->fileprefix, VT_PATH_MAX, "%s/%s.%lx.%u", @@ -238,49 +242,83 @@ void VTGen_flush(VTGen* gen, uint8_t lastFlush, vt_pform_node_id(), getpid()); /* open file manager for writer stream */ + gen->filemanager = OTF_FileManager_open(4); + if (gen->filemanager == NULL) + vt_error_msg("OTF_FileManager_open failed:\n %s", otf_strerr); /* open writer stream */ - gen->filestream = OTF_WStream_open(gen->fileprefix, gen->tid+1, - gen->filemanager); - if( gen->filestream == NULL ) - vt_error_msg("Cannot open OTF writer stream [namestub %s id %x]", - gen->fileprefix, gen->tid+1); - else - vt_cntl_msg(2, "Opened OTF writer stream [namestub %s id %x] for " - "generation [buffer %d bytes]", - gen->fileprefix, gen->tid+1, gen->buf->size); + gen->filestream = + OTF_WStream_open(gen->fileprefix, gen->tid+1, gen->filemanager); + if (gen->filestream == NULL) + vt_error_msg("OTF_WStream_open failed:\n %s", otf_strerr); - /* set file compression */ + vt_cntl_msg(2, "Opened OTF writer stream [namestub %s id %x] for " + "generation [buffer %llu bytes]", + gen->fileprefix, gen->tid+1, + (unsigned long long)gen->buf->size); - if( vt_env_compression() && - 
OTF_WStream_setCompression(gen->filestream, - OTF_FILECOMPRESSION_COMPRESSED) ) + /* set writer stream's buffer size */ { + size_t bsize = vt_env_otf_bsize(); + if (bsize > 0) + { + OTF_WStream_setBufferSizes(gen->filestream, bsize); + /* no return value; check otf_errno for error */ + if (otf_errno != OTF_NO_ERROR) + { + vt_error_msg("OTF_WStream_setBufferSizes failed:\n %s", + otf_strerr); + } + } + } + + /* set file compression and buffer size */ + + gen->filecomp = OTF_FILECOMPRESSION_UNCOMPRESSED; + if (vt_env_compression() && + (OTF_WStream_setCompression(gen->filestream, + OTF_FILECOMPRESSION_COMPRESSED) == 1)) + { + size_t bsize = vt_env_compression_bsize(); gen->filecomp = OTF_FILECOMPRESSION_COMPRESSED; - } - else - { - gen->filecomp = OTF_FILECOMPRESSION_UNCOMPRESSED; + + if (bsize > 0) + { + OTF_WStream_setZBufferSizes(gen->filestream, bsize); + /* no return value; check otf_errno for error */ + if (otf_errno != OTF_NO_ERROR) + { + vt_error_msg("OTF_WStream_setZBufferSizes failed:\n %s", + otf_strerr); + } + } } - if( gen->tid == 0 ) + if (gen->tid == 0) { char creator[100]; - uint64_t res = vt_pform_clockres(); + + /* write OTF version record */ + + if (OTF_WStream_writeOtfVersion(gen->filestream) == 0) + vt_error_msg("OTF_WStream_writeOtfVersion failed:\n %s", otf_strerr); /* write creator record */ - snprintf(creator, sizeof(creator) - 1, - "%s", PACKAGE_STRING); - - OTF_WStream_writeOtfVersion( gen->filestream ); - OTF_WStream_writeDefCreator( gen->filestream, creator ); + snprintf(creator, sizeof(creator) - 1, "%s", PACKAGE_STRING); + if (OTF_WStream_writeDefCreator(gen->filestream, creator) == 0) + vt_error_msg("OTF_WStream_writeDefCreator failed:\n %s", otf_strerr); /* write timer resolution record */ - OTF_WStream_writeDefTimerResolution( gen->filestream, res ); + if (OTF_WStream_writeDefTimerResolution(gen->filestream, + vt_pform_clockres()) == 0) + { + vt_error_msg("OTF_WStream_writeDefTimerResolution failed:\n %s", + otf_strerr); + } } /* 
write process definition record */ @@ -288,579 +326,725 @@ void VTGen_flush(VTGen* gen, uint8_t lastFlush, uint32_t parent_pid = 0; char pname[1024]; - if(gen->tid != 0) + if (gen->tid != 0) parent_pid = VT_PROCESS_ID(vt_my_trace, gen->ptid); snprintf(pname, sizeof(pname) - 1, "%s %d%s", gen->tname, vt_my_trace, gen->tnamesuffix); - OTF_WStream_writeDefProcess(gen->filestream, pid, pname, parent_pid); - } - - /* write process group definition record (node name) */ - { - char pgname[100]; - - snprintf(pgname, sizeof(pgname) - 1, VT_UNIFY_STRID_NODE_PROCGRP"%s", - vt_pform_node_name()); - - OTF_WStream_writeDefProcessGroup(gen->filestream, - 1 /* id will be given by vtunify */, pgname, 1, &pid); + if (OTF_WStream_writeDefProcess(gen->filestream, pid, pname, + parent_pid) == 0) + { + vt_error_msg("OTF_WStream_writeDefProcess failed:\n %s", + otf_strerr); + } } gen->isfirstflush = 0; } + /* set has data flag */ + gen->hasdata = (gen->hasdata || gen->buf->pos > gen->buf->mem); + /* walk through the buffer and write records */ p = gen->buf->mem; while(p < gen->buf->pos) { - /* update minimum time, if it's a time-bound record */ - if(gen->timerange.min == (uint64_t)-1 && - ((VTBuf_Entry_Base*)p)->type >= VTBUF_ENTRY_TYPE__Enter) - { - VTBuf_Entry_EnterLeave* entry = (VTBuf_Entry_EnterLeave*)p; - gen->timerange.min = entry->time; - } - - /* write record */ - switch(((VTBuf_Entry_Base*)p)->type) - { - case VTBUF_ENTRY_TYPE__DefinitionComment: - { - VTBuf_Entry_DefinitionComment* entry = - (VTBuf_Entry_DefinitionComment*)p; - - OTF_WStream_writeDefinitionComment(gen->filestream, - entry->comment); - - break; - } - case VTBUF_ENTRY_TYPE__DefSclFile: - { - VTBuf_Entry_DefSclFile* entry = - (VTBuf_Entry_DefSclFile*)p; - - OTF_WStream_writeDefSclFile(gen->filestream, - entry->fid, entry->fname); - - break; - } - case VTBUF_ENTRY_TYPE__DefScl: - { - VTBuf_Entry_DefScl* entry = - (VTBuf_Entry_DefScl*)p; - - OTF_WStream_writeDefScl(gen->filestream, - entry->sid, entry->fid, 
entry->ln); - - break; - } - case VTBUF_ENTRY_TYPE__DefFileGroup: - { - VTBuf_Entry_DefFileGroup* entry = - (VTBuf_Entry_DefFileGroup*)p; - - OTF_WStream_writeDefFileGroup(gen->filestream, - entry->gid, entry->gname); - - break; - } - case VTBUF_ENTRY_TYPE__DefFile: - { - VTBuf_Entry_DefFile* entry = - (VTBuf_Entry_DefFile*)p; - - OTF_WStream_writeDefFile(gen->filestream, - entry->fid, entry->fname, entry->gid); - - break; - } - case VTBUF_ENTRY_TYPE__DefFunctionGroup: - { - VTBuf_Entry_DefFunctionGroup* entry = - (VTBuf_Entry_DefFunctionGroup*)p; - - OTF_WStream_writeDefFunctionGroup(gen->filestream, - entry->rdid, entry->rdesc); - - break; - } - case VTBUF_ENTRY_TYPE__DefFunction: - { - VTBuf_Entry_DefFunction* entry = - (VTBuf_Entry_DefFunction*)p; - - OTF_WStream_writeDefFunction(gen->filestream, - entry->rid, entry->rname, entry->rdid, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__DefCollectiveOperation: - { - VTBuf_Entry_DefCollectiveOperation* entry = - (VTBuf_Entry_DefCollectiveOperation*)p; - - uint32_t ctype = OTF_COLLECTIVE_TYPE_UNKNOWN; - switch(entry->ctype) - { - case VT_MPI_COLL_ALL2ALL: - ctype = OTF_COLLECTIVE_TYPE_ALL2ALL; - break; - case VT_MPI_COLL_ALL2ONE: - ctype = OTF_COLLECTIVE_TYPE_ALL2ONE; - break; - case VT_MPI_COLL_BARRIER: - ctype = OTF_COLLECTIVE_TYPE_BARRIER; - break; - case VT_MPI_COLL_ONE2ALL: - ctype = OTF_COLLECTIVE_TYPE_ONE2ALL; - break; - default: - break; - } - - OTF_WStream_writeDefCollectiveOperation(gen->filestream, - entry->cid, entry->cname, ctype); - - break; - } - case VTBUF_ENTRY_TYPE__DefCounterGroup: - { - VTBuf_Entry_DefCounterGroup* entry = - (VTBuf_Entry_DefCounterGroup*)p; - - OTF_WStream_writeDefCounterGroup(gen->filestream, - entry->gid, entry->gname); - - break; - } - case VTBUF_ENTRY_TYPE__DefCounter: - { - VTBuf_Entry_DefCounter* entry = - (VTBuf_Entry_DefCounter*)p; - - uint32_t cprop = 0; - if((entry->cprop & VT_CNTR_ACC) != 0) - cprop |= OTF_COUNTER_TYPE_ACC; - if((entry->cprop & VT_CNTR_ABS) != 0) 
- cprop |= OTF_COUNTER_TYPE_ABS; - if((entry->cprop & VT_CNTR_START) != 0) - cprop |= OTF_COUNTER_SCOPE_START; - if((entry->cprop & VT_CNTR_POINT) != 0) - cprop |= OTF_COUNTER_SCOPE_POINT; - if((entry->cprop & VT_CNTR_LAST) != 0) - cprop |= OTF_COUNTER_SCOPE_LAST; - if((entry->cprop & VT_CNTR_NEXT) != 0) - cprop |= OTF_COUNTER_SCOPE_NEXT; - if((entry->cprop & VT_CNTR_SIGNED) != 0) - cprop |= OTF_COUNTER_VARTYPE_SIGNED8; - if((entry->cprop & VT_CNTR_UNSIGNED) != 0) - cprop |= OTF_COUNTER_VARTYPE_UNSIGNED8; - if((entry->cprop & VT_CNTR_FLOAT) != 0) - cprop |= OTF_COUNTER_VARTYPE_FLOAT; - if((entry->cprop & VT_CNTR_DOUBLE) != 0) - cprop |= OTF_COUNTER_VARTYPE_DOUBLE; - - OTF_WStream_writeDefCounter(gen->filestream, - entry->cid, entry->cname, cprop, entry->gid, entry->cunit); - - break; - } - case VTBUF_ENTRY_TYPE__DefProcessGroup: - { - VTBuf_Entry_DefProcessGroup* entry = - (VTBuf_Entry_DefProcessGroup*)p; - - OTF_WStream_writeDefProcessGroup(gen->filestream, - entry->cid, entry->grpn, entry->grpc, entry->grpv); - - break; - } - case VTBUF_ENTRY_TYPE__DefMarker: - { - VTBuf_Entry_DefMarker* entry = - (VTBuf_Entry_DefMarker*)p; - - uint32_t mtype = OTF_MARKER_TYPE_UNKNOWN; - switch(entry->mtype) - { - case VT_MARKER_ERROR: - mtype = OTF_MARKER_TYPE_ERROR; - break; - case VT_MARKER_WARNING: - mtype = OTF_MARKER_TYPE_WARNING; - break; - case VT_MARKER_HINT: - mtype = OTF_MARKER_TYPE_HINT; - break; - default: - vt_assert(0); - } - - OTF_WStream_writeDefMarker(gen->filestream, - entry->mid, entry->mname, mtype); - - break; - } - case VTBUF_ENTRY_TYPE__DefKeyValue: - { - VTBuf_Entry_DefKeyValue* entry = - (VTBuf_Entry_DefKeyValue*)p; - - OTF_Type vtype = OTF_UNKNOWN; - switch(entry->vtype) - { - case VT_KEYVAL_TYPE_CHAR: - vtype = OTF_CHAR; - break; - case VT_KEYVAL_TYPE_INT32: - vtype = OTF_INT32; - break; - case VT_KEYVAL_TYPE_UINT32: - vtype = OTF_UINT32; - break; - case VT_KEYVAL_TYPE_INT64: - vtype = OTF_INT64; - break; - case VT_KEYVAL_TYPE_UINT64: - vtype = 
OTF_UINT64; - break; - case VT_KEYVAL_TYPE_FLOAT: - vtype = OTF_FLOAT; - break; - case VT_KEYVAL_TYPE_DOUBLE: - vtype = OTF_DOUBLE; - break; - default: - vt_assert(0); - } - - OTF_WStream_writeDefKeyValue(gen->filestream, - entry->kid, - vtype, - entry->kname, - NULL); - - break; - } - case VTBUF_ENTRY_TYPE__Enter: - { - VTBuf_Entry_EnterLeave* entry = - (VTBuf_Entry_EnterLeave*)p; - - OTF_WStream_writeEnter(gen->filestream, - entry->time, entry->rid, pid, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__Leave: - { - VTBuf_Entry_EnterLeave* entry = - (VTBuf_Entry_EnterLeave*)p; - - OTF_WStream_writeLeave(gen->filestream, - entry->time, entry->rid, pid, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__FileOperation: - { - VTBuf_Entry_FileOperation* entry = - (VTBuf_Entry_FileOperation*)p; - - OTF_WStream_writeFileOperation(gen->filestream, - entry->time, entry->fid, pid, entry->hid, entry->op, entry->bytes, - entry->etime - entry->time, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__BeginFileOperation: - { - VTBuf_Entry_BeginFileOperation* entry = - (VTBuf_Entry_BeginFileOperation*)p; - - OTF_WStream_writeBeginFileOperation(gen->filestream, - entry->time, pid, entry->mid, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__EndFileOperation: - { - VTBuf_Entry_EndFileOperation* entry = - (VTBuf_Entry_EndFileOperation*)p; - - OTF_WStream_writeEndFileOperation(gen->filestream, - entry->time, pid, entry->fid, entry->mid, entry->hid, - entry->op, entry->bytes, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__Counter: - { - VTBuf_Entry_Counter* entry = - (VTBuf_Entry_Counter*)p; - - OTF_WStream_writeCounter(gen->filestream, - entry->time, pid, entry->cid, entry->cval); - - break; - } - case VTBUF_ENTRY_TYPE__Comment: - { - VTBuf_Entry_Comment* entry = - (VTBuf_Entry_Comment*)p; - - OTF_WStream_writeEventComment(gen->filestream, - entry->time, pid, entry->comment); - - break; - } - case VTBUF_ENTRY_TYPE__Marker: - { - VTBuf_Entry_Marker* entry = - 
(VTBuf_Entry_Marker*)p; - - OTF_WStream_writeMarker(gen->filestream, - entry->time, pid, entry->mid, entry->mtext); - - break; - } - case VTBUF_ENTRY_TYPE__KeyValue: - { - VTBuf_Entry_KeyValue* entry = - (VTBuf_Entry_KeyValue*)p; - - OTF_WBuffer* filestream_buffer; - OTF_KeyValuePair kvpair; - - filestream_buffer = OTF_WStream_getEventBuffer( gen->filestream ); - vt_assert(filestream_buffer != NULL); - - kvpair.key = entry->kid; - - switch(entry->vtype) - { - case VT_KEYVAL_TYPE_CHAR: - kvpair.type = OTF_CHAR; - kvpair.value.otf_char = entry->kvalue.c; - break; - case VT_KEYVAL_TYPE_INT32: - kvpair.type = OTF_INT32; - kvpair.value.otf_int32 = entry->kvalue.i32; - break; - case VT_KEYVAL_TYPE_UINT32: - kvpair.type = OTF_UINT32; - kvpair.value.otf_uint32 = entry->kvalue.u32; - break; - case VT_KEYVAL_TYPE_INT64: - kvpair.type = OTF_INT64; - kvpair.value.otf_int64 = entry->kvalue.i64; - break; - case VT_KEYVAL_TYPE_UINT64: - kvpair.type = OTF_UINT64; - kvpair.value.otf_uint64 = entry->kvalue.u64; - break; - case VT_KEYVAL_TYPE_FLOAT: - kvpair.type = OTF_FLOAT; - kvpair.value.otf_float = entry->kvalue.f; - break; - case VT_KEYVAL_TYPE_DOUBLE: - kvpair.type = OTF_DOUBLE; - kvpair.value.otf_double = entry->kvalue.d; - break; - default: - vt_assert(0); - } - - OTF_WBuffer_writeKeyValuePair_short( filestream_buffer, &kvpair ); - - break; - } - case VTBUF_ENTRY_TYPE__SendMsg: - { - VTBuf_Entry_SendRecvMsg* entry = - (VTBuf_Entry_SendRecvMsg*)p; - - OTF_WStream_writeSendMsg(gen->filestream, - entry->time, pid, entry->pid, entry->cid, entry->tag, entry->len, - entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__RecvMsg: - { - VTBuf_Entry_SendRecvMsg* entry = - (VTBuf_Entry_SendRecvMsg*)p; - - OTF_WStream_writeRecvMsg(gen->filestream, - entry->time, pid, entry->pid, entry->cid, entry->tag, entry->len, - entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__CollectiveOperation: - { - VTBuf_Entry_CollectiveOperation* entry = - (VTBuf_Entry_CollectiveOperation*)p; - - 
OTF_WStream_writeCollectiveOperation(gen->filestream, - entry->time, pid, entry->rid, entry->cid, entry->rpid, entry->sent, - entry->recvd, entry->etime - entry->time, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__BeginCollectiveOperation: - { - VTBuf_Entry_BeginCollectiveOperation* entry = - (VTBuf_Entry_BeginCollectiveOperation*)p; - - OTF_WStream_writeBeginCollectiveOperation(gen->filestream, - entry->time, pid, entry->rid, entry->mid, entry->cid, entry->rpid, - entry->sent, entry->recvd, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__EndCollectiveOperation: - { - VTBuf_Entry_EndCollectiveOperation* entry = - (VTBuf_Entry_EndCollectiveOperation*)p; - - OTF_WStream_writeEndCollectiveOperation(gen->filestream, - entry->time, pid, entry->mid); - - break; - } - case VTBUF_ENTRY_TYPE__RMAPut: - { - VTBuf_Entry_RMAPutGet* entry = (VTBuf_Entry_RMAPutGet*)p; - - OTF_WStream_writeRMAPut(gen->filestream, - entry->time, pid, entry->opid, entry->tpid, entry->cid, entry->tag, - entry->len, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__RMAPutRE: - { - VTBuf_Entry_RMAPutGet* entry = (VTBuf_Entry_RMAPutGet*)p; - - OTF_WStream_writeRMAPutRemoteEnd(gen->filestream, - entry->time, pid, entry->opid, entry->tpid, entry->cid, entry->tag, - entry->len, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__RMAGet: - { - VTBuf_Entry_RMAPutGet* entry = (VTBuf_Entry_RMAPutGet*)p; - - OTF_WStream_writeRMAGet(gen->filestream, - entry->time, pid, entry->opid, entry->tpid, entry->cid, entry->tag, - entry->len, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__RMAEnd: - { - VTBuf_Entry_RMAEnd* entry = (VTBuf_Entry_RMAEnd*)p; - - OTF_WStream_writeRMAEnd(gen->filestream, - entry->time, pid, entry->rpid, entry->cid, entry->tag, entry->sid); - - break; - } - case VTBUF_ENTRY_TYPE__FunctionSummary: - { - VTBuf_Entry_FunctionSummary* entry = - (VTBuf_Entry_FunctionSummary*)p; - - OTF_WStream_writeFunctionSummary(gen->filestream, - entry->time, entry->rid, pid, entry->cnt, 
entry->excl, entry->incl); - - break; - } - case VTBUF_ENTRY_TYPE__MessageSummary: - { - VTBuf_Entry_MessageSummary* entry = - (VTBuf_Entry_MessageSummary*)p; - - OTF_WStream_writeMessageSummary(gen->filestream, - entry->time, pid, entry->peer, entry->cid, entry->tag, entry->scnt, - entry->rcnt, entry->sent, entry->recvd); - - break; - } - case VTBUF_ENTRY_TYPE__CollectiveOperationSummary: - { - VTBuf_Entry_CollectiveOperationSummary* entry = - (VTBuf_Entry_CollectiveOperationSummary*)p; - - OTF_WStream_writeCollopSummary(gen->filestream, - entry->time, pid, entry->cid, entry->rid, entry->scnt, entry->rcnt, - entry->sent, entry->recvd); - - break; - } - case VTBUF_ENTRY_TYPE__FileOperationSummary: - { - VTBuf_Entry_FileOperationSummary* entry = - (VTBuf_Entry_FileOperationSummary*)p; - - OTF_WStream_writeFileOperationSummary(gen->filestream, - entry->time, entry->fid, pid, entry->nopen, entry->nclose, - entry->nread, entry->nwrite, entry->nseek, entry->read, - entry->wrote); - - break; - } - default: - { - vt_assert(0); - } - } - - /* last buffer entry and end flush not marked ? */ - if(!end_flush_marked && + /* time-bound record? 
*/ + if (((VTBuf_Entry_Base*)p)->type >= VTBUF_ENTRY_TYPE__Enter) + { + VTBuf_Entry_EnterLeave* entry = (VTBuf_Entry_EnterLeave*)p; + + /* update time range */ + if (gen->timerange.min == (uint64_t)-1) + gen->timerange.min = entry->time; + gen->timerange.max = entry->time; + } + + /* write record */ + switch(((VTBuf_Entry_Base*)p)->type) + { + case VTBUF_ENTRY_TYPE__DefinitionComment: + { + VTBuf_Entry_DefinitionComment* entry = + (VTBuf_Entry_DefinitionComment*)p; + + if (OTF_WStream_writeDefinitionComment(gen->filestream, + entry->comment) == 0) + { + vt_error_msg("OTF_WStream_writeDefinitionComment failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefSclFile: + { + VTBuf_Entry_DefSclFile* entry = (VTBuf_Entry_DefSclFile*)p; + + if (OTF_WStream_writeDefSclFile(gen->filestream, entry->fid, + entry->fname) == 0) + { + vt_error_msg("OTF_WStream_writeDefSclFile failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefScl: + { + VTBuf_Entry_DefScl* entry = (VTBuf_Entry_DefScl*)p; + + if (OTF_WStream_writeDefScl(gen->filestream, entry->sid, entry->fid, + entry->ln) == 0) + { + vt_error_msg("OTF_WStream_writeDefScl failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefFileGroup: + { + VTBuf_Entry_DefFileGroup* entry = (VTBuf_Entry_DefFileGroup*)p; + + if (OTF_WStream_writeDefFileGroup(gen->filestream, entry->gid, + entry->gname) == 0) + { + vt_error_msg("OTF_WStream_writeDefFileGroup failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefFile: + { + VTBuf_Entry_DefFile* entry = (VTBuf_Entry_DefFile*)p; + + if (OTF_WStream_writeDefFile(gen->filestream, entry->fid, entry->fname, + entry->gid) == 0) + { + vt_error_msg("OTF_WStream_writeDefFile failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefFunctionGroup: + { + VTBuf_Entry_DefFunctionGroup* entry = (VTBuf_Entry_DefFunctionGroup*)p; + + if (OTF_WStream_writeDefFunctionGroup(gen->filestream, entry->rdid, + 
entry->rdesc) == 0) + { + vt_error_msg("OTF_WStream_writeDefFunctionGroup failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefFunction: + { + VTBuf_Entry_DefFunction* entry = (VTBuf_Entry_DefFunction*)p; + + if (OTF_WStream_writeDefFunction(gen->filestream, entry->rid, + entry->rname, entry->rdid, entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeDefFunction failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefCollectiveOperation: + { + VTBuf_Entry_DefCollectiveOperation* entry = + (VTBuf_Entry_DefCollectiveOperation*)p; + + uint32_t ctype = OTF_COLLECTIVE_TYPE_UNKNOWN; + switch(entry->ctype) + { + case VT_MPI_COLL_ALL2ALL: + ctype = OTF_COLLECTIVE_TYPE_ALL2ALL; + break; + case VT_MPI_COLL_ALL2ONE: + ctype = OTF_COLLECTIVE_TYPE_ALL2ONE; + break; + case VT_MPI_COLL_BARRIER: + ctype = OTF_COLLECTIVE_TYPE_BARRIER; + break; + case VT_MPI_COLL_ONE2ALL: + ctype = OTF_COLLECTIVE_TYPE_ONE2ALL; + break; + default: + vt_assert(0); + } + + if (OTF_WStream_writeDefCollectiveOperation(gen->filestream, entry->cid, + entry->cname, ctype) == 0) + { + vt_error_msg("OTF_WStream_writeDefCollectiveOperation failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefCounterGroup: + { + VTBuf_Entry_DefCounterGroup* entry = (VTBuf_Entry_DefCounterGroup*)p; + + if (OTF_WStream_writeDefCounterGroup(gen->filestream, entry->gid, + entry->gname) == 0) + { + vt_error_msg("OTF_WStream_writeDefCounterGroup failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefCounter: + { + VTBuf_Entry_DefCounter* entry = (VTBuf_Entry_DefCounter*)p; + + uint32_t cprop = 0; + if ((entry->cprop & VT_CNTR_ACC) != 0) + cprop |= OTF_COUNTER_TYPE_ACC; + if ((entry->cprop & VT_CNTR_ABS) != 0) + cprop |= OTF_COUNTER_TYPE_ABS; + if ((entry->cprop & VT_CNTR_START) != 0) + cprop |= OTF_COUNTER_SCOPE_START; + if ((entry->cprop & VT_CNTR_POINT) != 0) + cprop |= OTF_COUNTER_SCOPE_POINT; + if ((entry->cprop & VT_CNTR_LAST) != 0) + 
cprop |= OTF_COUNTER_SCOPE_LAST; + if ((entry->cprop & VT_CNTR_NEXT) != 0) + cprop |= OTF_COUNTER_SCOPE_NEXT; + if ((entry->cprop & VT_CNTR_SIGNED) != 0) + cprop |= OTF_COUNTER_VARTYPE_SIGNED8; + if ((entry->cprop & VT_CNTR_UNSIGNED) != 0) + cprop |= OTF_COUNTER_VARTYPE_UNSIGNED8; + if ((entry->cprop & VT_CNTR_FLOAT) != 0) + cprop |= OTF_COUNTER_VARTYPE_FLOAT; + if ((entry->cprop & VT_CNTR_DOUBLE) != 0) + cprop |= OTF_COUNTER_VARTYPE_DOUBLE; + + if (OTF_WStream_writeDefCounter(gen->filestream, entry->cid, + entry->cname, cprop, entry->gid, entry->cunit) == 0) + { + vt_error_msg("OTF_WStream_writeDefCounter failed:\n %s", otf_strerr); + } + + if (entry->pgid != 0) + { + if (OTF_WStream_writeDefCounterAssignments(gen->filestream, + entry->cid, 1, &(entry->pgid), NULL) == 0) + { + vt_error_msg("OTF_WStream_writeDefCounterAssignments failed:\n %s", + otf_strerr); + } + } + + break; + } + case VTBUF_ENTRY_TYPE__DefProcessGroup: + { + VTBuf_Entry_DefProcessGroup* entry = (VTBuf_Entry_DefProcessGroup*)p; + + if (OTF_WStream_writeDefProcessGroup(gen->filestream, entry->gid, + entry->grpn, entry->grpc, entry->grpv) == 0) + { + vt_error_msg("OTF_WStream_writeDefProcessGroup failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefProcessGroupAttributes: + { + VTBuf_Entry_DefProcessGroupAttributes* entry = + (VTBuf_Entry_DefProcessGroupAttributes*)p; + + uint32_t gattr = 0; + if ((entry->gattr & VT_PROCGRP_ISCOMMUNICATOR) != 0) + gattr |= (1<gattr & VT_PROCGRP_HASCOUNTERS) != 0) + gattr |= (1<filestream, + entry->gid, gattr) == 0) + { + vt_error_msg("OTF_WStream_writeDefProcessOrGroupAttributes failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefMarker: + { + VTBuf_Entry_DefMarker* entry = (VTBuf_Entry_DefMarker*)p; + + uint32_t mtype = OTF_MARKER_TYPE_UNKNOWN; + switch(entry->mtype) + { + case VT_MARKER_ERROR: + mtype = OTF_MARKER_TYPE_ERROR; + break; + case VT_MARKER_WARNING: + mtype = OTF_MARKER_TYPE_WARNING; + break; + case 
VT_MARKER_HINT: + mtype = OTF_MARKER_TYPE_HINT; + break; + default: + vt_assert(0); + } + + if (OTF_WStream_writeDefMarker(gen->filestream, entry->mid, + entry->mname, mtype) == 0) + { + vt_error_msg("OTF_WStream_writeDefMarker failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__DefKeyValue: + { + VTBuf_Entry_DefKeyValue* entry = (VTBuf_Entry_DefKeyValue*)p; + + OTF_Type vtype = OTF_UNKNOWN; + switch(entry->vtype) + { + case VT_KEYVAL_TYPE_CHAR: + vtype = OTF_CHAR; + break; + case VT_KEYVAL_TYPE_INT32: + vtype = OTF_INT32; + break; + case VT_KEYVAL_TYPE_UINT32: + vtype = OTF_UINT32; + break; + case VT_KEYVAL_TYPE_INT64: + vtype = OTF_INT64; + break; + case VT_KEYVAL_TYPE_UINT64: + vtype = OTF_UINT64; + break; + case VT_KEYVAL_TYPE_FLOAT: + vtype = OTF_FLOAT; + break; + case VT_KEYVAL_TYPE_DOUBLE: + vtype = OTF_DOUBLE; + break; + default: + vt_assert(0); + } + + if (OTF_WStream_writeDefKeyValue(gen->filestream, entry->kid, vtype, + entry->kname, NULL) == 0) + { + vt_error_msg("OTF_WStream_writeDefKeyValue failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__Enter: + { + VTBuf_Entry_EnterLeave* entry = (VTBuf_Entry_EnterLeave*)p; + + if (OTF_WStream_writeEnter(gen->filestream, entry->time, entry->rid, + pid, entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeEnter failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__Leave: + { + VTBuf_Entry_EnterLeave* entry = (VTBuf_Entry_EnterLeave*)p; + + if (OTF_WStream_writeLeave(gen->filestream, entry->time, entry->rid, + pid, entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeLeave failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__FileOperation: + { + VTBuf_Entry_FileOperation* entry = (VTBuf_Entry_FileOperation*)p; + + if (OTF_WStream_writeFileOperation(gen->filestream, entry->time, + entry->fid, pid, entry->hid, entry->op, entry->bytes, + entry->etime - entry->time, entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeFileOperation 
failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__BeginFileOperation: + { + VTBuf_Entry_BeginFileOperation* entry = + (VTBuf_Entry_BeginFileOperation*)p; + + if (OTF_WStream_writeBeginFileOperation(gen->filestream, entry->time, + pid, entry->mid, entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeBeginFileOperation failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__EndFileOperation: + { + VTBuf_Entry_EndFileOperation* entry = (VTBuf_Entry_EndFileOperation*)p; + + if (OTF_WStream_writeEndFileOperation(gen->filestream, entry->time, pid, + entry->fid, entry->mid, entry->hid, entry->op, entry->bytes, + entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeEndFileOperation failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__Counter: + { + VTBuf_Entry_Counter* entry = (VTBuf_Entry_Counter*)p; + + if (OTF_WStream_writeCounter(gen->filestream, entry->time, pid, + entry->cid, entry->cval) == 0) + { + vt_error_msg("OTF_WStream_writeCounter failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__Comment: + { + VTBuf_Entry_Comment* entry = (VTBuf_Entry_Comment*)p; + + if (OTF_WStream_writeEventComment(gen->filestream, entry->time, pid, + entry->comment) == 0) + { + vt_error_msg("OTF_WStream_writeEventComment failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__Marker: + { + VTBuf_Entry_Marker* entry = (VTBuf_Entry_Marker*)p; + + if (OTF_WStream_writeMarker(gen->filestream, entry->time, pid, + entry->mid, entry->mtext) == 0) + { + vt_error_msg("OTF_WStream_writeMarker failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__KeyValue: + { + VTBuf_Entry_KeyValue* entry = (VTBuf_Entry_KeyValue*)p; + + OTF_WBuffer* filestream_buffer; + OTF_KeyValuePair kvpair; + + filestream_buffer = OTF_WStream_getEventBuffer( gen->filestream ); + if (filestream_buffer == NULL ) + vt_error_msg("OTF_WStream_getEventBuffer failed:\n %s", otf_strerr); + + kvpair.key = 
entry->kid; + + switch(entry->vtype) + { + case VT_KEYVAL_TYPE_CHAR: + kvpair.type = OTF_CHAR; + kvpair.value.otf_char = entry->kvalue.c; + break; + case VT_KEYVAL_TYPE_INT32: + kvpair.type = OTF_INT32; + kvpair.value.otf_int32 = entry->kvalue.i32; + break; + case VT_KEYVAL_TYPE_UINT32: + kvpair.type = OTF_UINT32; + kvpair.value.otf_uint32 = entry->kvalue.u32; + break; + case VT_KEYVAL_TYPE_INT64: + kvpair.type = OTF_INT64; + kvpair.value.otf_int64 = entry->kvalue.i64; + break; + case VT_KEYVAL_TYPE_UINT64: + kvpair.type = OTF_UINT64; + kvpair.value.otf_uint64 = entry->kvalue.u64; + break; + case VT_KEYVAL_TYPE_FLOAT: + kvpair.type = OTF_FLOAT; + kvpair.value.otf_float = entry->kvalue.f; + break; + case VT_KEYVAL_TYPE_DOUBLE: + kvpair.type = OTF_DOUBLE; + kvpair.value.otf_double = entry->kvalue.d; + break; + default: + vt_assert(0); + } + + if (OTF_WBuffer_writeKeyValuePair_short( filestream_buffer, + &kvpair ) == 0) + { + vt_error_msg("OTF_WBuffer_writeKeyValuePair_short failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__SendMsg: + { + VTBuf_Entry_SendRecvMsg* entry = (VTBuf_Entry_SendRecvMsg*)p; + + if (OTF_WStream_writeSendMsg(gen->filestream, entry->time, pid, + entry->pid, entry->cid, entry->tag, entry->len, entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeSendMsg failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__RecvMsg: + { + VTBuf_Entry_SendRecvMsg* entry = (VTBuf_Entry_SendRecvMsg*)p; + + if (OTF_WStream_writeRecvMsg(gen->filestream, entry->time, pid, + entry->pid, entry->cid, entry->tag, entry->len, entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeRecvMsg failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__CollectiveOperation: + { + VTBuf_Entry_CollectiveOperation* entry = + (VTBuf_Entry_CollectiveOperation*)p; + + if (OTF_WStream_writeCollectiveOperation(gen->filestream, + entry->time, pid, entry->rid, entry->cid, entry->rpid, + entry->sent, entry->recvd, entry->etime - 
entry->time, + entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeCollectiveOperation failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__BeginCollectiveOperation: + { + VTBuf_Entry_BeginCollectiveOperation* entry = + (VTBuf_Entry_BeginCollectiveOperation*)p; + + if (OTF_WStream_writeBeginCollectiveOperation(gen->filestream, + entry->time, pid, entry->rid, entry->mid, entry->cid, + entry->rpid, entry->sent, entry->recvd, entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeBeginCollectiveOperation failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__EndCollectiveOperation: + { + VTBuf_Entry_EndCollectiveOperation* entry = + (VTBuf_Entry_EndCollectiveOperation*)p; + + if (OTF_WStream_writeEndCollectiveOperation(gen->filestream, + entry->time, pid, entry->mid) == 0) + { + vt_error_msg("OTF_WStream_writeEndCollectiveOperation failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__RMAPut: + { + VTBuf_Entry_RMAPutGet* entry = (VTBuf_Entry_RMAPutGet*)p; + + if (OTF_WStream_writeRMAPut(gen->filestream, entry->time, pid, + entry->opid, entry->tpid, entry->cid, entry->tag, entry->len, + entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeRMAPut failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__RMAPutRE: + { + VTBuf_Entry_RMAPutGet* entry = (VTBuf_Entry_RMAPutGet*)p; + + if (OTF_WStream_writeRMAPutRemoteEnd(gen->filestream, entry->time, + pid, entry->opid, entry->tpid, entry->cid, entry->tag, + entry->len, entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeRMAPutRemoteEnd failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__RMAGet: + { + VTBuf_Entry_RMAPutGet* entry = (VTBuf_Entry_RMAPutGet*)p; + + if (OTF_WStream_writeRMAGet(gen->filestream, entry->time, pid, + entry->opid, entry->tpid, entry->cid, entry->tag, entry->len, + entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeRMAGet failed:\n %s", otf_strerr); + } + + break; + } + case 
VTBUF_ENTRY_TYPE__RMAEnd: + { + VTBuf_Entry_RMAEnd* entry = (VTBuf_Entry_RMAEnd*)p; + + if (OTF_WStream_writeRMAEnd(gen->filestream, entry->time, pid, + entry->rpid, entry->cid, entry->tag, entry->sid) == 0) + { + vt_error_msg("OTF_WStream_writeRMAEnd failed:\n %s", otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__FunctionSummary: + { + VTBuf_Entry_FunctionSummary* entry = (VTBuf_Entry_FunctionSummary*)p; + + if (OTF_WStream_writeFunctionSummary(gen->filestream, entry->time, + entry->rid, pid, entry->cnt, entry->excl, entry->incl) == 0) + { + vt_error_msg("OTF_WStream_writeFunctionSummary failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__MessageSummary: + { + VTBuf_Entry_MessageSummary* entry = (VTBuf_Entry_MessageSummary*)p; + + if (OTF_WStream_writeMessageSummary(gen->filestream, entry->time, + pid, entry->peer, entry->cid, entry->tag, entry->scnt, + entry->rcnt, entry->sent, entry->recvd) == 0) + { + vt_error_msg("OTF_WStream_writeMessageSummary failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__CollectiveOperationSummary: + { + VTBuf_Entry_CollectiveOperationSummary* entry = + (VTBuf_Entry_CollectiveOperationSummary*)p; + + if (OTF_WStream_writeCollopSummary(gen->filestream, entry->time, + pid, entry->cid, entry->rid, entry->scnt, entry->rcnt, + entry->sent, entry->recvd) == 0) + { + vt_error_msg("OTF_WStream_writeCollopSummary failed:\n %s", + otf_strerr); + } + + break; + } + case VTBUF_ENTRY_TYPE__FileOperationSummary: + { + VTBuf_Entry_FileOperationSummary* entry = + (VTBuf_Entry_FileOperationSummary*)p; + + if (OTF_WStream_writeFileOperationSummary(gen->filestream, + entry->time, entry->fid, pid, entry->nopen, entry->nclose, + entry->nread, entry->nwrite, entry->nseek, entry->read, + entry->wrote) == 0) + { + vt_error_msg("OTF_WStream_writeFileOperationSummary failed:\n %s", + otf_strerr); + } + + break; + } + default: + { + vt_assert(0); + } + } + + /* last buffer entry and end flush not marked ? 
*/ + if (!end_flush_marked && p + ((VTBuf_Entry_Base*)p)->length >= gen->buf->pos) - { - /* mark end of flush, if it's not the last (invisible) flush and - max flushes not reached */ - if(!lastFlush && gen->flushcntr > 1) - { - uint64_t flush_etime = vt_pform_wtime(); - vt_exit_flush(gen->tid, &flush_etime); - if( flushETime != NULL ) *flushETime = flush_etime; - } + { + /* mark end of flush, if it's not the last (invisible) flush and + max flushes not reached */ + if (!lastFlush && gen->flushcntr > 1) + { + uint64_t flush_etime = vt_pform_wtime(); + vt_exit_flush(gen->tid, &flush_etime); + if (flushETime != NULL) *flushETime = flush_etime; + } - end_flush_marked = 1; - } + end_flush_marked = 1; + } - p += ((VTBuf_Entry_Base*)p)->length; + p += ((VTBuf_Entry_Base*)p)->length; } - if(lastFlush) + if (lastFlush) { - /* write event/summary comment record, in order that all event/summary - files will exist */ + /* if nothing is recorded, write event/summary comment record in order + that all event/summary files will exist */ + if (!gen->hasdata) + { + uint64_t time = vt_pform_wtime(); - uint64_t time = vt_pform_wtime(); + if (VTGEN_IS_TRACE_ON(gen)) + { + if (OTF_WStream_writeEventComment(gen->filestream, time, pid, "") == 0) + { + vt_error_msg("OTF_WStream_writeEventComment failed:\n %s", + otf_strerr); + } + } - if(VTGEN_IS_TRACE_ON(gen)) - OTF_WStream_writeEventComment(gen->filestream, time, pid, ""); - else /* VTGEN_IS_SUM_ON(gen) */ - OTF_WStream_writeSummaryComment(gen->filestream, time, pid, ""); + if (VTGEN_IS_SUM_ON(gen)) + { + if (OTF_WStream_writeSummaryComment(gen->filestream, time, pid, "") == 0) + { + vt_error_msg("OTF_WStream_writeSummaryComment failed:\n %s", + otf_strerr); + } + } + + /* set time range */ + gen->timerange.min = gen->timerange.max = time; + } /* write time range record */ - - if( gen->timerange.min == (uint64_t)-1 ) - gen->timerange.min = time; - gen->timerange.max = time; - - OTF_WStream_writeDefTimeRange(gen->filestream, 
gen->timerange.min, - gen->timerange.max, NULL); + if (OTF_WStream_writeDefTimeRange(gen->filestream, gen->timerange.min, + gen->timerange.max, NULL) == 0) + { + vt_error_msg("OTF_WStream_writeDefTimeRange failed:\n %s", + otf_strerr); + } } /* reset buffer */ @@ -870,15 +1054,18 @@ void VTGen_flush(VTGen* gen, uint8_t lastFlush, gen->fileprefix, gen->tid+1); /* decrement flush counter */ - if(gen->flushcntr > 0) gen->flushcntr--; + if (gen->flushcntr > 0) gen->flushcntr--; /* switch tracing off, if number of max flushes reached */ - if(!lastFlush && gen->flushcntr == 0) + if (!lastFlush && gen->flushcntr == 0) { int max_flushes = vt_env_max_flushes(); + vt_cntl_msg(1, "Maximum number of buffer flushes reached " "(VT_MAX_FLUSHES=%d)", max_flushes); + vt_trace_off(gen->tid, 1, 1); + vt_def_comment(gen->tid, VT_UNIFY_STRID_VT_COMMENT"WARNING: This trace is " "incomplete, because the maximum number of " @@ -903,23 +1090,27 @@ void VTGen_close(VTGen* gen) /* flush buffer if necessary */ VTGen_flush(gen, 1, 0, NULL); - if(gen->fileprefix) + if (gen->fileprefix) { /* close writer stream */ - OTF_WStream_close(gen->filestream); + if (OTF_WStream_close(gen->filestream) == 0) + vt_error_msg("OTF_WStream_close failed:\n %s", otf_strerr); /* close file manager of writer stream */ OTF_FileManager_close(gen->filemanager); + /* no return value; check otf_errno for error */ + if (otf_errno != OTF_NO_ERROR) + vt_error_msg("OTF_FileManager_close failed:\n %s", otf_strerr); vt_cntl_msg(2, "Closed OTF writer stream [namestub %s id %x]", gen->fileprefix, gen->tid+1); } /* free buffer memory */ - free(gen->buf->mem); + free(gen->buf->mem); /* free buffer record */ - free(gen->buf); + free(gen->buf); } void VTGen_delete(VTGen* gen) @@ -939,22 +1130,31 @@ void VTGen_delete(VTGen* gen) uint8_t i; /* determine (local) files for removal */ + tmp_namev[0] = OTF_getFilename(gen->fileprefix, gen->tid+1, OTF_FILETYPE_DEF | gen->filecomp, 0, NULL); + vt_assert(tmp_namev[0]); + tmp_namev[1] = 
OTF_getFilename(gen->fileprefix, gen->tid+1, OTF_FILETYPE_EVENT | gen->filecomp, 0, NULL); + vt_assert(tmp_namev[1]); + tmp_namev[2] = OTF_getFilename(gen->fileprefix, gen->tid+1, OTF_FILETYPE_STATS | gen->filecomp, 0, NULL); + vt_assert(tmp_namev[2]); + tmp_namev[3] = OTF_getFilename(gen->fileprefix, gen->tid+1, OTF_FILETYPE_MARKER | gen->filecomp, 0, NULL); + vt_assert(tmp_namev[3]); + tmp_namev[4] = NULL; i = 0; @@ -1077,23 +1277,27 @@ void VTGen_destroy(VTGen* gen) if(gen->fileprefix) { /* close writer stream */ - OTF_WStream_close(gen->filestream); + if (OTF_WStream_close(gen->filestream) == 0) + vt_error_msg("OTF_WStream_close failed:\n %s", otf_strerr); /* close file manager of writer stream */ OTF_FileManager_close(gen->filemanager); + /* no return value; check otf_errno for error */ + if (otf_errno != OTF_NO_ERROR) + vt_error_msg("OTF_FileManager_close failed:\n %s", otf_strerr); } /* destroy sum record */ if (VTGEN_IS_SUM_ON(gen)) VTSum_destroy(gen->sum); /* free buffer memory */ - free(gen->buf->mem); + free(gen->buf->mem); /* free buffer record */ - free(gen->buf); + free(gen->buf); /* free gen record */ - free(gen); + free(gen); } uint8_t VTGen_get_buflevel(VTGen* gen) @@ -1164,6 +1368,7 @@ void VTGen_write_DEF_SCL(VTGen* gen, uint32_t sid, uint32_t fid, uint32_t ln) VTGEN_ALLOC(gen, length); new_entry = ((VTBuf_Entry_DefScl*)gen->buf->pos); + new_entry->type = VTBUF_ENTRY_TYPE__DefScl; new_entry->length = length; new_entry->sid = sid; @@ -1314,7 +1519,8 @@ void VTGen_write_DEF_COUNTER_GROUP(VTGen* gen, uint32_t gid, const char* gname) } void VTGen_write_DEF_COUNTER(VTGen* gen, uint32_t cid, const char* cname, - uint32_t cprop, uint32_t gid, const char* cunit) + const char* cunit, uint32_t cprop, uint32_t gid, + uint32_t pgid) { VTBuf_Entry_DefCounter* new_entry; @@ -1333,6 +1539,7 @@ void VTGen_write_DEF_COUNTER(VTGen* gen, uint32_t cid, const char* cname, new_entry->cid = cid; new_entry->cprop = cprop; new_entry->gid = gid; + new_entry->pgid = pgid; 
strncpy(new_entry->cunit, cunit, sizeof(new_entry->cunit)-1); new_entry->cunit[sizeof(new_entry->cunit)-1] = '\0'; strcpy(new_entry->cname, cname); @@ -1340,8 +1547,8 @@ void VTGen_write_DEF_COUNTER(VTGen* gen, uint32_t cid, const char* cname, VTGEN_JUMP(gen, length); } -void VTGen_write_DEF_PROCESS_GROUP(VTGen* gen, uint32_t cid, const char* grpn, - uint32_t grpc, uint32_t grpv[]) +void VTGen_write_DEF_PROCESS_GROUP(VTGen* gen, uint32_t gid, const char* grpn, + uint32_t grpc, uint32_t grpv[]) { VTBuf_Entry_DefProcessGroup* new_entry; @@ -1357,7 +1564,7 @@ void VTGen_write_DEF_PROCESS_GROUP(VTGen* gen, uint32_t cid, const char* grpn, new_entry->type = VTBUF_ENTRY_TYPE__DefProcessGroup; new_entry->length = length; - new_entry->cid = cid; + new_entry->gid = gid; strncpy(new_entry->grpn, grpn, sizeof(new_entry->grpn)-1); new_entry->grpn[sizeof(new_entry->grpn)-1] = '\0'; new_entry->grpc = grpc; @@ -1367,6 +1574,28 @@ void VTGen_write_DEF_PROCESS_GROUP(VTGen* gen, uint32_t cid, const char* grpn, VTGEN_JUMP(gen, length); } +void VTGen_write_DEF_PROCESS_GROUP_ATTRIBUTES(VTGen* gen, uint32_t gid, + uint32_t gattr) +{ + VTBuf_Entry_DefProcessGroupAttributes* new_entry; + + uint32_t length = + VTGEN_ALIGN_LENGTH(sizeof(VTBuf_Entry_DefProcessGroupAttributes)); + + VTGEN_CHECK(gen); + + VTGEN_ALLOC(gen, length); + + new_entry = ((VTBuf_Entry_DefProcessGroupAttributes*)gen->buf->pos); + + new_entry->type = VTBUF_ENTRY_TYPE__DefProcessGroupAttributes; + new_entry->length = length; + new_entry->gid = gid; + new_entry->gattr = gattr; + + VTGEN_JUMP(gen, length); +} + void VTGen_write_DEF_KEYVAL(VTGen* gen, uint32_t kid, uint8_t vtype, const char* kname) { @@ -2340,7 +2569,9 @@ void VTGen_rewind(VTGen* gen, uint64_t *time) case VTBUF_ENTRY_TYPE__DefCounterGroup: case VTBUF_ENTRY_TYPE__DefCounter: case VTBUF_ENTRY_TYPE__DefProcessGroup: + case VTBUF_ENTRY_TYPE__DefProcessGroupAttributes: case VTBUF_ENTRY_TYPE__DefMarker: + case VTBUF_ENTRY_TYPE__DefKeyValue: { if(p != 
gen->rewindmark.pos) memmove(gen->rewindmark.pos, p, length); diff --git a/ompi/contrib/vt/vt/vtlib/vt_otf_gen.h b/ompi/contrib/vt/vt/vtlib/vt_otf_gen.h index 2559f78b00..df573a5567 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_otf_gen.h +++ b/ompi/contrib/vt/vt/vtlib/vt_otf_gen.h @@ -47,6 +47,7 @@ typedef enum VTBUF_ENTRY_TYPE__DefCounterGroup, VTBUF_ENTRY_TYPE__DefCounter, VTBUF_ENTRY_TYPE__DefProcessGroup, + VTBUF_ENTRY_TYPE__DefProcessGroupAttributes, VTBUF_ENTRY_TYPE__DefMarker, VTBUF_ENTRY_TYPE__DefKeyValue, VTBUF_ENTRY_TYPE__KeyValue, @@ -199,7 +200,8 @@ typedef struct uint32_t cid; uint32_t cprop; uint32_t gid; - char cunit[100]; + uint32_t pgid; + char cunit[128]; char cname[1]; } VTBuf_Entry_DefCounter; @@ -210,12 +212,23 @@ typedef struct VTBuf_EntryTypes type; uint32_t length; - uint32_t cid; - char grpn[100]; - uint32_t grpc; - uint32_t grpv[1]; + uint32_t gid; + char grpn[128]; + uint32_t grpc; + uint32_t grpv[1]; } VTBuf_Entry_DefProcessGroup; +/* - VTBUF_ENTRY_TYPE__DefProcessGroupAttributes - */ + +typedef struct +{ + VTBuf_EntryTypes type; + uint32_t length; + + uint32_t gid; + uint32_t gattr; +} VTBuf_Entry_DefProcessGroupAttributes; + /* - VTBUF_ENTRY_TYPE__DefMarker - */ typedef struct @@ -572,13 +585,17 @@ EXTERN void VTGen_write_DEF_COUNTER_GROUP(VTGen* gen, uint32_t gid, const char* gname); EXTERN void VTGen_write_DEF_COUNTER(VTGen* gen, uint32_t cid, - const char* cname, uint32_t cprop, - uint32_t gid, const char* cunit); + const char* cname, const char* cunit, + uint32_t cprop, uint32_t gid, + uint32_t pgid); -EXTERN void VTGen_write_DEF_PROCESS_GROUP(VTGen* gen, uint32_t cid, +EXTERN void VTGen_write_DEF_PROCESS_GROUP(VTGen* gen, uint32_t gid, const char* grpn, uint32_t grpc, uint32_t grpv[]); +EXTERN void VTGen_write_DEF_PROCESS_GROUP_ATTRIBUTES(VTGen* gen, uint32_t gid, + uint32_t gattr); + EXTERN void VTGen_write_DEF_KEYVAL(VTGen* gen, uint32_t kid, uint8_t vtype, const char* kname); diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_altix.c 
b/ompi/contrib/vt/vt/vtlib/vt_pform_altix.c index 1bb44f44a7..20a6e17c14 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_altix.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_altix.c @@ -157,11 +157,11 @@ uint64_t vt_pform_clockres() { #if TIMER == TIMER_MMTIMER return mmdev_ticks_per_sec; #elif TIMER == TIMER_CLOCK_GETTIME - return 1e9; + return 1000000000LL; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } @@ -172,7 +172,7 @@ uint64_t vt_pform_wtime() { #elif TIMER == TIMER_CLOCK_GETTIME struct timespec tp; clock_gettime(CLOCK_REALTIME, &tp); - return ((tp.tv_sec - vt_time_base) * 1e9) + tp.tv_nsec; + return ((tp.tv_sec - vt_time_base) * 1000000000LL) + tp.tv_nsec; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_real_cyc(); #elif TIMER == TIMER_PAPI_REAL_USEC diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_bgl.c b/ompi/contrib/vt/vt/vtlib/vt_pform_bgl.c index 18aa545a35..a5f0080dfa 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_bgl.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_bgl.c @@ -83,7 +83,7 @@ uint64_t vt_pform_clockres() { #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_bgp.c b/ompi/contrib/vt/vt/vtlib/vt_pform_bgp.c index 1374a18f6b..c47425c507 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_bgp.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_bgp.c @@ -51,7 +51,7 @@ static _BGP_Personality_t mybgp; void vt_pform_init() { Kernel_GetPersonality(&mybgp, sizeof(_BGP_Personality_t)); #if TIMER == TIMER_BGP_GET_TIMEBASE - vt_ticks_per_sec = (uint64_t)BGP_Personality_clockMHz(&mybgp) * 1e6; + vt_ticks_per_sec = (uint64_t)BGP_Personality_clockMHz(&mybgp) * 1000000LL; #elif TIMER == TIMER_PAPI_REAL_USEC vt_time_base = vt_metric_real_usec(); #endif @@ -84,7 +84,7 @@ uint64_t vt_pform_clockres() { #elif TIMER == TIMER_PAPI_REAL_CYC return 
vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_crayt3e.c b/ompi/contrib/vt/vt/vtlib/vt_pform_crayt3e.c index 4030536a2d..24cc8dbacc 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_crayt3e.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_crayt3e.c @@ -78,7 +78,7 @@ uint64_t vt_pform_clockres() { #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_crayx1.c b/ompi/contrib/vt/vt/vtlib/vt_pform_crayx1.c index 3e1b61f65b..820c03fff7 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_crayx1.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_crayx1.c @@ -109,11 +109,11 @@ uint64_t vt_pform_clockres() { #if TIMER == TIMER_RTC return vt_ticks_per_sec; #elif TIMER == TIMER_GETTIMEOFDAY - return 1e6; + return 1000000LL; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } @@ -124,7 +124,7 @@ uint64_t vt_pform_wtime() { #elif TIMER == TIMER_GETTIMEOFDAY struct timeval tp; gettimeofday(&tp, 0); - return ((tp.tv_sec - vt_time_base) * 1e6) + tp.tv_usec; + return ((tp.tv_sec - vt_time_base) * 1000000LL) + tp.tv_usec; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_real_cyc(); #elif TIMER == TIMER_PAPI_REAL_USEC diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_crayxe.c b/ompi/contrib/vt/vt/vtlib/vt_pform_crayxe.c index 8d7f9901d9..e5458bdcb7 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_crayxe.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_crayxe.c @@ -81,7 +81,7 @@ void vt_pform_init() { strtok(line, ":"); vt_ticks_per_sec = - strtol((char*) strtok(NULL, " \n"), (char**) NULL, 0) * 1e6; + strtol((char*) strtok(NULL, " \n"), (char**) NULL, 0) * 1000000LL; } else if (!strncmp("timebase", line, 8)) { @@ -166,13 +166,13 @@ uint64_t vt_pform_clockres() { #if TIMER == TIMER_CYCLE_COUNTER 
return vt_ticks_per_sec; #elif TIMER == TIMER_CLOCK_GETTIME - return 1e9; + return 1000000000LL; #elif TIMER == TIMER_GETTIMEOFDAY - return 1e6; + return 1000000LL; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } @@ -188,11 +188,11 @@ uint64_t vt_pform_wtime() { #elif TIMER == TIMER_CLOCK_GETTIME struct timespec tp; clock_gettime(CLOCK_REALTIME, &tp); - return ((tp.tv_sec - vt_time_base) * 1e9) + tp.tv_nsec; + return ((tp.tv_sec - vt_time_base) * 1000000000LL) + tp.tv_nsec; #elif TIMER == TIMER_GETTIMEOFDAY struct timeval tp; gettimeofday(&tp, 0); - return ((tp.tv_sec - vt_time_base) * 1e6) + tp.tv_usec; + return ((tp.tv_sec - vt_time_base) * 1000000LL) + tp.tv_usec; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_real_cyc(); #elif TIMER == TIMER_PAPI_REAL_USEC diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_crayxt.c b/ompi/contrib/vt/vt/vtlib/vt_pform_crayxt.c index 4d49a0d9c0..f40c1962f1 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_crayxt.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_crayxt.c @@ -88,7 +88,7 @@ void vt_pform_init() { strtok(line, ":"); vt_ticks_per_sec = - strtol((char*) strtok(NULL, " \n"), (char**) NULL, 0) * 1e6; + strtol((char*) strtok(NULL, " \n"), (char**) NULL, 0) * 1000000LL; } else if (!strncmp("timebase", line, 8)) { @@ -177,17 +177,17 @@ char* vt_pform_exec() /* clock resolution */ uint64_t vt_pform_clockres() { #if TIMER == TIMER_DCLOCK - return 1e15; + return 1000000000000000LL; #elif TIMER == TIMER_CYCLE_COUNTER return vt_ticks_per_sec; #elif TIMER == TIMER_CLOCK_GETTIME - return 1e9; + return 1000000000LL; #elif TIMER == TIMER_GETTIMEOFDAY - return 1e6; + return 1000000LL; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } @@ -205,11 +205,11 @@ uint64_t vt_pform_wtime() { #elif TIMER == TIMER_CLOCK_GETTIME struct timespec tp; 
clock_gettime(CLOCK_REALTIME, &tp); - return ((tp.tv_sec - vt_time_base) * 1e9) + tp.tv_nsec; + return ((tp.tv_sec - vt_time_base) * 1000000000LL) + tp.tv_nsec; #elif TIMER == TIMER_GETTIMEOFDAY struct timeval tp; gettimeofday(&tp, 0); - return ((tp.tv_sec - vt_time_base) * 1e6) + tp.tv_usec; + return ((tp.tv_sec - vt_time_base) * 1000000LL) + tp.tv_usec; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_real_cyc(); #elif TIMER == TIMER_PAPI_REAL_USEC diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_generic.c b/ompi/contrib/vt/vt/vtlib/vt_pform_generic.c index a0b74d04f3..402178b684 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_generic.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_generic.c @@ -90,11 +90,11 @@ char* vt_pform_exec() { /* clock resolution */ uint64_t vt_pform_clockres() { #if TIMER == TIMER_GETTIMEOFDAY - return 1e6; + return 1000000LL; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } @@ -103,7 +103,7 @@ uint64_t vt_pform_wtime() { #if TIMER == TIMER_GETTIMEOFDAY struct timeval tp; gettimeofday(&tp, 0); - return ((tp.tv_sec - vt_time_base) * 1e6) + tp.tv_usec; + return ((tp.tv_sec - vt_time_base) * 1000000LL) + tp.tv_usec; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_real_cyc(); #elif TIMER == TIMER_PAPI_REAL_USEC diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_ibm.c b/ompi/contrib/vt/vt/vtlib/vt_pform_ibm.c index 83ae9278d5..8fcd1143b3 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_ibm.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_ibm.c @@ -119,11 +119,11 @@ uint64_t vt_pform_clockres() { } return 1; #elif TIMER == TIMER_POWER_REALTIME - return 1e9; + return 1000000000LL; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } @@ -140,7 +140,7 @@ uint64_t vt_pform_wtime() { timebasestruct_t t; read_real_time(&t, TIMEBASE_SZ); time_base_to_time(&t, TIMEBASE_SZ); - return 
((t.tb_high - vt_time_base) * 1e9) + t.tb_low; + return ((t.tb_high - vt_time_base) * 1000000000LL) + t.tb_low; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_real_cyc(); #elif TIMER == TIMER_PAPI_REAL_USEC diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_linux.c b/ompi/contrib/vt/vt/vtlib/vt_pform_linux.c index dee5d7f62f..3a043de525 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_linux.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_linux.c @@ -92,8 +92,9 @@ uint64_t cylce_counter_frequency(long usleep_time) end_time_cylce_counter = ( end1_cylce_counter+ end2_cylce_counter)/2; /* freq is 1e6 * cylce_counter_time_diff/gettimeofday_time_diff */ - return (uint64_t) ((double)1000000.0*(double)(end_time_cylce_counter-start_time_cylce_counter)/ - (double)(end_time-start_time)); + return (uint64_t) + (1e6*(double)(end_time_cylce_counter-start_time_cylce_counter)/ + (double)(end_time-start_time)); } #endif /* TIMER == TIMER_CYCLE_COUNTER */ @@ -149,7 +150,7 @@ void vt_pform_init() strtok(line, ":"); vt_ticks_per_sec = - strtol((char*) strtok(NULL, " \n"), (char**) NULL, 0) * 1e6; + strtol((char*) strtok(NULL, " \n"), (char**) NULL, 0) * 1000000LL; } else if (!strncmp("timebase", line, 8)) { @@ -263,13 +264,13 @@ uint64_t vt_pform_clockres() #if TIMER == TIMER_CYCLE_COUNTER return vt_ticks_per_sec; #elif TIMER == TIMER_CLOCK_GETTIME - return 1e9; + return 1000000000LL; #elif TIMER == TIMER_GETTIMEOFDAY - return 1e6; + return 1000000LL; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } @@ -320,11 +321,11 @@ uint64_t vt_pform_wtime() #elif TIMER == TIMER_CLOCK_GETTIME struct timespec tp; clock_gettime(CLOCK_REALTIME, &tp); - return ((tp.tv_sec - vt_time_base) * 1e9) + tp.tv_nsec; + return ((tp.tv_sec - vt_time_base) * 1000000000LL) + tp.tv_nsec; #elif TIMER == TIMER_GETTIMEOFDAY struct timeval tp; gettimeofday(&tp, 0); - return ((tp.tv_sec - vt_time_base) * 1e6) + tp.tv_usec; + return 
((tp.tv_sec - vt_time_base) * 1000000LL) + tp.tv_usec; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_real_cyc(); #elif TIMER == TIMER_PAPI_REAL_USEC diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_macos.c b/ompi/contrib/vt/vt/vtlib/vt_pform_macos.c index 74a6621db2..47ebe93573 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_macos.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_macos.c @@ -165,13 +165,13 @@ char* vt_pform_exec() uint64_t vt_pform_clockres() { #if TIMER == TIMER_GETTIMEOFDAY - return 1e6; + return 1000000LL; #elif TIMER == TIMER_CYCLE_COUNTER return vt_ticks_per_sec; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } @@ -181,7 +181,7 @@ uint64_t vt_pform_wtime() #if TIMER == TIMER_GETTIMEOFDAY struct timeval tp; gettimeofday(&tp, 0); - return ((tp.tv_sec - vt_time_base) * 1e6) + tp.tv_usec; + return ((tp.tv_sec - vt_time_base) * 1000000LL) + tp.tv_usec; #elif TIMER == TIMER_CYCLE_COUNTER uint64_t clock_value; diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_necsx.c b/ompi/contrib/vt/vt/vtlib/vt_pform_necsx.c index 878a70de26..30a7329e36 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_necsx.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_necsx.c @@ -71,7 +71,7 @@ char* vt_pform_exec() { /* clock resolution */ uint64_t vt_pform_clockres() { - return 1e6; + return 1000000LL; } /* local or global wall-clock time */ diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_origin.c b/ompi/contrib/vt/vt/vtlib/vt_pform_origin.c index 01aa9cdfd8..399520c80d 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_origin.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_origin.c @@ -90,11 +90,11 @@ char* vt_pform_exec() { /* clock resolution */ uint64_t vt_pform_clockres() { #if TIMER == TIMER_CLOCK_GETTIME - return 1e9; + return 1000000000LL; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } @@ -103,7 +103,7 @@ uint64_t 
vt_pform_wtime() { #if TIMER == TIMER_CLOCK_GETTIME struct timespec tp; clock_gettime(CLOCK_SGI_CYCLE,&tp); - return ((tp.tv_sec - vt_time_base) * 1e9) + tp.tv_nsec; + return ((tp.tv_sec - vt_time_base) * 1000000000LL) + tp.tv_nsec; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_real_cyc(); #elif TIMER == TIMER_PAPI_REAL_USEC diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_sicortex.c b/ompi/contrib/vt/vt/vtlib/vt_pform_sicortex.c index 6c138abd48..34d0d7aeda 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_sicortex.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_sicortex.c @@ -131,11 +131,11 @@ char* vt_pform_exec() uint64_t vt_pform_clockres() { #if TIMER == TIMER_GETTIMEOFDAY - return 1e6; + return 1000000LL; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } @@ -145,7 +145,7 @@ uint64_t vt_pform_wtime() #if TIMER == TIMER_GETTIMEOFDAY struct timeval tp; gettimeofday(&tp, 0); - return ((tp.tv_sec - vt_time_base) * 1e6) + tp.tv_usec; + return ((tp.tv_sec - vt_time_base) * 1000000LL) + tp.tv_usec; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_real_cyc(); #elif TIMER == TIMER_PAPI_REAL_USEC diff --git a/ompi/contrib/vt/vt/vtlib/vt_pform_sun.c b/ompi/contrib/vt/vt/vtlib/vt_pform_sun.c index 5d1a260d28..95c7fff8fd 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pform_sun.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pform_sun.c @@ -113,11 +113,11 @@ char* vt_pform_exec() { /* clock resolution */ uint64_t vt_pform_clockres() { #if TIMER == TIMER_GETHRTIME - return 1e9; + return 1000000000LL; #elif TIMER == TIMER_PAPI_REAL_CYC return vt_metric_clckrt(); #elif TIMER == TIMER_PAPI_REAL_USEC - return 1e6; + return 1000000LL; #endif } diff --git a/ompi/contrib/vt/vt/vtlib/vt_plugin_cntr.c b/ompi/contrib/vt/vt/vtlib/vt_plugin_cntr.c index 45b0360d4c..bf6a467d56 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_plugin_cntr.c +++ b/ompi/contrib/vt/vt/vtlib/vt_plugin_cntr.c @@ -57,7 +57,7 @@ static uint32_t 
max_values_callback; static uint32_t all_group = INVALID_GROUP_NUMBER; static uint32_t host_group = INVALID_GROUP_NUMBER; -static uint32_t process_group = INVALID_GROUP_NUMBER; +static uint32_t thread_group = INVALID_GROUP_NUMBER; /* whether plugins are used or not*/ uint8_t vt_plugin_cntr_used = 0; @@ -199,7 +199,7 @@ void vt_plugin_cntr_init() { } /*go through all plugins:*/ for (i = 0; i < nr_selected_plugins; i++) { - uint32_t tid = VT_CURRENT_THREAD; + uint32_t group = 0; current_plugin = plugins[i]; vt_cntl_msg(2, "Loading plugin counter library: lib%s.so", current_plugin); /* next one is stored in next_plugin, @@ -234,35 +234,41 @@ void vt_plugin_cntr_init() { /* check the run per type */ if (info.run_per == VT_PLUGIN_CNTR_PER_PROCESS) { - if (process_group == 0xFFFFFFFF) + if (thread_group == INVALID_GROUP_NUMBER){ vt_cntl_msg(3, "No process group defined, using master thread for %s", current_plugin); - else - tid = process_group; + } + else{ +# if (defined(VT_MT) || defined(VT_HYB)) + /* only called per process */ + group = vt_get_curid(); + thread_group = group; +# else + /* not multithreaded -> keep information on local process */ +#endif + } } if (info.run_per == VT_PLUGIN_CNTR_PER_HOST) { if (!vt_my_trace_is_master) continue; - else if (host_group == 0xFFFFFFFF) - vt_cntl_msg(3, - "No host group defined, using first thread on each host for %s", - current_plugin); - else - tid = host_group; + else if (host_group == INVALID_GROUP_NUMBER){ + host_group = vt_node_pgid; + vt_def_procgrp_attributes(VT_MY_THREAD ,vt_node_pgid, + VT_PROCGRP_HASCOUNTERS); + } + group = host_group; } if (info.run_per == VT_PLUGIN_CNTR_ONCE) { - if (vt_my_trace == 0) - if (all_group == 0xFFFFFFFF) - vt_cntl_msg( - 3, - "No all group defined, using first thread of first process for %s", - current_plugin); - else - tid = all_group; - else + if (vt_my_trace != 0) continue; + else if (all_group == INVALID_GROUP_NUMBER){ + all_group = vt_all_pgid; + 
vt_def_procgrp_attributes(VT_MY_THREAD ,vt_all_pgid, + VT_PROCGRP_HASCOUNTERS); + } + group = all_group; } if (info.add_counter == NULL) { @@ -404,7 +410,7 @@ void vt_plugin_cntr_init() { continue; } /* define a counter group for every plugin*/ - current->counter_group = vt_def_counter_group(tid, current_plugin); + current->counter_group = vt_def_counter_group(VT_MY_THREAD, current_plugin); /* now search for all available events on that plugin */ next_plugin_metric = env_vt_plugin_metrics; @@ -485,14 +491,15 @@ void vt_plugin_cntr_init() { otf_prop = current_event_info->cntr_property; /* define new counter */ current->vt_counter_ids[current->num_selected_events - 1] - = vt_def_counter(tid, + = vt_def_counter(VT_MY_THREAD, current->selected_events[current->num_selected_events - 1], - otf_prop, current->counter_group, unit); + unit, otf_prop, current->counter_group, group); + if (current->info.synch != VT_PLUGIN_CNTR_SYNCH) { char buffer[512]; sprintf(buffer, "%s_%s", current_plugin, current_event_info->name); current->vt_asynch_keys[current->num_selected_events - 1] - = vt_def_async_source(tid, buffer); + = vt_def_async_source(VT_MY_THREAD, buffer); } /* enable plugin counters */ vt_plugin_cntr_used = 1; @@ -525,8 +532,9 @@ void vt_plugin_cntr_thread_init(VTThrd * thrd, uint32_t tid) { /* then enable the counter if this thread has to */ if (vt_plugin_handles[i][j].info.run_per == VT_PLUGIN_CNTR_ONCE) { - if ((vt_my_trace != 0) || (thrd != VTThrdv[0])) + if ((vt_my_trace != 0) || (thrd != VTThrdv[0])){ continue; + } } if (vt_plugin_handles[i][j].info.run_per == VT_PLUGIN_CNTR_PER_HOST) if ((!vt_my_trace_is_master) || (thrd != VTThrdv[0])) @@ -619,12 +627,40 @@ void vt_plugin_cntr_thread_disable_counters(VTThrd * thrd) { * This should be called after the last thread exited. 
* It should free all ressources used by vt_plugin */ -void vt_plugin_cntr_finalize() { +void vt_plugin_cntr_finalize(uint32_t tnum) { uint32_t i, j; int k; vt_cntl_msg(3, "Process %i exits plugins", vt_my_ptrace); + +# if (defined(VT_MT) || defined(VT_HYB)) + if ( thread_group != INVALID_GROUP_NUMBER ) + /* write thread process group definition */ + { + uint32_t* grpv; + char tmp_char[128]; + + /* get member array */ + + grpv = (uint32_t*)malloc(tnum * sizeof(uint32_t)); + if ( grpv == NULL ) + vt_error(); + + for (i = 0; i < tnum; i++) + grpv[i] = VT_PROCESS_ID(vt_my_trace, i); + + /* prepend thread process group identifier to name */ + snprintf(tmp_char, sizeof(tmp_char) - 1, + "Threads of Process %d",vt_my_trace); + fprintf(stderr,"%u,%s,0,%u,...,%u",VT_MASTER_THREAD,tmp_char,tnum,thread_group); + /* write thread process group definition */ + vt_def_procgrp(VT_MASTER_THREAD, tmp_char, 0, tnum, grpv, thread_group); + + free(grpv); + } +#endif + /* free all ressources */ for (i = 0; i < VT_PLUGIN_CNTR_SYNCH_TYPE_MAX; i++) { @@ -655,6 +691,7 @@ void vt_plugin_cntr_finalize() { free(vt_plugin_handles); if (nr_plugins) free(nr_plugins); + vt_cntl_msg(3, "Process %i exits plugins done", vt_my_ptrace); } @@ -767,11 +804,12 @@ static void add_events(struct vt_plugin current_plugin, VTThrd * thrd) { vt_error_msg("Failed to allocate memory for callback buffer\n"); } } - current[*current_size].tid = VT_MY_THREAD; + + current[*current_size].tid = VT_MY_THREAD;/* switch (current_plugin.info.run_per) { case VT_PLUGIN_CNTR_PER_PROCESS: - if (process_group != INVALID_GROUP_NUMBER) - current[*current_size].tid = process_group; + if (thread_group != INVALID_GROUP_NUMBER) + current[*current_size].tid = thread_group; break; case VT_PLUGIN_CNTR_PER_HOST: if (current_plugin.info.run_per == VT_PLUGIN_CNTR_PER_HOST) @@ -783,7 +821,7 @@ static void add_events(struct vt_plugin current_plugin, VTThrd * thrd) { if (all_group != INVALID_GROUP_NUMBER) current[*current_size].tid = all_group; 
break; - } + }*/ /* Next counter */ (*current_size)++; } @@ -908,12 +946,12 @@ int32_t callback_function(void * ID, vt_plugin_cntr_timevalue tv) { #define WRITE_ASYNCH_DATA(thrd, counter, timevalue, dummy_time) \ if (VTTHRD_TRACE_STATUS(thrd) == VT_TRACE_ON){ \ if (timevalue.timestamp > 0){ \ - vt_guarantee_buffer(counter.tid, sizeof(VTBuf_Entry_KeyValue) \ + vt_guarantee_buffer(VT_MY_THREAD, sizeof(VTBuf_Entry_KeyValue) \ +sizeof(VTBuf_Entry_Counter)); \ - vt_next_async_time(counter.tid, \ + vt_next_async_time(VT_MY_THREAD, \ counter.vt_asynch_key, \ timevalue.timestamp); \ - vt_count( counter.tid, \ + vt_count( VT_MY_THREAD, \ &dummy_time, \ counter.vt_counter_id, \ timevalue.value); \ @@ -1056,5 +1094,5 @@ void vt_plugin_cntr_set_host_group(uint32_t group_id) { host_group = group_id; } void vt_plugin_cntr_set_process_group(uint32_t group_id) { - process_group = group_id; + thread_group = group_id; } diff --git a/ompi/contrib/vt/vt/vtlib/vt_plugin_cntr_int.h b/ompi/contrib/vt/vt/vtlib/vt_plugin_cntr_int.h index 048dd08c53..25b1290e24 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_plugin_cntr_int.h +++ b/ompi/contrib/vt/vt/vtlib/vt_plugin_cntr_int.h @@ -80,8 +80,9 @@ void vt_plugin_cntr_thread_exit(VTThrd * thrd); /** * This should free all general ressources + * nr_threads is the number of threads the process generated */ -void vt_plugin_cntr_finalize(void); +void vt_plugin_cntr_finalize(uint32_t nr_threads); /** * This should be used to check whether the current thread is * a monitor thread of a callback function. 
diff --git a/ompi/contrib/vt/vt/vtlib/vt_pomp.c b/ompi/contrib/vt/vt/vtlib/vt_pomp.c index b06336097a..8f6b5884be 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_pomp.c +++ b/ompi/contrib/vt/vt/vtlib/vt_pomp.c @@ -397,6 +397,8 @@ void POMP_Workshare_exit(struct ompregdescr* r) { */ void POMP_Init_lock(omp_lock_t *s) { + if ( !pomp_initialized ) POMP_Init(); + if ( IS_POMP_TRACE_ON ) { uint64_t time; time = vt_pform_wtime(); @@ -464,6 +466,8 @@ int POMP_Test_lock(omp_lock_t *s) { } void POMP_Init_nest_lock(omp_nest_lock_t *s) { + if ( !pomp_initialized ) POMP_Init(); + if ( IS_POMP_TRACE_ON ) { uint64_t time; time = vt_pform_wtime(); @@ -537,6 +541,8 @@ int POMP_Test_nest_lock(omp_nest_lock_t *s) { */ VT_DECLDEF(void POMP_Init_lock_f(omp_lock_t *s)) { + if ( !pomp_initialized ) POMP_Init(); + if ( IS_POMP_TRACE_ON ) { uint64_t time; time = vt_pform_wtime(); @@ -620,6 +626,8 @@ VT_DECLDEF(int POMP_Test_lock_f(omp_lock_t *s)) { #ifndef __osf__ VT_DECLDEF(void POMP_Init_nest_lock_f(omp_nest_lock_t *s)) { + if ( !pomp_initialized ) POMP_Init(); + if ( IS_POMP_TRACE_ON ) { uint64_t time; time = vt_pform_wtime(); diff --git a/ompi/contrib/vt/vt/vtlib/vt_rusage.c b/ompi/contrib/vt/vt/vtlib/vt_rusage.c index 1fd60ceb30..2d7b0bba01 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_rusage.c +++ b/ompi/contrib/vt/vt/vtlib/vt_rusage.c @@ -10,6 +10,8 @@ * See the file COPYING in the package base directory for details **/ +#define _GNU_SOURCE + #include "config.h" #include @@ -34,6 +36,14 @@ /* maximum number of resource usage counters */ #define RU_CNTR_MAXNUM 16 +/* if possible, get resource usage measures for the calling thread + (RUSAGE_THREAD) instead of the process (RUSAGE_SELF) */ +#if defined(HAVE_DECL_RUSAGE_THREAD) && HAVE_DECL_RUSAGE_THREAD +# define RU_WHO RUSAGE_THREAD +#else /* HAVE_DECL_RUSAGE_THREAD */ +# define RU_WHO RUSAGE_SELF +#endif /* HAVE_DECL_RUSAGE_THREAD */ + /* resource usage counter indices */ typedef enum { RU_UTIME, RU_STIME, RU_MAXRSS, RU_IXRSS, @@ -201,9 +211,10 
@@ void vt_rusage_init() vt_rusage_cidv[i] = vt_def_counter(VT_CURRENT_THREAD, ru_active_cntrv[i]->name, + ru_active_cntrv[i]->unit, ru_active_cntrv[i]->prop, gid, - ru_active_cntrv[i]->unit); + 0); } } @@ -213,7 +224,7 @@ void vt_rusage_read(struct vt_rusage* rusage, uint64_t* values, uint32_t* change uint64_t new_value = 0; /* get resource usage */ - if ( getrusage(RUSAGE_SELF, &(rusage->ru)) == -1 ) + if ( getrusage(RU_WHO, &(rusage->ru)) == -1 ) vt_error_msg("getrusage: %s", strerror(errno)); #ifdef RU_WRITE_ONLY_CHANGED_VALS @@ -227,13 +238,13 @@ void vt_rusage_read(struct vt_rusage* rusage, uint64_t* values, uint32_t* change { case RU_UTIME: { - new_value = ((uint64_t)rusage->ru.ru_utime.tv_sec * 1e6 + + new_value = ((uint64_t)rusage->ru.ru_utime.tv_sec * 1000000LL + (uint64_t)rusage->ru.ru_utime.tv_usec); break; } case RU_STIME: { - new_value = ((uint64_t)rusage->ru.ru_stime.tv_sec * 1e6 + + new_value = ((uint64_t)rusage->ru.ru_stime.tv_sec * 1000000LL + (uint64_t)rusage->ru.ru_stime.tv_usec); break; } diff --git a/ompi/contrib/vt/vt/vtlib/vt_thrd.c b/ompi/contrib/vt/vt/vtlib/vt_thrd.c index 9341be1f92..dc695fa826 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_thrd.c +++ b/ompi/contrib/vt/vt/vtlib/vt_thrd.c @@ -36,12 +36,12 @@ VTThrd** VTThrdv = NULL; /* number of thread objects */ -uint32_t VTThrdn = 1; +uint32_t VTThrdn = 0; /* maximum number of threads */ uint32_t VTThrdMaxNum = 0; -/* mutexes for locking */ +/* predefined mutexes for locking */ #if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) VTThrdMutex* VTThrdMutexEnv = NULL; VTThrdMutex* VTThrdMutexIds = NULL; @@ -78,7 +78,7 @@ void VTThrd_init() (for Java this will be done in VTThrd_initJava(), 'cause it gets the read thread name) */ #if !defined(VT_JAVA) - VTThrd_create(0, 0, NULL, 0); + VTThrd_create(NULL, 0, 0); VTThrd_open(0); #endif /* VT_JAVA */ } @@ -95,32 +95,13 @@ void VTThrd_finalize() free(VTThrdv); } -uint32_t VTThrd_createNewThreadId() +uint32_t VTThrd_create(const char* tname, 
uint32_t ptid, uint8_t is_virtual) { + VTThrd* thrd; + uint32_t tid; + uint32_t child_no = 0; -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) - VTTHRD_LOCK_ENV(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ - if ( VTThrdn > VTThrdMaxNum ) - { -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) - VTTHRD_UNLOCK_ENV(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ - vt_error_msg("Cannot create more than %d threads", VTThrdMaxNum); - } - tid = VTThrdn; - VTThrdn++; -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) - VTTHRD_UNLOCK_ENV(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ - - return tid; -} - -void VTThrd_create(uint32_t tid, uint32_t ptid, const char* tname, uint8_t is_virtual) -{ - VTThrd *thread; #if defined(VT_METR) uint32_t num_metrics = (uint32_t)vt_metric_num(); #endif /* VT_METR */ @@ -128,8 +109,31 @@ void VTThrd_create(uint32_t tid, uint32_t ptid, const char* tname, uint8_t is_vi uint32_t num_rusage = (uint32_t)vt_rusage_num(); #endif /* VT_RUSAGE */ - thread = (VTThrd*)calloc(1, sizeof(VTThrd)); - if ( thread == NULL ) +#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) + VTTHRD_LOCK_ENV(); +#endif /* VT_MT || VT_HYB || VT_JAVA */ + + /* create new thread ID */ + + tid = VTThrdn++; + if ( VTThrdn > VTThrdMaxNum ) + { +#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) + VTTHRD_UNLOCK_ENV(); +#endif /* VT_MT || VT_HYB || VT_JAVA */ + vt_error_msg("Cannot create more than %d threads", VTThrdMaxNum); + } + + /* get child number (thread name suffix) */ + if ( tid != 0 ) + child_no = ++(VTThrdv[ptid]->child_num); + +#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) + VTTHRD_UNLOCK_ENV(); +#endif /* VT_MT || VT_HYB || VT_JAVA */ + + thrd = (VTThrd*)calloc(1, sizeof(VTThrd)); + if ( thrd == NULL ) vt_error(); /* set thread name, if available */ @@ -140,47 +144,39 @@ void VTThrd_create(uint32_t tid, uint32_t ptid, const char* tname, uint8_t is_vi } /* set thread name */ - strncpy(thread->name, tname, 
sizeof(thread->name)); - thread->name[sizeof(thread->name)-1] = '\0'; + strncpy(thrd->name, tname, sizeof(thrd->name)); + thrd->name[sizeof(thrd->name)-1] = '\0'; /* set thread name suffix */ if ( tid != 0 ) { -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) - VTTHRD_LOCK_ENV(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ - - snprintf(thread->name_suffix, sizeof(thread->name_suffix)-1, "%s:%d", - VTThrdv[ptid]->name_suffix, ++(VTThrdv[ptid]->child_num)); - -#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) - VTTHRD_UNLOCK_ENV(); -#endif /* VT_MT || VT_HYB || VT_JAVA */ + snprintf(thrd->name_suffix, sizeof(thrd->name_suffix)-1, "%s:%d", + VTThrdv[ptid]->name_suffix, child_no); } /* set parent ID of thread */ - thread->parent_tid = ptid; + thrd->parent_tid = ptid; /* set the virtual thread flag */ - thread->is_virtual_thread = is_virtual; + thrd->is_virtual = is_virtual; #if defined(VT_GETCPU) - thread->cpuid_val = (uint32_t)-1; + thrd->cpuid_val = (uint32_t)-1; #endif /* VT_GETCPU */ #if defined(VT_RUSAGE) if ( num_rusage > 0 ) { /* create rusage object */ - thread->ru_obj = vt_rusage_create(); + thrd->ru_obj = vt_rusage_create(); /* initialize per-thread arrays for rusage counter values */ - thread->ru_valv = (uint64_t*)calloc(num_rusage, sizeof(uint64_t)); - if ( thread->ru_valv == NULL ) + thrd->ru_valv = (uint64_t*)calloc(num_rusage, sizeof(uint64_t)); + if ( thrd->ru_valv == NULL ) vt_error(); /* initialize next timestamp for reading rusage counters */ - thread->ru_next_read = 0; + thrd->ru_next_read = 0; } #endif /* VT_RUSAGE */ @@ -188,44 +184,45 @@ void VTThrd_create(uint32_t tid, uint32_t ptid, const char* tname, uint8_t is_vi if ( num_metrics > 0 && is_virtual == 0) { /* create event set */ - thread->metv = vt_metric_create(); + thrd->metv = vt_metric_create(); # if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) /* initialize per-thread arrays for counter offsets */ - thread->offv = (uint64_t*)calloc(num_metrics, 
sizeof(uint64_t)); - if ( thread->offv == NULL ) + thrd->offv = (uint64_t*)calloc(num_metrics, sizeof(uint64_t)); + if ( thrd->offv == NULL ) vt_error(); #endif /* VT_MT || VT_HYB || VT_JAVA */ /* initialize per-thread arrays for counter values */ - thread->valv = (uint64_t*)calloc(num_metrics, sizeof(uint64_t)); - if ( thread->valv == NULL ) + thrd->valv = (uint64_t*)calloc(num_metrics, sizeof(uint64_t)); + if ( thrd->valv == NULL ) vt_error(); } #endif /* VT_METR */ #if !defined(VT_DISABLE_RFG) /* initialize region filter and grouping management */ - thread->rfg_regions = RFG_Regions_init(); + thrd->rfg_regions = RFG_Regions_init(); - if( thread->rfg_regions == NULL ) + if( thrd->rfg_regions == NULL ) vt_error_msg("Could not initialize region filter and grouping management"); #endif /* VT_DISABLE_RFG */ /* enable tracing */ - thread->trace_status = VT_TRACE_ON; + thrd->trace_status = VT_TRACE_ON; - VTThrdv[tid] = thread; + VTThrdv[tid] = thrd; #if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) VTTHRD_LOCK_ENV(); +#endif /* VT_MT || VT_HYB || VT_JAVA */ vt_cntl_msg(2, "Thread object #%u created, total number is %u", tid, VTThrdn); +#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) VTTHRD_UNLOCK_ENV(); -#else /* VT_MT || VT_HYB || VT_JAVA */ - vt_cntl_msg(2, "Thread object #%u created, total number is %u", - tid, VTThrdn); #endif /* VT_MT || VT_HYB || VT_JAVA */ + + return tid; } void VTThrd_open(uint32_t tid) @@ -254,10 +251,8 @@ void VTThrd_open(uint32_t tid) thrd->parent_tid, tid, bsize); } -#if (defined(VT_PLUGIN_CNTR) || defined(VT_CUDARTWRAP)) - if ( tid != 0 && VTThrdv[tid]->is_virtual_thread ) + if ( tid != 0 && thrd->is_virtual ) return; -#endif /* VT_PLUGIN_CNTR || VT_CUDARTWRAP */ #if (defined (VT_MPI) || defined (VT_HYB)) /* initialize first matching ID for MPI collective ops. 
*/ @@ -348,7 +343,7 @@ void VTThrd_delete(VTThrd* thrd, uint32_t tid) #endif /* VT_RUSAGE */ #if defined(VT_METR) - if ( vt_metric_num() > 0 && thrd->is_virtual_thread == 0 ) + if ( vt_metric_num() > 0 && thrd->is_virtual == 0 ) { if ( thrd->metv ) { @@ -406,7 +401,7 @@ void VTThrd_destroy(VTThrd* thrd, uint32_t tid) #endif /* VT_RUSAGE */ #if defined(VT_METR) - if ( vt_metric_num() > 0 && thrd->is_virtual_thread == 0 ) + if ( vt_metric_num() > 0 && thrd->is_virtual == 0 ) { if ( thrd->metv ) { diff --git a/ompi/contrib/vt/vt/vtlib/vt_thrd.h b/ompi/contrib/vt/vt/vtlib/vt_thrd.h index f8980b6d83..9036ab68af 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_thrd.h +++ b/ompi/contrib/vt/vt/vtlib/vt_thrd.h @@ -38,11 +38,11 @@ #include "rfg.h" #if (defined(VT_MT) || defined(VT_HYB)) -# define VT_MY_THREAD_IS_ALIVE VTThrd_is_alive() +# define VT_MY_THREAD_IS_ALIVE VTThrd_isAlive() # define VT_MY_THREAD VTThrd_getThreadId() # define VT_CHECK_THREAD VTThrd_registerThread(0) #elif defined(VT_JAVA) -# define VT_MY_THREAD_IS_ALIVE VTThrd_is_alive() +# define VT_MY_THREAD_IS_ALIVE VTThrd_isAlive() # define VT_MY_THREAD VTThrd_getThreadId() # define VT_CHECK_THREAD #else @@ -64,43 +64,44 @@ EXTERN uint32_t VTThrdMaxNum; */ typedef struct { - VTGen* gen; /**< trace file and buffer */ + VTGen* gen; /**< trace file and buffer */ - char name[512]; /**< thread name */ - char name_suffix[128]; /**< suffix of thread name */ + char name[512]; /**< thread name */ + char name_suffix[128]; /**< suffix of thread name */ - int stack_level; /**< current call stack level */ - int stack_level_at_off; /**< call stack level at trace off */ - int stack_level_at_rewind_mark; /**< call stack level at rewind mark */ + int stack_level; /**< current call stack level */ + int stack_level_at_off; /**< call stack level at trace off */ + int stack_level_at_rewind_mark; /**< call stack level at rewind mark */ - int8_t trace_status; /**< trace status: - VT_TRACE_ON, - VT_TRACE_OFF, or - VT_TRACE_OFF_PERMANENT */ + 
int8_t trace_status; /**< trace status: + VT_TRACE_ON, + VT_TRACE_OFF, or + VT_TRACE_OFF_PERMANENT */ - uint32_t parent_tid; /**< parent thread id */ - uint32_t child_num; /**< number of child threads */ + uint32_t parent_tid; /**< parent thread id */ + uint32_t child_num; /**< number of child threads */ - - uint8_t is_virtual_thread; /**< flag: virtual thread? */ + uint8_t is_virtual; /**< flag: is virtual thread? (e.g. GPU) */ #if !defined(VT_DISABLE_RFG) - RFG_Regions* rfg_regions; + RFG_Regions* rfg_regions; /**< RFG regions object */ #endif /* VT_DISABLE_RFG */ #if (defined (VT_MPI) || defined (VT_HYB)) - uint64_t mpicoll_next_matchingid; + uint64_t mpicoll_next_matchingid; /**< matching id counter for MPI collective + operations. */ #endif /* VT_MPI || VT_HYB */ #if defined(VT_IOWRAP) - uint8_t io_tracing_state; /**< save value of enabled flag during suspend */ - uint8_t io_tracing_suspend_cnt; /**< save how often suspend was called */ - uint8_t io_tracing_enabled; /**< actual mode of I/O tracing operation */ + uint8_t io_tracing_state; /**< save value of enabled flag during + suspend */ + uint8_t io_tracing_suspend_cnt; /**< save how often suspend was called */ + uint8_t io_tracing_enabled; /**< actual mode of I/O tracing operation */ #endif /* VT_IOWRAP */ @@ -113,30 +114,32 @@ typedef struct #if defined(VT_GETCPU) - uint32_t cpuid_val; /**< cpu id counter value */ + uint32_t cpuid_val; /**< cpu id counter value */ #endif /* VT_GETCPU */ #if defined(VT_RUSAGE) - uint64_t ru_next_read; /**< next timestamp for reading rusage counters */ - uint64_t* ru_valv; /**< vector of rusage values */ - struct vt_rusage* ru_obj; /**< rusage object */ + uint64_t ru_next_read; /**< next timestamp for reading rusage + counters */ + uint64_t* ru_valv; /**< vector of rusage values */ + struct vt_rusage* ru_obj; /**< rusage object */ #endif /* VT_RUSAGE */ #if defined(VT_METR) - uint64_t* offv; /**< vector of counter offsets */ - uint64_t* valv; /**< vector of counter values */ 
- struct vt_metv* metv; /**< vector of metric objects (i.e.the event set) */ + uint64_t* offv; /**< vector of counter offsets */ + uint64_t* valv; /**< vector of counter values */ + struct vt_metv* metv; /**< vector of metric objects + (i.e.the event sets) */ #endif /* VT_METR */ #if defined(VT_PLUGIN_CNTR) - void* plugin_cntr_defines; /**< plugin cntr handle */ + void* plugin_cntr_defines; /**< plugin cntr handle */ #endif /* VT_PLUGIN_CNTR || VT_CUDARTWRAP */ @@ -144,108 +147,118 @@ typedef struct /* Accessor macros */ -#define VTTHRD_MY_VTTHRD (VTThrdv[VT_MY_THREAD]) +#define VTTHRD_MY_VTTHRD (VTThrdv[VT_MY_THREAD]) /* flag: is tracing enabled? */ -#define VTTHRD_TRACE_STATUS(thrd) (thrd->trace_status) +#define VTTHRD_TRACE_STATUS(thrd) (thrd->trace_status) /* trace file and buffer */ -#define VTTHRD_GEN(thrd) (thrd->gen) +#define VTTHRD_GEN(thrd) (thrd->gen) /* prefix of thread's name */ -#define VTTHRD_NAME_PREFIX(thrd) (thrd->name_prefix) +#define VTTHRD_NAME_PREFIX(thrd) (thrd->name_prefix) /* suffix of thread's name */ -#define VTTHRD_NAME_SUFFIX(thrd) (thrd->name_suffix) +#define VTTHRD_NAME_SUFFIX(thrd) (thrd->name_suffix) /* external name of thread */ -#define VTTHRD_NAME_EXTERNAL(thrd) (thrd->name_extern) +#define VTTHRD_NAME_EXTERNAL(thrd) (thrd->name_extern) /* parent thread id */ -#define VTTHRD_PARENT_TID(thrd) (thrd->parent_tid); +#define VTTHRD_PARENT_TID(thrd) (thrd->parent_tid); /* number of child threads */ -#define VTTHRD_CHILD_NUM(thrd) (thrd->child_num); +#define VTTHRD_CHILD_NUM(thrd) (thrd->child_num); /* current call stack level */ -#define VTTHRD_STACK_LEVEL(thrd) (thrd->stack_level) +#define VTTHRD_STACK_LEVEL(thrd) (thrd->stack_level) /* call stack level at trace off */ -#define VTTHRD_STACK_LEVEL_AT_OFF(thrd) (thrd->stack_level_at_off) +#define VTTHRD_STACK_LEVEL_AT_OFF(thrd) \ + (thrd->stack_level_at_off) /* call stack level at rewind mark */ #define VTTHRD_STACK_LEVEL_AT_REWIND_MARK(thrd) \ - (thrd->stack_level_at_rewind_mark) + 
(thrd->stack_level_at_rewind_mark) /* push the call stack */ -#define VTTHRD_STACK_PUSH(thrd) (thrd->stack_level)++ +#define VTTHRD_STACK_PUSH(thrd) (thrd->stack_level)++ /* pop the call stack */ -#define VTTHRD_STACK_POP(thrd) (thrd->stack_level)-- +#define VTTHRD_STACK_POP(thrd) if(--(thrd->stack_level) < 0) \ + vt_error_msg("Stack underflow"); -/* RFG regions */ -#define VTTHRD_RFGREGIONS(thrd) (thrd->rfg_regions) +/* RFG regions object */ +#define VTTHRD_RFGREGIONS(thrd) (thrd->rfg_regions) -/* flag: virtual thread? */ -#define VTTHRD_IS_VIRTUAL_THREAD(thrd) (thrd->is_virtual_thread) +/* flag: is virtual thread? */ +#define VTTHRD_IS_VIRTUAL(thrd) (thrd->is_virtual) #if (defined (VT_MPI) || defined (VT_HYB)) -#define VTTHRD_MPICOLLOP_NEXT_MATCHINGID(thrd) (thrd->mpicoll_next_matchingid++) +/* matching id counter for MPI coll. ops. */ +#define VTTHRD_MPICOLLOP_NEXT_MATCHINGID(thrd) \ + (thrd->mpicoll_next_matchingid++) #endif /* VT_MPI || VT_HYB */ #if (defined (VT_IOWRAP)) /* save enabled/disabled state of I/O tracing when switching off temporarily */ -#define VTTHRD_IO_TRACING_STATE(thrd) (thrd->io_tracing_state) -#define VTTHRD_IO_TRACING_SUSPEND_CNT(thrd) (thrd->io_tracing_suspend_cnt) +#define VTTHRD_IO_TRACING_STATE(thrd) \ + (thrd->io_tracing_state) +#define VTTHRD_IO_TRACING_SUSPEND_CNT(thrd) \ + (thrd->io_tracing_suspend_cnt) /* flag: is I/O tracing enabled? 
*/ -#define VTTHRD_IO_TRACING_ENABLED(thrd) (thrd->io_tracing_enabled) +#define VTTHRD_IO_TRACING_ENABLED(thrd) \ + (thrd->io_tracing_enabled) #endif /* VT_IOWRAP */ #if (defined (VT_IOWRAP) || (defined(HAVE_MPI2_IO) && HAVE_MPI2_IO)) -#define VTTHRD_IO_NEXT_MATCHINGID(thrd) (thrd->io_next_matchingid++) -#define VTTHRD_IO_NEXT_HANDLE(thrd) (thrd->io_next_handle++) +#define VTTHRD_IO_NEXT_MATCHINGID(thrd) \ + (thrd->io_next_matchingid++) +#define VTTHRD_IO_NEXT_HANDLE(thrd) \ + (thrd->io_next_handle++) #endif /* VT_IOWRAP || (HAVE_MPI2_IO && HAVE_MPI2_IO) */ #if (defined (VT_GETCPU)) /* cpu id counter value */ -#define VTTHRD_CPUID_VAL(thrd) (thrd->cpuid_val) +#define VTTHRD_CPUID_VAL(thrd) (thrd->cpuid_val) #endif /* VT_GETCPU */ #if (defined (VT_RUSAGE)) /* next timestamp for reading rusage counters */ -#define VTTHRD_RU_NEXT_READ(thrd) (thrd->ru_next_read) +#define VTTHRD_RU_NEXT_READ(thrd) (thrd->ru_next_read) /* rusage values */ -#define VTTHRD_RU_VALV(thrd) (thrd->ru_valv) +#define VTTHRD_RU_VALV(thrd) (thrd->ru_valv) /* rusage object */ -#define VTTHRD_RU_OBJ(thrd) (thrd->ru_obj) +#define VTTHRD_RU_OBJ(thrd) (thrd->ru_obj) #endif /* VT_RUSAGE */ #if (defined (VT_METR)) /* vector of metric offsets */ -#define VTTHRD_OFFV(thrd) (thrd->offv) +#define VTTHRD_OFFV(thrd) (thrd->offv) /* vector of metric values */ -#define VTTHRD_VALV(thrd) (thrd->valv) +#define VTTHRD_VALV(thrd) (thrd->valv) /* vector of metric objects (i.e., event sets) */ -#define VTTHRD_METV(thrd) (thrd->metv) +#define VTTHRD_METV(thrd) (thrd->metv) #endif /* VT_METR */ #if defined(VT_PLUGIN_CNTR) /* plugin cntr handle */ -#define VTTHRD_PLUGIN_CNTR_DEFINES(thrd) (thrd->plugin_cntr_defines) +#define VTTHRD_PLUGIN_CNTR_DEFINES(thrd) \ + (thrd->plugin_cntr_defines) #endif /* VT_PLUGIN_CNTR */ @@ -253,59 +266,54 @@ typedef struct /** * Initialize thread object management. */ -EXTERN void VTThrd_init( void ); +EXTERN void VTThrd_init( void ); /** * Finalize thread object management. 
*/ -EXTERN void VTThrd_finalize( void ); +EXTERN void VTThrd_finalize( void ); /** - * Increments the global thread counter and returns a new valid thread id. + * Creates a new thread object. * - * @return the requested thread id - */ -EXTERN uint32_t VTThrd_createNewThreadId( void ); - -/** - * Creates a thread object. + * @param tname thread name (optional) + * @param ptid the ID of the parent thread/process + * @param is_virtual flag: is the thread a virtual thread? (e.g. GPU) * - * @param tid the thread id - * @param ptid the id of the parent thread/process - * @param tname the type of thread (e.g. PThread, OpenMP-Thread, VirtualThread) - * @param is_virtual flag: is the thread a virtual thread (e.g. CUDA) + * @return thread ID associated with the new thread object */ -EXTERN void VTThrd_create(uint32_t tid, uint32_t ptid, const char* tname, uint8_t is_virtual); +EXTERN uint32_t VTThrd_create(const char* tname, uint32_t ptid, + uint8_t is_virtual); /** * Free thread object. * - * @param thrd pointer to the thread structure - * @param tid the thread id + * @param thrd thread object + * @param tid thread ID */ -EXTERN void VTThrd_delete( VTThrd* thrd, uint32_t tid ); +EXTERN void VTThrd_delete(VTThrd* thrd, uint32_t tid); /** - * Destroys a thread object. + * Destroy thread object. * - * @param thrd pointer to the thread structure - * @param tid the thread id + * @param thrd thread object + * @param tid thread ID */ -EXTERN void VTThrd_destroy( VTThrd* thrd, uint32_t tid ); +EXTERN void VTThrd_destroy(VTThrd* thrd, uint32_t tid); /** * Open associated trace file. * - * @param tid the thread id + * @param tid thread ID */ -EXTERN void VTThrd_open( uint32_t tid ); +EXTERN void VTThrd_open(uint32_t tid); /** * Close associated trace file. 
* - * @param thrd pointer to the thread structure + * @param thrd pointer to the thread structure */ -EXTERN void VTThrd_close( VTThrd* thrd ); +EXTERN void VTThrd_close(VTThrd* thrd); #if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) @@ -319,55 +327,57 @@ typedef struct VTThrdMutex_struct VTThrdMutex; #if !defined(VT_JAVA) # if defined(VT_THRD_PTHREAD) - EXTERN void VTThrd_initPthread( void ); + EXTERN void VTThrd_initPthread(void); # elif defined(VT_THRD_OMP) - EXTERN void VTThrd_initOmp( void ); + EXTERN void VTThrd_initOmp(void); # endif /* VT_THRD_[PTHREAD|OMP] */ - EXTERN void VTThrd_registerThread( uint32_t ptid ); + EXTERN void VTThrd_registerThread( uint32_t ptid ); #else /* VT_JAVA */ - EXTERN void VTThrd_initJava( void ); - EXTERN void VTThrd_registerThread( jthread thread, const char* tname ); + EXTERN void VTThrd_initJava(void); + EXTERN void VTThrd_registerThread(jthread thread, const char* tname); #endif /* VT_JAVA */ /** * Check whether current thread is alive. - * @return 1 if alive, otherwise 0 + * + * @return 1 if alive, otherwise 0 */ -EXTERN uint8_t VTThrd_is_alive( void ); +EXTERN uint8_t VTThrd_isAlive(void); /** * Get ID of current thread. - * @return a new thread ID + * + * @return thread ID */ -EXTERN uint32_t VTThrd_getThreadId( void ); +EXTERN uint32_t VTThrd_getThreadId(void); /** * Create a mutex for locking (*mutex must be NULL). * - * @param mutex the generic VampirTrace thread mutex + * @param mutex the generic VampirTrace thread mutex */ -EXTERN void VTThrd_createMutex( VTThrdMutex** mutex ); +EXTERN void VTThrd_createMutex(VTThrdMutex** mutex); /** * Delete a mutex for locking. * - * @param mutex the generic VampirTrace thread mutex + * @param mutex the generic VampirTrace thread mutex */ -EXTERN void VTThrd_deleteMutex( VTThrdMutex** mutex ); +EXTERN void VTThrd_deleteMutex(VTThrdMutex** mutex); /** * Lock a mutex (*mutex will be initialized, if NULL). 
* - * @param mutex the generic VampirTrace thread mutex + * @param mutex the generic VampirTrace thread mutex */ -EXTERN void VTThrd_lock( VTThrdMutex** mutex ); +EXTERN void VTThrd_lock(VTThrdMutex** mutex); /** * Unlock a mutex. * - * @param mutex the generic VampirTrace thread mutex + * @param mutex the generic VampirTrace thread mutex */ -EXTERN void VTThrd_unlock( VTThrdMutex** mutex ); +EXTERN void VTThrd_unlock(VTThrdMutex** mutex); /* predefined mutexes for locking ... */ EXTERN VTThrdMutex* VTThrdMutexEnv; /* ... VT Thread environment */ diff --git a/ompi/contrib/vt/vt/vtlib/vt_thrd_java.c b/ompi/contrib/vt/vt/vtlib/vt_thrd_java.c index 9067121ddf..90c49eb77f 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_thrd_java.c +++ b/ompi/contrib/vt/vt/vtlib/vt_thrd_java.c @@ -68,7 +68,7 @@ void VTThrd_initJava() vt_java_get_thread_name(NULL, NULL, tname, sizeof(tname)); /* create thread object for master thread */ - VTThrd_create(0, 0, tname, 0); + VTThrd_create(tname, 0, 0); VTThrd_open(0); } } @@ -86,23 +86,18 @@ void VTThrd_registerThread(jthread thread, const char* tname) /* create new thread-ID */ tid = (uint32_t*)malloc(sizeof(uint32_t)); if (tid == NULL) vt_error(); + *tid = VTThrd_create(tname, 0, 0); - /* increment number of threads */ - *tid = VTThrd_createNewThreadId(); - - /* put new ID to thread-specific data */ + /* put new thread-ID to thread-specific data */ error = (*jvmti)->SetThreadLocalStorage(jvmti, thread, (void*)tid); vt_java_check_error(jvmti, error, "SetThreadLocalStorage"); - /* create new thread object */ - vt_cntl_msg(2, "Dynamic thread creation. Thread #%d (%s)", - *tid, tname ? 
tname : "unnamed"); - VTThrd_create(*tid, 0, tname, 0); + /* open thread associated trace file */ VTThrd_open(*tid); } } -uint8_t VTThrd_is_alive() +uint8_t VTThrd_isAlive() { jvmtiError error; uint32_t *tid; diff --git a/ompi/contrib/vt/vt/vtlib/vt_thrd_omp.c b/ompi/contrib/vt/vt/vtlib/vt_thrd_omp.c index 2078065ae0..2a8fcfa7e2 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_thrd_omp.c +++ b/ompi/contrib/vt/vt/vtlib/vt_thrd_omp.c @@ -51,16 +51,15 @@ void VTThrd_registerThread(uint32_t ptid) { if (!vt_is_alive) return; - /* check whether an ID is already created for this thread */ - if (threadId == VT_NO_ID){ - /* create new thread object */ - vt_cntl_msg(2, "Dynamic thread creation. Thread #%d", threadId); - VTThrd_create(threadId, ptid, NULL, 0); + /* create new thread-ID, if necessary */ + if (threadId == VT_NO_ID) + { + threadId = VTThrd_create(NULL, ptid, 0); VTThrd_open(threadId); } } -uint8_t VTThrd_is_alive() +uint8_t VTThrd_isAlive() { return (threadId != VT_NO_ID); } diff --git a/ompi/contrib/vt/vt/vtlib/vt_thrd_pthread.c b/ompi/contrib/vt/vt/vtlib/vt_thrd_pthread.c index 262fbfc6f8..fb75e904b3 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_thrd_pthread.c +++ b/ompi/contrib/vt/vt/vtlib/vt_thrd_pthread.c @@ -230,16 +230,16 @@ void VTThrd_registerThread(uint32_t ptid) pthread_mutex_unlock(&threadReuseMutex); } - if (!tid_reuse) *tid = VTThrd_createNewThreadId(); + /* create new thread-ID, if not reusing */ + if (!tid_reuse) + *tid = VTThrd_create(NULL, ptid, 0); - /* put (new) thread-ID to thread-specific data - no IO before this call (fflush calls this function) */ + /* put (new) thread-ID to thread-specific data */ pthread_setspecific(pthreadKey, tid); - /* create new thread object, if new thread-ID was created */ - if (!tid_reuse){ - vt_cntl_msg(2, "Dynamic thread creation. 
Thread #%d", *tid); - VTThrd_create(*tid, ptid, NULL, 0); + /* open thread associated trace file, if new thread object was created */ + if (!tid_reuse ) + { VTThrd_open(*tid); } /* otherwise, re-create metrics for reused thread object */ @@ -259,7 +259,7 @@ void VTThrd_registerThread(uint32_t ptid) } } -uint8_t VTThrd_is_alive() +uint8_t VTThrd_isAlive() { uint32_t *tid; diff --git a/ompi/contrib/vt/vt/vtlib/vt_trc.c b/ompi/contrib/vt/vt/vtlib/vt_trc.c index 57bf8c9b7b..9a939ec19e 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_trc.c +++ b/ompi/contrib/vt/vt/vtlib/vt_trc.c @@ -48,6 +48,10 @@ # include "vt_cudartwrap.h" #endif /* VT_CUDARTWRAP */ +#if defined(VT_CUPTI) +# include "vt_cupti_callback.h" +#endif /* VT_CUPTI */ + #if ((defined(VT_MT) || defined(VT_HYB)) && defined(VT_PTHREAD)) # include "vt_pthreadreg.h" #endif /* (VT_MT || VT_HYB) && VT_PTHREAD */ @@ -201,9 +205,15 @@ int vt_my_funique = 0; /* unique file id */ /* array of indices for internal regions */ uint32_t vt_trc_regid[VT__TRC_REGID_NUM]; -/* array of induces for internal markers (error, warnings, hints) */ +/* array of indices for internal markers (error, warnings, hints) */ uint32_t vt_trc_mid[3]; +/* id of process group containing all processes */ +uint32_t vt_all_pgid = 0; + +/* node process group id */ +uint32_t vt_node_pgid = 0; + /* counter group id for miscellaneous counters (e.g. cpu id) */ uint32_t vt_misc_cgid = 0; @@ -414,6 +424,16 @@ static void write_def_header(void) VT_UNIFY_STRID_VT_COMMENT" VT_SYNC_FLUSH_LEVEL: %i", vt_env_sync_flush_level()); + /* VT_ONOFF_CHECK_STACK_BALANCE */ + vt_def_comment(VT_MASTER_THREAD, + VT_UNIFY_STRID_VT_COMMENT" VT_ONOFF_CHECK_STACK_BALANCE: %s", + vt_env_onoff_check_stack_balance() ? 
"yes" : "no"); + + /* VT_MAX_STACK_DEPTH */ + vt_def_comment(VT_MASTER_THREAD, + VT_UNIFY_STRID_VT_COMMENT" VT_MAX_STACK_DEPTH: %i", + vt_env_max_stack_depth()); + /* VT_MAX_FLUSHES */ vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_MAX_FLUSHES: %i", @@ -532,7 +552,7 @@ static void write_def_header(void) vt_env_iotrace() ? "yes" : "no"); #endif /* VT_IOWRAP */ -#if defined(VT_CUDARTWRAP) +#if (defined(VT_CUDARTWRAP) || defined(VT_CUPTI)) /* VT_CUDARTTRACE */ vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_CUDARTTRACE: %s", @@ -564,7 +584,10 @@ static void write_def_header(void) vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_CUDATRACE_BUFFER_SIZE: %s", tmp_char); - + } + + if( vt_env_cudarttrace() || vt_env_cupti_api_callback()) + { /* VT_CUDATRACE_IDLE */ vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_CUDATRACE_IDLE: %s", @@ -590,7 +613,7 @@ static void write_def_header(void) VT_UNIFY_STRID_VT_COMMENT" VT_CUDATRACE_SYNC: %s", vt_env_cudatrace_sync() ? "yes" : "no"); } -#endif /* VT_CUDARTWRAP */ +#endif /* VT_CUDARTWRAP || VT_CUPTI */ #if (defined (VT_ETIMESYNC) && TIMER_IS_GLOBAL == 0) /* VT_ETIMESYNC */ @@ -1032,6 +1055,14 @@ void vt_open() vt_trc_mid[VT__TRC_MARKER_WARNING] = VT_NO_ID; vt_trc_mid[VT__TRC_MARKER_HINT] = VT_NO_ID; + /* define process group containing all processes; members will be collected + from the node process groups during trace unification */ + vt_all_pgid = + vt_def_procgrp(VT_MASTER_THREAD, VT_UNIFY_STRID_ALL_PROCGRP, 0, 0, NULL, 0); + + /* get id for node process group; define later when its members are known */ + vt_node_pgid = curid++; + /* define counter group for miscellaneous counters (e.g. 
cpu id) */ vt_misc_cgid = vt_def_counter_group(VT_MASTER_THREAD, "Miscellaneous"); @@ -1150,9 +1181,10 @@ void vt_open() VTGen_write_DEF_COUNTER(VTTHRD_GEN(VTThrdv[0]), i+1, vt_metric_name(i), + vt_metric_unit(i), vt_metric_props(i), gid, - vt_metric_unit(i)); + 0); } } } @@ -1172,20 +1204,17 @@ void vt_open() } #endif /* !VT_MPI && !VT_HYB */ #endif /* VT_PLUGIN_CNTR */ + +#if defined(VT_CUPTI) + if(vt_env_cupti_api_callback()) + vt_cupti_callback_init(); +#endif vt_is_alive = 1; #if (defined(VT_MT) || defined(VT_HYB)) VTThrd_unlock(&init_mutex); #endif /* VT_MT || VT_HYB */ - - if( vt_env_debug() > 0 ) - { - uint64_t t = vt_pform_wtime(); - vt_comment(VT_MASTER_THREAD, &t, "NODEID: %lx", vt_pform_node_id()); - vt_comment(VT_MASTER_THREAD, &t, "NODEID_31: %x", - vt_pform_node_id() & 0x7fffffff); - } } void vt_reset() @@ -1204,7 +1233,7 @@ void vt_reset() #if defined(VT_PLUGIN_CNTR) /* finalize counter plugins */ - vt_plugin_cntr_finalize(); + vt_plugin_cntr_finalize(0); #endif /* VT_PLUGIN_CNTR */ @@ -1327,7 +1356,7 @@ void vt_close_by_signal(int signum) void vt_close() { - int tnum; + int tnum = (int)VTThrdn; int i; /* catch vt_close called from child processes through atexit */ @@ -1356,26 +1385,49 @@ void vt_close() } #endif /* VT_MPI || VT_HYB */ - if( vt_env_debug() > 0 ) + vt_close_called = 1; + + /* write node process group definition */ { - uint64_t t = vt_pform_wtime(); - vt_comment(VT_MASTER_THREAD, &t, "NODEID: %lx", vt_pform_node_id()); - vt_comment(VT_MASTER_THREAD, &t, "NODEID_31: %x", - vt_pform_node_id() & 0x7fffffff); + uint32_t* grpv; + char tmp_char[128]; + + /* get member array */ + + grpv = (uint32_t*)malloc(tnum * sizeof(uint32_t)); + if ( grpv == NULL ) + vt_error(); + + for (i = 0; i < tnum; i++) + grpv[i] = VT_PROCESS_ID(vt_my_trace, i); + + /* prepend node process group identifier to name */ + snprintf(tmp_char, sizeof(tmp_char) - 1, + VT_UNIFY_STRID_NODE_PROCGRP"%s", vt_pform_node_name()); + + /* write node process group definition */ + 
vt_def_procgrp(VT_MASTER_THREAD, tmp_char, 0, tnum, grpv, vt_node_pgid); + + free(grpv); } - vt_close_called = 1; vt_is_alive = 0; - tnum = (int)VTThrdn; - #if defined(VT_CUDARTWRAP) - /* finalize cuda wrapping if enabled */ + /* finalize CUDA runtime wrapping if enabled */ if (vt_env_cudarttrace()) vt_cudartwrap_finalize(); #endif /* VT_CUDARTWRAP */ + +#if defined(VT_CUPTI) + + /* finalize CUPTI API callback if enabled */ + if (vt_env_cupti_api_callback()) + vt_cupti_callback_finalize(); + +#endif /* VT_CUPTI */ #if defined(VT_MEMHOOK) @@ -1515,7 +1567,7 @@ void vt_close() #if defined(VT_PLUGIN_CNTR) /* finalize counter plugins */ - vt_plugin_cntr_finalize(); + vt_plugin_cntr_finalize(tnum); #endif /* VT_PLUGIN_CNTR */ @@ -1542,7 +1594,8 @@ void vt_trace_on(uint32_t tid, uint8_t mark) { /* switch tracing on, if current call stack level is equal to the stored one at switching trace off */ - if ( VTTHRD_STACK_LEVEL(VTThrdv[tid]) == + if ( !vt_env_onoff_check_stack_balance() || + VTTHRD_STACK_LEVEL(VTThrdv[tid]) == VTTHRD_STACK_LEVEL_AT_OFF(VTThrdv[tid]) ) { VTTHRD_TRACE_STATUS(VTThrdv[tid]) = VT_TRACE_ON; @@ -1553,14 +1606,17 @@ void vt_trace_on(uint32_t tid, uint8_t mark) vt_exit(tid, &time); } - vt_cntl_msg(2, "Tracing switched on"); + vt_cntl_msg(2, "Tracing switched on at call stack level (%i)", + VTTHRD_STACK_LEVEL(VTThrdv[tid])); } /* otherwise: abort */ else { vt_error_msg("Could not switch tracing on.\n" - "The current call stack level (%i) isn't equal to the " - "stored one (%i) at switching trace off.", + "The current call stack level (%i) isn't the same as when " + "the tracing was switched off (%i).\n" + "This limitation can be disabled by setting the environment " + "variable VT_ONOFF_CHECK_STACK_BALANCE to 'no'.", VTTHRD_STACK_LEVEL(VTThrdv[tid]), VTTHRD_STACK_LEVEL_AT_OFF(VTThrdv[tid]) ); } @@ -1583,7 +1639,8 @@ void vt_trace_off(uint32_t tid, uint8_t mark, uint8_t permanent) if tracing is going to switch off permanently */ if ( permanent ) 
VTGen_write_ENTER(VTTHRD_GEN(VTThrdv[tid]), &time, - vt_trc_regid[VT__TRC_OFF], 0); + vt_trc_regid[VT__TRC_OFF], + 0); else vt_enter(tid, &time, vt_trc_regid[VT__TRC_OFF]); } @@ -1602,7 +1659,7 @@ void vt_trace_off(uint32_t tid, uint8_t mark, uint8_t permanent) VTTHRD_STACK_LEVEL(VTThrdv[tid]); vt_cntl_msg(2, "Tracing switched off at call stack level (%i)", - VTTHRD_STACK_LEVEL_AT_OFF(VTThrdv[tid])); + VTTHRD_STACK_LEVEL_AT_OFF(VTThrdv[tid])); } } } @@ -1639,7 +1696,7 @@ void vt_update_counter(uint32_t tid, uint64_t* time) #if defined(VT_METR) /* update hardware performance counters (VT_METRICS) */ - if ( num_metrics > 0 && VTThrdv[tid]->is_virtual_thread == 0 && + if ( num_metrics > 0 && !VTTHRD_IS_VIRTUAL(VTThrdv[tid]) && VTTHRD_TRACE_STATUS(VTThrdv[tid]) == VT_TRACE_ON ) { int i; @@ -1891,8 +1948,10 @@ void vt_mpi_sync(uint32_t tid, uint64_t* time, void* comm) static int sync_flush_env = -1; static int sync_flush_level_env = -1; static int sync_time_env = -1; + static int sync_flush_skip = -1; VT_MPI_INT lsync_mask = 0; VT_MPI_INT sync_mask = 0; + uint8_t was_recorded; GET_THREAD_ID(tid); @@ -1902,6 +1961,8 @@ void vt_mpi_sync(uint32_t tid, uint64_t* time, void* comm) sync_flush_env = vt_env_sync_flush(); if (sync_flush_level_env == -1) sync_flush_level_env = vt_env_sync_flush_level(); + if (sync_flush_skip == -1) + sync_flush_skip = vt_env_sync_flush_skip(); #if (defined(VT_ETIMESYNC) && TIMER_IS_GLOBAL == 0) if (sync_time_env == -1) sync_time_env = vt_env_etimesync(); @@ -1921,9 +1982,15 @@ void vt_mpi_sync(uint32_t tid, uint64_t* time, void* comm) PMPI_Comm_size(*((MPI_Comm*)comm), &comm_size); if ((int)comm_size != vt_num_traces) return; } + + /* return if we should skip this sync */ + if (sync_flush_skip--) { + return; + } + /* mark begin of synchronization */ - vt_enter(tid, time, vt_trc_regid[VT__TRC_SYNC]); + was_recorded = vt_enter(tid, time, vt_trc_regid[VT__TRC_SYNC]); /* checking whether buffer flush needed */ @@ -1968,7 +2035,10 @@ void 
vt_mpi_sync(uint32_t tid, uint64_t* time, void* comm) /* mark end of synchronization */ *time = vt_pform_wtime(); - vt_exit(tid, time); + if (was_recorded) + { + vt_exit(tid, time); + } #endif /* VT_MPI || VT_HYB */ } @@ -1996,7 +2066,8 @@ void vt_def_comment(uint32_t tid, const char* fmt, ...) va_end(ap); - VTGen_write_DEFINITION_COMMENT(VTTHRD_GEN(VTThrdv[tid]), comment); + VTGen_write_DEFINITION_COMMENT(VTTHRD_GEN(VTThrdv[tid]), + comment); } uint32_t vt_def_scl_file(uint32_t tid, const char* fname) @@ -2013,7 +2084,9 @@ uint32_t vt_def_scl_file(uint32_t tid, const char* fname) { fid = curid++; - VTGen_write_DEF_SCL_FILE(VTTHRD_GEN(VTThrdv[tid]), fid, fname); + VTGen_write_DEF_SCL_FILE(VTTHRD_GEN(VTThrdv[tid]), + fid, + fname); hash_put(HASH_TAB__SFILE, fname, fid); } @@ -2036,7 +2109,10 @@ uint32_t vt_def_scl(uint32_t tid, uint32_t fid, uint32_t begln, uint32_t endln) sid = curid++; - VTGen_write_DEF_SCL(VTTHRD_GEN(VTThrdv[tid]), sid, fid, begln); + VTGen_write_DEF_SCL(VTTHRD_GEN(VTThrdv[tid]), + sid, + fid, + begln); return sid; } @@ -2047,10 +2123,11 @@ uint32_t vt_def_file_group(uint32_t tid, const char* gname) GET_THREAD_ID(tid); - gid = curid++; - VTGen_write_DEF_FILE_GROUP(VTTHRD_GEN(VTThrdv[tid]), gid, gname); + VTGen_write_DEF_FILE_GROUP(VTTHRD_GEN(VTThrdv[tid]), + gid, + gname); return gid; } @@ -2086,7 +2163,8 @@ uint32_t vt_def_region_group(uint32_t tid, const char* gname) rdid = curid++; VTGen_write_DEF_FUNCTION_GROUP(VTTHRD_GEN(VTThrdv[tid]), - rdid, gname); + rdid, + gname); hash_put(HASH_TAB__RDESC, gname, rdid); } @@ -2146,7 +2224,6 @@ uint32_t vt_def_region(uint32_t tid, const char* rname, uint32_t fid, case VT_MPI_COLL_ALL2ONE: case VT_MPI_COLL_BARRIER: case VT_MPI_COLL_ONE2ALL: - case VT_MPI_COLL_OTHER: rdesc = "MPI"; break; case VT_OMP_FUNCTION: @@ -2193,8 +2270,7 @@ uint32_t vt_def_region(uint32_t tid, const char* rname, uint32_t fid, if ( rtype == VT_MPI_COLL_ALL2ALL || rtype == VT_MPI_COLL_ALL2ONE || rtype == VT_MPI_COLL_BARRIER || - 
rtype == VT_MPI_COLL_ONE2ALL || - rtype == VT_MPI_COLL_OTHER ) + rtype == VT_MPI_COLL_ONE2ALL ) { VTGen_write_DEF_COLLECTIVE_OPERATION(VTTHRD_GEN(VTThrdv[tid]), rid, /* collective id equal region id */ @@ -2221,34 +2297,82 @@ uint32_t vt_def_counter_group(uint32_t tid, const char* gname) gid = curid++; - VTGen_write_DEF_COUNTER_GROUP(VTTHRD_GEN(VTThrdv[tid]), gid, gname); + VTGen_write_DEF_COUNTER_GROUP(VTTHRD_GEN(VTThrdv[tid]), + gid, + gname); return gid; } -uint32_t vt_def_counter(uint32_t tid, const char* cname, uint32_t cprop, - uint32_t gid, const char* cunit) +uint32_t vt_def_counter(uint32_t tid, const char* cname, const char* cunit, + uint32_t cprop, uint32_t gid, uint32_t pgid) { uint32_t cid = 0; GET_THREAD_ID(tid); + /* get new counter id */ #if defined(VT_METR) cid = num_metrics; #endif /* VT_METR */ - cid += curid++; + /* write counter definition */ VTGen_write_DEF_COUNTER(VTTHRD_GEN(VTThrdv[tid]), cid, cname, + cunit, cprop, gid, - cunit); + pgid); + + /* write process group attributes definition, if it's a group counter */ + if( pgid != 0 ) + { + VTGen_write_DEF_PROCESS_GROUP_ATTRIBUTES(VTTHRD_GEN(VTThrdv[tid]), + pgid, + VT_PROCGRP_HASCOUNTERS); + } return cid; } +uint32_t vt_def_procgrp(uint32_t tid, const char* gname, uint32_t gattr, + uint32_t grpc, uint32_t grpv[], uint32_t gid) +{ + GET_THREAD_ID(tid); + + /* get new process group id, if not given */ + if( gid == 0 ) + gid = curid++; + + /* write process group definition */ + VTGen_write_DEF_PROCESS_GROUP(VTTHRD_GEN(VTThrdv[tid]), + gid, + gname, + grpc, + grpv); + + /* write process group attributes definition, if attributes are given */ + if( gattr != 0 ) + { + VTGen_write_DEF_PROCESS_GROUP_ATTRIBUTES(VTTHRD_GEN(VTThrdv[tid]), + gid, + gattr); + } + + return gid; +} + +void vt_def_procgrp_attributes(uint32_t tid, uint32_t gid, uint32_t gattr) +{ + GET_THREAD_ID(tid); + + VTGen_write_DEF_PROCESS_GROUP_ATTRIBUTES(VTTHRD_GEN(VTThrdv[tid]), + gid, + gattr); +} + uint32_t vt_def_marker(uint32_t 
tid, const char* mname, uint32_t mtype) { uint32_t mid; @@ -2265,38 +2389,20 @@ uint32_t vt_def_marker(uint32_t tid, const char* mname, uint32_t mtype) return mid; } -void vt_def_gpu_comm(uint32_t grpc, uint32_t grpv[], const char *name, - uint32_t cid) -{ - uint32_t i; - uint32_t tid; - - if(grpc > 0 && grpv != NULL){ - /* Process group will be written to first thread in given array*/ - tid = grpv[0]; - }else return; - - for(i = 0; i < grpc; i++){ - grpv[i] = VT_PROCESS_ID(vt_my_trace, grpv[i]); - } - - VTGen_write_DEF_PROCESS_GROUP(VTTHRD_GEN(VTThrdv[tid]), - cid+1, name, grpc, grpv); -} - uint32_t vt_def_mpi_comm(uint32_t tid, uint8_t ctype, uint32_t grpc, uint8_t grpv[]) { uint32_t cid; - uint32_t cgrpc = 0; + uint32_t cgrpc = 0; uint32_t* cgrpv = NULL; - char cname[20]; + char cname[128]; GET_THREAD_ID(tid); cid = curid++; + /* "unpack" bit-vector of members */ if( grpc > 0 ) { uint32_t i; @@ -2318,15 +2424,31 @@ uint32_t vt_def_mpi_comm(uint32_t tid, uint8_t ctype, uint32_t grpc, } } + /* set process group name to identifier of communicator type */ + if(ctype == VT_MPI_COMM_WORLD) strncpy(cname, VT_UNIFY_STRID_MPI_COMM_WORLD_PROCGRP, sizeof(cname) - 1); else if(ctype == VT_MPI_COMM_SELF) strncpy(cname, VT_UNIFY_STRID_MPI_COMM_SELF_PROCGRP, sizeof(cname) - 1); - else /* VT_MPI_COMM_OTHER */ + else if(ctype == VT_MPI_COMM_OTHER) strncpy(cname, VT_UNIFY_STRID_MPI_COMM_OTHER_PROCGRP, sizeof(cname) - 1); + else /* VT_MPI_GROUP */ + strncpy(cname, VT_UNIFY_STRID_MPI_GROUP_PROCGRP, sizeof(cname) - 1); + /* write process group definition */ VTGen_write_DEF_PROCESS_GROUP(VTTHRD_GEN(VTThrdv[tid]), - cid+1, cname, cgrpc, cgrpv); + cid, + cname, + cgrpc, + cgrpv); + + if(ctype != VT_MPI_GROUP) + { + /* write process group attributes definition */ + VTGen_write_DEF_PROCESS_GROUP_ATTRIBUTES(VTTHRD_GEN(VTThrdv[tid]), + cid, + VT_PROCGRP_ISCOMMUNICATOR); + } if(cgrpv) free(cgrpv); @@ -2336,18 +2458,28 @@ uint32_t vt_def_mpi_comm(uint32_t tid, uint8_t ctype, uint32_t grpc, 
uint32_t vt_def_user_comm(uint32_t tid, const char* cname) { - uint32_t cid; - char tmp_char[128]; + uint32_t cid; + char tmp_char[128]; GET_THREAD_ID(tid); cid = curid++; + /* prepend identifier of communicator type to process group name */ snprintf(tmp_char, sizeof(tmp_char) - 1, VT_UNIFY_STRID_USER_COMM_PROCGRP"%s", cname); + /* write process group definition */ VTGen_write_DEF_PROCESS_GROUP(VTTHRD_GEN(VTThrdv[tid]), - cid+1, tmp_char, 0, NULL); + cid, + tmp_char, + 0, + NULL); + + /* write process group attributes definition */ + VTGen_write_DEF_PROCESS_GROUP_ATTRIBUTES(VTTHRD_GEN(VTThrdv[tid]), + cid, + VT_PROCGRP_ISCOMMUNICATOR); return cid; } @@ -2416,7 +2548,7 @@ uint8_t vt_enter(uint32_t tid, uint64_t* time, uint32_t rid) if( !RFG_Regions_stackPush(VTTHRD_RFGREGIONS(VTThrdv[tid]), rid, do_trace, &rinf) ) { -# if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA) || defined(VT_CUDARTWRAP)) +# if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA) || defined(VT_GPU)) RFG_RegionInfo* rinf_master; #if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA)) VTTHRD_LOCK_IDS(); @@ -2439,9 +2571,9 @@ uint8_t vt_enter(uint32_t tid, uint64_t* time, uint32_t rid) if (!RFG_Regions_stackPush(VTTHRD_RFGREGIONS(VTThrdv[tid]), rid, do_trace, &rinf)) vt_assert(0); -# else /* VT_MT || VT_HYB || VT_JAVA || VT_CUDARTWRAP */ +# else /* VT_MT || VT_HYB || VT_JAVA || VT_GPU */ vt_assert(0); -# endif /* VT_MT || VT_HYB || VT_JAVA || VT_CUDARTWRAP */ +# endif /* VT_MT || VT_HYB || VT_JAVA || VT_GPU */ } if (do_trace) @@ -2467,7 +2599,10 @@ uint8_t vt_enter(uint32_t tid, uint64_t* time, uint32_t rid) if (do_trace) { - VTGen_write_ENTER(VTTHRD_GEN(VTThrdv[tid]), time, rid, 0); + VTGen_write_ENTER(VTTHRD_GEN(VTThrdv[tid]), + time, + rid, + 0); vt_update_counter(tid, time); } @@ -2508,7 +2643,10 @@ void vt_exit(uint32_t tid, uint64_t* time) { vt_update_counter(tid, time); - VTGen_write_LEAVE(VTTHRD_GEN(VTThrdv[tid]), time, 0, 0); + VTGen_write_LEAVE(VTTHRD_GEN(VTThrdv[tid]), + 
time, + 0, + 0); } } @@ -2767,7 +2905,7 @@ void vt_mpi_send(uint32_t tid, uint64_t* time, uint32_t dpid, uint32_t cid, VTGen_write_SEND_MSG(VTTHRD_GEN(VTThrdv[tid]), time, dpid+1, - cid+1, + cid, tag, sent, 0); @@ -2783,7 +2921,7 @@ void vt_mpi_recv(uint32_t tid, uint64_t* time, uint32_t spid, uint32_t cid, VTGen_write_RECV_MSG(VTTHRD_GEN(VTThrdv[tid]), time, spid+1, - cid+1, + cid, tag, recvd, 0); @@ -2801,7 +2939,7 @@ void vt_mpi_collexit(uint32_t tid, uint64_t* time, uint64_t* etime, time, etime, rid, - cid+1, + cid, rpid != VT_NO_ID ? rpid+1 : 0, sent, recvd, @@ -2828,17 +2966,19 @@ void vt_mpi_collbegin(uint32_t tid, uint64_t* time, uint32_t rid, uint64_t mid, rid, mid, rpid != VT_NO_ID ? rpid+1 : 0, - cid+1, + cid, sent, recvd, 0); } -void vt_mpi_collend(uint32_t tid, uint64_t* time, uint64_t mid, void* comm) +/* NOTE: collend has to be called by each process to ENSURE that sync is done + even by processes that have tracing disabled (allreduce) */ +void vt_mpi_collend(uint32_t tid, uint64_t* time, uint64_t mid, void* comm, uint8_t was_recorded) { GET_THREAD_ID(tid); - if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) == VT_TRACE_ON) + if (was_recorded && (VTTHRD_TRACE_STATUS(VTThrdv[tid]) == VT_TRACE_ON)) { VTGen_write_END_COLLECTIVE_OPERATION(VTTHRD_GEN(VTThrdv[tid]), time, @@ -2863,7 +3003,7 @@ void vt_mpi_rma_put(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid, time, VT_PROCESS_ID(vt_my_trace, tid), tpid+1, - cid+1, + cid, tag, sent, 0); @@ -2880,7 +3020,7 @@ void vt_mpi_rma_putre(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid, time, 0, tpid+1, - cid+1, + cid, tag, sent, 0); @@ -2897,7 +3037,7 @@ void vt_mpi_rma_get(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid, time, 0, tpid+1, - cid+1, + cid, tag, recvd, 0); @@ -2912,7 +3052,7 @@ void vt_mpi_rma_end(uint32_t tid, uint64_t* time, uint32_t cid, uint32_t tag) VTGen_write_RMA_END(VTTHRD_GEN(VTThrdv[tid]), time, 0, - cid+1, + cid, tag, 0); } @@ -3008,7 +3148,7 @@ void 
vt_user_send(uint32_t tid, uint64_t* time, uint32_t cid, uint32_t tag, if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) != VT_TRACE_ON) return; snprintf(comid_comment, sizeof(comid_comment) - 1, - VT_UNIFY_STRID_USRCOM_SEND_COMMENT"C%xT%x", cid+1, tag); + VT_UNIFY_STRID_USRCOM_SEND_COMMENT"C%xT%x", cid, tag); VTGen_write_DEFINITION_COMMENT(VTTHRD_GEN(VTThrdv[tid]), comid_comment); @@ -3016,7 +3156,7 @@ void vt_user_send(uint32_t tid, uint64_t* time, uint32_t cid, uint32_t tag, VTGen_write_SEND_MSG(VTTHRD_GEN(VTThrdv[tid]), time, 1, - cid+1, + cid, tag, sent, 0); @@ -3032,7 +3172,7 @@ void vt_user_recv(uint32_t tid, uint64_t* time, uint32_t cid, uint32_t tag, if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) != VT_TRACE_ON) return; snprintf(comid_comment, sizeof(comid_comment) - 1, - VT_UNIFY_STRID_USRCOM_RECV_COMMENT"C%xT%x", cid+1, tag); + VT_UNIFY_STRID_USRCOM_RECV_COMMENT"C%xT%x", cid, tag); VTGen_write_DEFINITION_COMMENT(VTTHRD_GEN(VTThrdv[tid]), comid_comment); @@ -3040,7 +3180,7 @@ void vt_user_recv(uint32_t tid, uint64_t* time, uint32_t cid, uint32_t tag, VTGen_write_RECV_MSG(VTTHRD_GEN(VTThrdv[tid]), time, 1, - cid+1, + cid, tag, recvd, 0); diff --git a/ompi/contrib/vt/vt/vtlib/vt_trc.h b/ompi/contrib/vt/vt/vtlib/vt_trc.h index f099260c83..0db58f795f 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_trc.h +++ b/ompi/contrib/vt/vt/vtlib/vt_trc.h @@ -40,28 +40,28 @@ #define VT__TRC_MARKER_HINT 2 /** - * Description + * TODO: Description */ EXTERN void vt_open(void); /** - * Description + * TODO: Description */ EXTERN void vt_reset(void); /** - * Description + * TODO: Description * * @param signum signal number */ EXTERN void vt_close_by_signal(int signum); /** - * Description + * TODO: Description */ EXTERN void vt_close(void); /** - * Description + * TODO: Description * * @param tid thread id * @param mark flag: mark trace status as function enter/exit? 
@@ -70,7 +70,7 @@ EXTERN void vt_close(void); EXTERN void vt_trace_on(uint32_t tid, uint8_t mark); /** - * Description + * TODO: Description * * @param tid thread id * @param mark flag: mark trace status as function enter/exit? @@ -82,7 +82,7 @@ EXTERN void vt_trace_on(uint32_t tid, uint8_t mark); EXTERN void vt_trace_off(uint32_t tid, uint8_t mark, uint8_t permanent); /** - * Description + * TODO: Description * * @param tid thread id * @@ -92,7 +92,7 @@ EXTERN void vt_trace_off(uint32_t tid, uint8_t mark, uint8_t permanent); EXTERN uint8_t vt_is_trace_on(uint32_t tid); /** - * Description + * TODO: Description * * @param tid thread id * @param size buffer size to be guaranteed @@ -100,14 +100,14 @@ EXTERN uint8_t vt_is_trace_on(uint32_t tid); EXTERN void vt_guarantee_buffer(uint32_t tid, size_t size); /** - * Description + * TODO: Description * * @param tid thread id */ EXTERN void vt_buffer_flush(uint32_t tid); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -115,21 +115,21 @@ EXTERN void vt_buffer_flush(uint32_t tid); EXTERN void vt_update_counter(uint32_t tid, uint64_t* time); /** - * Description + * TODO: Description * This function have to be called immediately after initializing the * communication middle-ware, e.g. atfer MPI_Init(). */ EXTERN void vt_mpi_init(void); /** - * Description + * TODO: Description * This function have to be called immediately before finalizing the * communication middle-ware, e.g. before MPI_Finalize(). 
*/ EXTERN void vt_mpi_finalize(void); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -151,7 +151,7 @@ EXTERN uint32_t vt_get_curid(void); */ /** - * Description + * TODO: Description * * @param tid thread id * @param fmt comment as format string like printf @@ -159,7 +159,7 @@ EXTERN uint32_t vt_get_curid(void); EXTERN void vt_def_comment(uint32_t tid, const char* fmt, ...); /** - * Description + * TODO: Description * * @param tid thread id * @param fname source file name @@ -169,7 +169,7 @@ EXTERN void vt_def_comment(uint32_t tid, const char* fmt, ...); EXTERN uint32_t vt_def_scl_file(uint32_t tid, const char* fname); /** - * Description + * TODO: Description * * @param tid thread id * @param fid source file id (created by vt_def_scl_file) @@ -183,7 +183,7 @@ EXTERN uint32_t vt_def_scl(uint32_t tid, uint32_t fid, uint32_t begln, uint32_t endln); /** - * Description + * TODO: Description * * @param tid thread id * @param gname file group name @@ -193,7 +193,7 @@ EXTERN uint32_t vt_def_scl(uint32_t tid, uint32_t fid, uint32_t begln, EXTERN uint32_t vt_def_file_group(uint32_t tid, const char* gname); /** - * Description + * TODO: Description * * @param tid thread id * @param fname file name @@ -204,7 +204,7 @@ EXTERN uint32_t vt_def_file_group(uint32_t tid, const char* gname); EXTERN uint32_t vt_def_file(uint32_t tid, const char* fname, uint32_t gid); /** - * Description + * TODO: Description * * @param tid thread id * @param gname region group name @@ -214,7 +214,7 @@ EXTERN uint32_t vt_def_file(uint32_t tid, const char* fname, uint32_t gid); EXTERN uint32_t vt_def_region_group(uint32_t tid, const char* gname); /** - * Description + * TODO: Description * * @param tid thread id * @param rname region name @@ -231,7 +231,7 @@ EXTERN uint32_t vt_def_region(uint32_t tid, const char* rname, uint32_t fid, const char* rdesc, uint8_t rtype); /** - * Description + * TODO: Description * * @param tid thread id * @param gname 
counter group name @@ -241,21 +241,48 @@ EXTERN uint32_t vt_def_region(uint32_t tid, const char* rname, uint32_t fid, EXTERN uint32_t vt_def_counter_group(uint32_t tid, const char* gname); /** - * Description + * TODO: Description * * @param tid thread id * @param cname counter name - * @param cprop counter properties - * @param cgid counter group id (created by vt_def_counter_group) * @param cunit counter unit + * @param cprop counter properties bitmask + * @param cgid counter group id (created by vt_def_counter_group) + * @param pgid process group id (created by vt_def_procgrp if group counter, + * otherwise 0) * * @return counter id */ -EXTERN uint32_t vt_def_counter(uint32_t tid, const char* cname, uint32_t cprop, - uint32_t gid, const char* cunit); +EXTERN uint32_t vt_def_counter(uint32_t tid, const char* cname, + const char* cunit, uint32_t cprop, uint32_t gid, + uint32_t pgid); /** - * Description + * TODO: Description + * + * @param tid thread id + * @param gname process group name + * @param grpc number of entries in @grpv array + * @param grpv array of member process/thread ids + * @param gid previous created process group id (if 0, create a new one) + * + * @return process group id + */ +EXTERN uint32_t vt_def_procgrp(uint32_t tid, const char* gname, uint32_t gattr, + uint32_t grpc, uint32_t grpv[], uint32_t gid); + +/** + * TODO: Description + * + * @param tid thread id + * @param gid process group id (created by vt_def_procgrp) + * @param gattr process group attributes bitmask + */ +EXTERN void vt_def_procgrp_attributes(uint32_t tid, uint32_t gid, + uint32_t gattr); + +/** + * TODO: Description * * @param tid thread id * @param mname marker name @@ -267,18 +294,7 @@ EXTERN uint32_t vt_def_marker(uint32_t tid, const char* mname, uint32_t mtype); /** - * Defines a marker with the given name. 
- * - * @param grpc number of GPU thread ids the array contains - * @param grpv array of GPU thread ids - * @param name name of process/thread group to be identified in vt_unify - * @param cid the communictor id for this process/thread group - */ -EXTERN void vt_def_gpu_comm(uint32_t grpc, uint32_t grpv[], const char* name, - uint32_t cid); - -/** - * Description + * TODO: Description * * @param tid thread id * @param ctype MPI communicator type @@ -292,7 +308,7 @@ EXTERN uint32_t vt_def_mpi_comm(uint32_t tid, uint8_t ctype, uint32_t grpc, uint8_t grpv[]); /** - * Description + * TODO: Description * * @param tid thread id * @param cname communicator name @@ -302,7 +318,7 @@ EXTERN uint32_t vt_def_mpi_comm(uint32_t tid, uint8_t ctype, uint32_t grpc, EXTERN uint32_t vt_def_user_comm(uint32_t tid, const char* cname); /** - * Description + * TODO: Description * * @param tid thread id * @param vtype value type @@ -313,7 +329,7 @@ EXTERN uint32_t vt_def_user_comm(uint32_t tid, const char* cname); EXTERN uint32_t vt_def_keyval(uint32_t tid, uint8_t vtype, const char* kname); /** - * Description + * TODO: Description * * @param tid thread id * @param sname unique async. 
source name @@ -331,7 +347,7 @@ EXTERN uint32_t vt_def_async_source(uint32_t tid, const char* sname); /* -- Region -- */ /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -343,7 +359,7 @@ EXTERN uint32_t vt_def_async_source(uint32_t tid, const char* sname); EXTERN uint8_t vt_enter(uint32_t tid, uint64_t* time, uint32_t rid); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -354,7 +370,7 @@ EXTERN void vt_exit(uint32_t tid, uint64_t* time); /** * DEPRECATED - * Description + * TODO: Description * * @param tid thread id * @param time begin timestamp @@ -368,7 +384,7 @@ EXTERN void vt_ioexit(uint32_t tid, uint64_t* time, uint64_t* etime, uint32_t fid, uint64_t hid, uint32_t op, uint64_t bytes ); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -377,7 +393,7 @@ EXTERN void vt_ioexit(uint32_t tid, uint64_t* time, uint64_t* etime, EXTERN void vt_iobegin( uint32_t tid, uint64_t* time, uint64_t mid ); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -393,20 +409,19 @@ EXTERN void vt_ioend(uint32_t tid, uint64_t* time, uint32_t fid, uint64_t mid, /* -- Counter -- */ /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp * @param hid counter id (created by vt_def_counter) * @param cval counter value */ -EXTERN void vt_count(uint32_t tid, uint64_t* time, uint32_t cid, - uint64_t cval); +EXTERN void vt_count(uint32_t tid, uint64_t* time, uint32_t cid, uint64_t cval); /* -- Comment -- */ /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -417,7 +432,7 @@ EXTERN void vt_comment(uint32_t tid, uint64_t* time, const char* fmt, ... 
); /* -- Rewind -- */ /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -425,7 +440,7 @@ EXTERN void vt_comment(uint32_t tid, uint64_t* time, const char* fmt, ... ); EXTERN void vt_rewind(uint32_t tid, uint64_t* time); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -435,7 +450,7 @@ EXTERN void vt_set_rewind_mark(uint32_t tid, uint64_t* time); /* -- Marker -- */ /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -446,7 +461,7 @@ EXTERN void vt_marker(uint32_t tid, uint64_t* time, uint32_t mid, const char* fmt, ...); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -456,7 +471,7 @@ EXTERN void vt_marker_error(uint32_t tid, uint64_t* time, const char* fmt, ...); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -466,7 +481,7 @@ EXTERN void vt_marker_warning(uint32_t tid, uint64_t* time, const char* fmt, ...); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -478,7 +493,7 @@ EXTERN void vt_marker_hint(uint32_t tid, uint64_t* time, /* -- Key-Value -- */ /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -489,7 +504,7 @@ EXTERN void vt_marker_hint(uint32_t tid, uint64_t* time, EXTERN void vt_keyval(uint32_t tid, uint32_t kid, uint8_t vtype, void* kvalue); /** - * Description + * TODO: Description * * @param tid thread id * @param kid async. 
source key id @@ -500,7 +515,7 @@ EXTERN void vt_next_async_time(uint32_t tid, uint32_t kid, uint64_t atime); /* -- MPI-1 -- */ /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -513,7 +528,7 @@ EXTERN void vt_mpi_send(uint32_t tid, uint64_t* time, uint32_t dpid, uint32_t cid, uint32_t tag, uint32_t sent); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -528,7 +543,7 @@ EXTERN void vt_mpi_recv(uint32_t tid, uint64_t* time, uint32_t spid, /** * DEPRECATED - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -545,7 +560,7 @@ EXTERN void vt_mpi_collexit(uint32_t tid, uint64_t* time, uint64_t* etime, void* comm, uint32_t sent, uint32_t recvd); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -561,7 +576,7 @@ EXTERN void vt_mpi_collbegin(uint32_t tid, uint64_t* time, uint32_t rid, uint64_t sent, uint64_t recvd); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -569,12 +584,12 @@ EXTERN void vt_mpi_collbegin(uint32_t tid, uint64_t* time, uint32_t rid, * @param comm MPI communicator */ EXTERN void vt_mpi_collend(uint32_t tid, uint64_t* time, uint64_t mid, - void* comm); + void* comm, uint8_t was_recorded); /* -- MPI2 - 1sided -- */ /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -587,7 +602,7 @@ EXTERN void vt_mpi_rma_put(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid, uint32_t tag, uint64_t sent); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -600,7 +615,7 @@ EXTERN void vt_mpi_rma_putre(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid, uint32_t tag, uint64_t sent); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -613,7 +628,7 @@ EXTERN void vt_mpi_rma_get(uint32_t tid, uint64_t* time, uint32_t tpid, 
uint32_t cid, uint32_t tag, uint64_t recvd); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -626,14 +641,14 @@ EXTERN void vt_mpi_rma_end(uint32_t tid, uint64_t* time, uint32_t cid, /* -- OpenMP -- */ /** - * Description + * TODO: Description * * @param tid thread id */ EXTERN void vt_omp_fork(uint32_t tid); /** - * Description + * TODO: Description * * @param tid thread id * @param ptid parent thread id @@ -641,21 +656,21 @@ EXTERN void vt_omp_fork(uint32_t tid); EXTERN void vt_omp_fork2(uint32_t tid, uint32_t* ptid); /** - * Description + * TODO: Description * * @param tid thread id */ EXTERN void vt_omp_join(uint32_t tid); /** - * Description + * TODO: Description * * @param tid thread id */ EXTERN void vt_omp_parallel_begin(uint32_t tid); /** - * Description + * TODO: Description * * @param tid thread id * @param ptid parent thread id @@ -663,7 +678,7 @@ EXTERN void vt_omp_parallel_begin(uint32_t tid); EXTERN void vt_omp_parallel_begin2(uint32_t tid, uint32_t ptid); /** - * Description + * TODO: Description * * @param tid thread id */ @@ -672,28 +687,26 @@ EXTERN void vt_omp_parallel_end(uint32_t tid); /* -- User Point-to-Point Communication -- */ /** - * Description - * - * @param tid thread id - * @param time timestamp - * @param cid communicator id (created by vt_def_user_comm) - * @param tag message tag (has to be unique per communication pair) - * @param sent sent bytes + * TODO: Description * + * @param tid thread id + * @param time timestamp + * @param cid communicator id (created by vt_def_user_comm) + * @param tag message tag (has to be unique per communication pair) + * @param sent sent bytes */ EXTERN void vt_user_send(uint32_t tid, uint64_t* time, uint32_t cid, uint32_t tag, uint32_t sent); /** - * Description - * - * @param tid thread id - * @param time timestamp - * @param cid communicator id (created by vt_def_user_comm) - * @param tag message tag (has to be unique per communication pair) - * @param 
sent received bytes + * TODO: Description * + * @param tid thread id + * @param time timestamp + * @param cid communicator id (created by vt_def_user_comm) + * @param tag message tag (has to be unique per communication pair) + * @param sent received bytes */ EXTERN void vt_user_recv(uint32_t tid, uint64_t* time, uint32_t cid, @@ -702,7 +715,7 @@ EXTERN void vt_user_recv(uint32_t tid, uint64_t* time, uint32_t cid, /* -- VampirTrace Internal -- */ /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -710,7 +723,7 @@ EXTERN void vt_user_recv(uint32_t tid, uint64_t* time, uint32_t cid, EXTERN void vt_enter_user(uint32_t tid, uint64_t* time); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -718,7 +731,7 @@ EXTERN void vt_enter_user(uint32_t tid, uint64_t* time); EXTERN void vt_exit_user(uint32_t tid, uint64_t* time); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -726,7 +739,7 @@ EXTERN void vt_exit_user(uint32_t tid, uint64_t* time); EXTERN void vt_enter_stat(uint32_t tid, uint64_t* time); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -734,7 +747,7 @@ EXTERN void vt_enter_stat(uint32_t tid, uint64_t* time); EXTERN void vt_exit_stat(uint32_t tid, uint64_t* time); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -742,7 +755,7 @@ EXTERN void vt_exit_stat(uint32_t tid, uint64_t* time); EXTERN void vt_enter_flush(uint32_t tid, uint64_t* time); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -750,7 +763,7 @@ EXTERN void vt_enter_flush(uint32_t tid, uint64_t* time); EXTERN void vt_exit_flush(uint32_t tid, uint64_t* time); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -758,7 +771,7 @@ EXTERN void vt_exit_flush(uint32_t tid, uint64_t* time); EXTERN void 
vt_enter_rewind(uint32_t tid, uint64_t* time); /** - * Description + * TODO: Description * * @param tid thread id * @param time timestamp @@ -782,9 +795,15 @@ EXTERN int vt_my_funique; /** unique file id */ /** array of indices for internal regions */ EXTERN uint32_t vt_trc_regid[VT__TRC_REGID_NUM]; -/** array of induces for internal markers (error, warnings, hints) */ +/** array of indices for internal markers (error, warnings, hints) */ EXTERN uint32_t vt_trc_mid[3]; +/** id of process group containing all processes */ +EXTERN uint32_t vt_all_pgid; + +/** node process group id */ +EXTERN uint32_t vt_node_pgid; + /** counter group id for miscellaneous counters (e.g. cpu id) */ EXTERN uint32_t vt_misc_cgid; diff --git a/ompi/contrib/vt/vt/vtlib/vt_user_count.c b/ompi/contrib/vt/vt/vtlib/vt_user_count.c index 725e8bb01f..c2f71084d1 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_user_count.c +++ b/ompi/contrib/vt/vt/vtlib/vt_user_count.c @@ -111,7 +111,7 @@ unsigned int VT_User_count_def__(const char* cname, const char* cunit, int ctype #if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_LOCK_IDS(); #endif - cid = vt_def_counter(VT_CURRENT_THREAD, cname, cprop, gid, cunit); + cid = vt_def_counter(VT_CURRENT_THREAD, cname, cunit, cprop, gid, 0); #if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_UNLOCK_IDS(); #endif