Changes to VT:
- added CUDA stream reuse for both, CUDA tracing with CUPTI and CUDART wrapper - removed CUDA stream number from thread name, when CUDA stream reuse is enabled - disable tracking of MPI communicators, requests, windows, etc. if MPI is initialized with MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE (only MPI function enter/leave events will be recorded) - configure: - fixed detection of compiler instrumentation type on Cray platforms using the cc compiler wrapper - compiler wrappers: - fixed preprocessing source files to be parsed by OPARI (add path to empty omp.h to the preprocessor flags to avoid multiple declarations of OpenMP functions, types, etc.) - vtnvcc: Remove 'compinst' instrumentation type, if VT is configured with a non-GNU compiler instrumentation support (Fixed "unrecognized option" error) - vtdyn: - added support for instrumenting outer- and inner loops and its iterations (outer=loops within a function, inner=loops within outer loops) - try to get the full prototype of functions to be instrumented - consider default filter rules also if no filter file is given - fixed potential segfault if adding a filter rule w/o stack bounds - print verbose messages on stdout if vtdyn is started from the Dyninst attach library (libvt-dynatt) - vtunify: - print verbose messages on stderr if vtunify is started automatically from the VT library This commit was SVN r26836.
Этот коммит содержится в:
родитель
8c4a3e1269
Коммит
45dfa0d0b1
@ -14,6 +14,7 @@
|
||||
version 11/13
|
||||
- introduced environment variable VT_GPUTRACE, which replaces several
|
||||
CUDA tracing environment variables
|
||||
- added option to enable CUDA stream reuse
|
||||
- expose flush of CUPTI activities as measurement overhead
|
||||
- added CUDA runtime API functions to CUDA runtime API wrapper, which
|
||||
are new in CUDA 4.2 and 5.0.7 (preview release)
|
||||
@ -22,8 +23,14 @@
|
||||
filtered function)
|
||||
- function group filtering
|
||||
- stack level filtering
|
||||
- vtdyn:
|
||||
- added support for instrumenting loops
|
||||
- try to get the full prototype of functions to be instrumented
|
||||
- print verbose messages to stderr if started by the Dyninst
|
||||
attach library (libvt-dynatt)
|
||||
- vtunify[-mpi]:
|
||||
- removed strict order of command line parameters
|
||||
- print verbose messages to stderr if started automatically
|
||||
- added compatibility check for input trace file generated
|
||||
by a different VampirTrace version
|
||||
- drop/adapt records which have references to processes
|
||||
@ -37,12 +44,14 @@
|
||||
- added option '-vt:reusefiles' to reuse kept intermediate
|
||||
files generated by a previous run of OPARI and/or PDT/TAU
|
||||
- added option '-vt:keepfiles' as synonym for '-vt:nocleanup'
|
||||
- fixed preprocessing of OpenMP source files
|
||||
- vtnvcc: fixed forwarding of compiler/linker options
|
||||
- fixed handling of 'errno' in LIBC[-I/O] wrappers on AIX
|
||||
- fixed handling of MPI_Aint in MPI-2 Fortran wrappers
|
||||
(MPI_Aint=INTEGER(KIND=MPI_ADDRESS_KIND))
|
||||
- do not abort if the application requests the MPI thread support level
|
||||
MPI_THREAD_SERIALIZED (no MPI communication events will be recorded)
|
||||
MPI_THREAD_SERIALIZED or MPI_THREAD_MULTIPLE
|
||||
(disable recording of MPI communication events instead)
|
||||
- fixed GPU compute idle time measurement with CUPTI activity tracing
|
||||
- fixed installation issue when prefix is set to the source directory
|
||||
(e.g. ./configure --prefix=$PWD)
|
||||
|
@ -159,10 +159,6 @@ AC_DEFUN([ACVT_COMPINST],
|
||||
compinst_type="pgi9"
|
||||
AC_MSG_RESULT([pgi9])
|
||||
;;
|
||||
*Cray*)
|
||||
compinst_type="craycce"
|
||||
AC_MSG_RESULT([craycce])
|
||||
;;
|
||||
*PathScale*)
|
||||
compinst_type="gnu"
|
||||
AC_MSG_RESULT([gnu (pathscale)])
|
||||
@ -175,6 +171,10 @@ AC_DEFUN([ACVT_COMPINST],
|
||||
compinst_type="gnu"
|
||||
AC_MSG_RESULT([gnu])
|
||||
;;
|
||||
*Cray*)
|
||||
compinst_type="craycce"
|
||||
AC_MSG_RESULT([craycce])
|
||||
;;
|
||||
esac
|
||||
AS_IF([test x"$compinst_type" != x], [break])
|
||||
done
|
||||
|
@ -51,6 +51,8 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_NVCC_TAUINST_PARSE_BIN=
|
||||
VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS=
|
||||
VT_WRAPPER_NVCC_COMPINST_COMPILER_FLAGS=
|
||||
VT_WRAPPER_NVCC_AVAIL_INST="manual"
|
||||
VT_WRAPPER_NVCC_DEFAULT_INST="manual"
|
||||
VT_WRAPPER_NVCC_DEFAULT_PARTYPE="seq"
|
||||
|
||||
VT_WRAPPER_VTLIB="-lvt"
|
||||
@ -293,7 +295,12 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_CC_COMPINST_COMPILER_FLAGS="$compinst_cflags"
|
||||
VT_WRAPPER_CXX_COMPINST_COMPILER_FLAGS="$compinst_cxxflags"
|
||||
VT_WRAPPER_FC_COMPINST_COMPILER_FLAGS="$compinst_fcflags"
|
||||
VT_WRAPPER_NVCC_COMPINST_COMPILER_FLAGS="$compinst_cflags"
|
||||
AS_IF([test x"$compinst_type" = "xgnu"],
|
||||
[
|
||||
VT_WRAPPER_NVCC_COMPINST_COMPILER_FLAGS="$compinst_cflags"
|
||||
VT_WRAPPER_NVCC_AVAIL_INST="$VT_WRAPPER_NVCC_AVAIL_INST compinst"
|
||||
VT_WRAPPER_NVCC_DEFAULT_INST="compinst"
|
||||
])
|
||||
VT_WRAPPER_AVAIL_INST="$VT_WRAPPER_AVAIL_INST compinst"
|
||||
VT_WRAPPER_DEFAULT_INST="compinst"
|
||||
])
|
||||
@ -304,6 +311,7 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_CXX_DYNINST_COMPILER_FLAGS="-g"
|
||||
VT_WRAPPER_FC_DYNINST_COMPILER_FLAGS="-g"
|
||||
VT_WRAPPER_NVCC_DYNINST_COMPILER_FLAGS="-g"
|
||||
VT_WRAPPER_NVCC_AVAIL_INST="$VT_WRAPPER_NVCC_AVAIL_INST dyninst"
|
||||
VT_WRAPPER_AVAIL_INST="$VT_WRAPPER_AVAIL_INST dyninst"
|
||||
])
|
||||
|
||||
@ -343,6 +351,7 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
VT_WRAPPER_NVCC_TAUINST_OPTS="$VT_WRAPPER_CC_TAUINST_OPTS"
|
||||
VT_WRAPPER_NVCC_TAUINST_PARSE_BIN="$VT_WRAPPER_CC_TAUINST_PARSE_BIN"
|
||||
VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS="$VT_WRAPPER_CC_TAUINST_PARSE_OPTS"
|
||||
VT_WRAPPER_NVCC_AVAIL_INST="$VT_WRAPPER_NVCC_AVAIL_INST tauinst"
|
||||
VT_WRAPPER_AVAIL_INST="$VT_WRAPPER_AVAIL_INST tauinst"
|
||||
])
|
||||
|
||||
@ -396,6 +405,8 @@ AC_DEFUN([ACVT_COMPWRAP],
|
||||
AC_SUBST(VT_WRAPPER_NVCC_TAUINST_PARSE_BIN)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_COMPINST_COMPILER_FLAGS)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_AVAIL_INST)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_DEFAULT_INST)
|
||||
AC_SUBST(VT_WRAPPER_NVCC_DEFAULT_PARTYPE)
|
||||
|
||||
AC_SUBST(VT_WRAPPER_VTLIB)
|
||||
|
@ -147,48 +147,74 @@ cat <<End-of-File >$tmp.awk
|
||||
print "{"
|
||||
|
||||
printf" %s result;\n", rettype
|
||||
print " uint64_t time;"
|
||||
print " uint8_t was_recorded;"
|
||||
print " uint32_t tid;"
|
||||
print ""
|
||||
print " if (IS_MPI_TRACE_ON)"
|
||||
print " GET_THREAD_ID(tid);"
|
||||
print ""
|
||||
print " if (IS_MPI_TRACE_ON(tid))"
|
||||
print " {"
|
||||
print " MPI_TRACE_OFF();"
|
||||
print " uint64_t time;"
|
||||
print " uint8_t was_recorded;"
|
||||
print ""
|
||||
print " MPI_TRACE_OFF(tid);"
|
||||
print ""
|
||||
print " time = vt_pform_wtime();"
|
||||
printf" was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__%s]);\n", toupper(mpicall)
|
||||
printf" was_recorded = vt_enter(tid, &time, vt_mpi_regid[VT__%s]);\n", toupper(mpicall)
|
||||
print ""
|
||||
|
||||
printf" CALL_PMPI_%d(%s", NF-2, mpicall
|
||||
printf" VT_UNIMCI_CHECK_PRE(%s,\n", mpicall
|
||||
printf" ("
|
||||
if (NF > 2) {
|
||||
for (i=3; i<=NF; i++) {
|
||||
printf ", "
|
||||
#remove arrays declarations from parameter:
|
||||
gsub("[[].*[]]","",para[i-2])
|
||||
printf para[i-2]
|
||||
printf", "
|
||||
}
|
||||
}
|
||||
print", result, was_recorded, &time);"
|
||||
|
||||
print "\"\", 0, 0),"
|
||||
print " was_recorded, &time);"
|
||||
print ""
|
||||
printf" result = P%s(", mpicall
|
||||
if (NF > 2) {
|
||||
for (i=3; i<=NF; i++) {
|
||||
gsub("[[].*[]]","",para[i-2])
|
||||
printf para[i-2]
|
||||
if (i < NF) {
|
||||
printf", "
|
||||
}
|
||||
}
|
||||
}
|
||||
print ");"
|
||||
print ""
|
||||
printf" VT_UNIMCI_CHECK_POST(%s,\n", mpicall
|
||||
printf" ("
|
||||
if (NF > 2) {
|
||||
for (i=3; i<=NF; i++) {
|
||||
gsub("[[].*[]]","",para[i-2])
|
||||
printf para[i-2]
|
||||
printf ", "
|
||||
}
|
||||
}
|
||||
print "\"\", 0, 0),"
|
||||
print " was_recorded, &time);"
|
||||
print ""
|
||||
print " time = vt_pform_wtime();"
|
||||
print " vt_exit(VT_CURRENT_THREAD, &time);"
|
||||
print " vt_exit(tid, &time);"
|
||||
print ""
|
||||
print " MPI_TRACE_ON();"
|
||||
print " MPI_TRACE_ON(tid);"
|
||||
print " }"
|
||||
print " else"
|
||||
print " {"
|
||||
|
||||
printf" CALL_PMPI_%d(%s", NF-2, mpicall
|
||||
printf" result = P%s(", mpicall
|
||||
if (NF > 2) {
|
||||
for (i=3; i<=NF; i++) {
|
||||
printf ", "
|
||||
#remove arrays declarations from parameter:
|
||||
gsub("[[].*[]]","",para[i-2])
|
||||
printf para[i-2]
|
||||
if (i < NF) {
|
||||
printf", "
|
||||
}
|
||||
}
|
||||
}
|
||||
print", result, 0, NULL);"
|
||||
|
||||
print ");"
|
||||
print " }"
|
||||
print ""
|
||||
print " return result;"
|
||||
|
@ -355,20 +355,20 @@ OpenMP events, and performance counters.
|
||||
<P>
|
||||
After a successful tracing run, VampirTrace writes all collected data to a
|
||||
trace file in the Open Trace Format (OTF)<A NAME="tex2html1"
|
||||
HREF="#foot1526"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1549"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
As a result, the information is available for post-mortem analysis and
|
||||
visualization by various tools.
|
||||
Most notably, VampirTrace provides the input data for the Vampir analysis
|
||||
and visualization tool<A NAME="tex2html2"
|
||||
HREF="#foot1527"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1550"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
|
||||
<P>
|
||||
VampirTrace is included in OpenMPI 1.3 and later versions.
|
||||
If not disabled explicitly, VampirTrace is built automatically when installing
|
||||
OpenMPI<A NAME="tex2html3"
|
||||
HREF="#foot1528"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1551"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
|
||||
<P>
|
||||
@ -1185,7 +1185,7 @@ in a single file, that
|
||||
The names in between may contain wildcards such as ``?'', ``*'', and ``#''; each entry gets a new line.
|
||||
The lists end with <TT>END[_FILE]_<INCLUDE|EXCLUDE>_LIST</TT>. For further information on selective
|
||||
profiling have a look at the TAU documentation<A NAME="tex2html4"
|
||||
HREF="#foot1552"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1575"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
|
||||
<PRE>
|
||||
@ -1202,7 +1202,7 @@ Binary Instrumentation Using Dyninst
|
||||
The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to
|
||||
instrument the application during runtime (binary instrumentation), by using
|
||||
Dyninst<A NAME="tex2html5"
|
||||
HREF="#foot1553"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1576"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
Recompiling is not necessary for this kind of instrumentation,
|
||||
but relinking:
|
||||
@ -1353,7 +1353,7 @@ Tracing Calls to 3rd-Party Libraries
|
||||
VampirTrace is also capable of tracing calls to third-party libraries, which come with
|
||||
at least one C header file even without the library's source code. If VampirTrace was
|
||||
built with support for library tracing (the CTool library<A NAME="tex2html6"
|
||||
HREF="#foot1554"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1577"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is required), the tool <TT>vtlibwrapgen</TT> can be used to
|
||||
generate a wrapper library to intercept each call to the actual library functions.
|
||||
This wrapper library can be linked to the application or used in combination with the
|
||||
@ -1573,6 +1573,33 @@ of a VampirTrace instrumented executable:
|
||||
Quiet (0), Critical (1), Information (2)</TD>
|
||||
<TD ALIGN="LEFT">1</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_IOFSL"></A></TD>
|
||||
<TD ALIGN="LEFT">
|
||||
<BR>
|
||||
<SPAN CLASS="textbf">I/O Forwarding (IOFSL)</SPAN>
|
||||
<BR></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_IOFSL_ASYNC_IO"></A><TT>VT_IOFSL_ASYNC_IO</TT></TD>
|
||||
<TD ALIGN="LEFT">Enable buffered IOFSL writes?
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:iofsl-manual">D.4.2</A></TD>
|
||||
<TD ALIGN="LEFT">no</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_IOFSL_SERVERS"></A><TT>VT_IOFSL_SERVERS</TT></TD>
|
||||
<TD ALIGN="LEFT">Comma-separated list of IOFSL server addresses.
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:iofsl-manual">D.4.2</A></TD>
|
||||
<TD ALIGN="LEFT">-</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_IOFSL_MODE"></A><TT>VT_IOFSL_MODE</TT></TD>
|
||||
<TD ALIGN="LEFT">Mode of the IOFSL communication:
|
||||
<BR>
|
||||
(MULTIFILE_SPLIT or MULTIFILE)
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:iofsl-manual">D.4.2</A></TD>
|
||||
<TD ALIGN="LEFT">MULTIFILE_SPLIT</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_OPTIONAL_FEATURES"></A></TD>
|
||||
<TD ALIGN="LEFT">
|
||||
<BR>
|
||||
@ -1673,24 +1700,6 @@ of a VampirTrace instrumented executable:
|
||||
<TD ALIGN="LEFT">Minimum buffer fill level for synchronized buffer flush in percent.</TD>
|
||||
<TD ALIGN="LEFT">80</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_IOFSL_SERVERS"></A><TT>VT_IOFSL_SERVERS</TT></TD>
|
||||
<TD ALIGN="LEFT">Comma-separated list of IOFSL server addresses.
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:iofsl-manual">D.4.2</A></TD>
|
||||
<TD ALIGN="LEFT">-</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_IOFSL_MODE"></A><TT>VT_IOFSL_MODE</TT></TD>
|
||||
<TD ALIGN="LEFT">Mode of the IOFSL communication (MULTIFILE_SPLIT, MULTIFILE)
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:iofsl-manual">D.4.2</A></TD>
|
||||
<TD ALIGN="LEFT">MULTIFILE_SPLIT</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_IOFSL_ASYNC_IO"></A><TT>VT_IOFSL_ASYNC_IO</TT></TD>
|
||||
<TD ALIGN="LEFT">Enable buffered IOFSL writes?
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:iofsl-manual">D.4.2</A></TD>
|
||||
<TD ALIGN="LEFT">no</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_COUNTERS"></A></TD>
|
||||
<TD ALIGN="LEFT">
|
||||
<BR>
|
||||
@ -1709,6 +1718,12 @@ of a VampirTrace instrumented executable:
|
||||
<TD ALIGN="LEFT">Separator string between counter specifications in VT_METRICS.</TD>
|
||||
<TD ALIGN="LEFT">:</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_PLUGIN_CNTR_METRICS"></A><TT>VT_PLUGIN_CNTR_METRICS</TT></TD>
|
||||
<TD ALIGN="LEFT">Colon-separated list of plugin counter metrics which shall be recorded.
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:plugin_counter_metrics">4.7</A></TD>
|
||||
<TD ALIGN="LEFT">-</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_RUSAGE"></A><TT>VT_RUSAGE</TT></TD>
|
||||
<TD ALIGN="LEFT">Colon-separated list of resource usage counters which shall be recorded.
|
||||
<BR>
|
||||
@ -1719,10 +1734,41 @@ of a VampirTrace instrumented executable:
|
||||
<TD ALIGN="LEFT">Sample interval for recording resource usage counters in ms.</TD>
|
||||
<TD ALIGN="LEFT">100</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_PLUGIN_CNTR_METRICS"></A><TT>VT_PLUGIN_CNTR_METRICS</TT></TD>
|
||||
<TD ALIGN="LEFT">Colon-separated list of plugin counter metrics which shall be recorded.
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_DYNINST"></A></TD>
|
||||
<TD ALIGN="LEFT">
|
||||
<BR>
|
||||
<SPAN CLASS="textbf">Binary Instrumentation (Dyninst)</SPAN>
|
||||
<BR></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_DYN_DETACH"></A><TT>VT_DYN_DETACH</TT></TD>
|
||||
<TD ALIGN="LEFT">Detach Dyninst mutator-program <TT>vtdyn</TT> from application process?</TD>
|
||||
<TD ALIGN="LEFT">yes</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_DYN_IGNORE_NODBG"></A><TT>VT_DYN_IGNORE_NODBG</TT></TD>
|
||||
<TD ALIGN="LEFT">Disable instrumentation of functions which have no debug information?</TD>
|
||||
<TD ALIGN="LEFT">no</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_DYN_INNER_LOOPS"></A><TT>VT_DYN_INNER_LOOPS</TT></TD>
|
||||
<TD ALIGN="LEFT">Instrument inner loops within outer loops?
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:plugin_counter_metrics">4.7</A></TD>
|
||||
(implies <TT>VT_DYN_OUTER_LOOPS=yes</TT>)</TD>
|
||||
<TD ALIGN="LEFT">no</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_DYN_LOOP_ITERS"></A><TT>VT_DYN_LOOP_ITERS</TT></TD>
|
||||
<TD ALIGN="LEFT">Instrument loop iterations?
|
||||
<BR>
|
||||
(implies <TT>VT_DYN_OUTER_LOOPS=yes</TT>)</TD>
|
||||
<TD ALIGN="LEFT">no</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_DYN_OUTER_LOOPS"></A><TT>VT_DYN_OUTER_LOOPS</TT></TD>
|
||||
<TD ALIGN="LEFT">Instrument outer loops within functions?</TD>
|
||||
<TD ALIGN="LEFT">no</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_DYN_SHLIBS"></A><TT>VT_DYN_SHLIBS</TT></TD>
|
||||
<TD ALIGN="LEFT">Colon-separated list of shared libraries for Dyninst instrumentation.
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:dyninst">2.6</A></TD>
|
||||
<TD ALIGN="LEFT">-</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_FILTERING_GROUPING"></A></TD>
|
||||
@ -1732,20 +1778,6 @@ of a VampirTrace instrumented executable:
|
||||
<BR></TD>
|
||||
<TD></TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_DYN_SHLIBS"></A><TT>VT_DYN_SHLIBS</TT></TD>
|
||||
<TD ALIGN="LEFT">Colon-separated list of shared libraries for Dyninst instrumentation.
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:dyninst">2.6</A></TD>
|
||||
<TD ALIGN="LEFT">-</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_DYN_IGNORE_NODBG"></A><TT>VT_DYN_IGNORE_NODBG</TT></TD>
|
||||
<TD ALIGN="LEFT">Disable instrumentation of functions which have no debug information?</TD>
|
||||
<TD ALIGN="LEFT">no</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_DYN_DETACH"></A><TT>VT_DYN_DETACH</TT></TD>
|
||||
<TD ALIGN="LEFT">Detach Dyninst mutator-program <TT>vtdyn</TT> from application process?</TD>
|
||||
<TD ALIGN="LEFT">yes</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_FILTER_SPEC"></A><TT>VT_FILTER_SPEC</TT></TD>
|
||||
<TD ALIGN="LEFT">Name of function/region filter file.
|
||||
<BR>
|
||||
@ -1764,22 +1796,22 @@ of a VampirTrace instrumented executable:
|
||||
⇒ Section <A HREF="#sec:java_filter">5.2</A></TD>
|
||||
<TD ALIGN="LEFT">-</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_GROUP_CLASSES"></A><TT>VT_GROUP_CLASSES</TT></TD>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_JAVA_GROUP_CLASSES"></A><TT>VT_JAVA_GROUP_CLASSES</TT></TD>
|
||||
<TD ALIGN="LEFT">Create a group for each Java class automatically?</TD>
|
||||
<TD ALIGN="LEFT">yes</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_ONOFF_CHECK_STACK_BALANCE"></A><TT>VT_ONOFF_CHECK_STACK_BALANCE</TT></TD>
|
||||
<TD ALIGN="LEFT">Check stack level balance when switching tracing on/off.
|
||||
<BR>
|
||||
⇒ Section <A HREF="#subsec:onoff">2.4.2</A></TD>
|
||||
<TD ALIGN="LEFT">yes</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_MAX_STACK_DEPTH"></A><TT>VT_MAX_STACK_DEPTH</TT></TD>
|
||||
<TD ALIGN="LEFT">Maximum number of stack levels to be traced.
|
||||
<BR>
|
||||
(0 = unlimited)</TD>
|
||||
<TD ALIGN="LEFT">0</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_ONOFF_CHECK_STACK_BALANCE"></A><TT>VT_ONOFF_CHECK_STACK_BALANCE</TT></TD>
|
||||
<TD ALIGN="LEFT">Check stack level balance when switching tracing on/off.
|
||||
<BR>
|
||||
⇒ Section <A HREF="#subsec:onoff">2.4.2</A></TD>
|
||||
<TD ALIGN="LEFT">yes</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_SYMBOL_LIST"></A></TD>
|
||||
<TD ALIGN="LEFT">
|
||||
<BR>
|
||||
@ -1993,7 +2025,7 @@ for the enhanced timer synchronization:
|
||||
|
||||
<UL>
|
||||
<LI>CLAPACK <A NAME="tex2html7"
|
||||
HREF="#foot1564"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1587"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
|
||||
</LI>
|
||||
<LI>AMD ACML
|
||||
@ -2305,6 +2337,9 @@ To enable a particular composition of CUDA measurement features the variable
|
||||
<TR><TD ALIGN="LEFT"><TT>memcpy</TT></TD>
|
||||
<TD ALIGN="LEFT">CUDA memory copies</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><TT>stream_reuse</TT></TD>
|
||||
<TD ALIGN="LEFT">force reusing of CUDA streams after cudaStreamDestroy()</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><TT>memusage</TT></TD>
|
||||
<TD ALIGN="LEFT">CUDA memory allocation</TD>
|
||||
</TR>
|
||||
@ -2405,7 +2440,7 @@ Record GPU memory usage as counter ``gpu_mem_usage``, if set to <TT>yes</TT>,
|
||||
<P>
|
||||
Every CUDA stream, which is executed on a CUDA-capable device and
|
||||
used during program execution, creates its own thread. ``CUDA-Threads'' can
|
||||
contain communication and kernel events and have the following notation:
|
||||
contain CUDA communication, kernel and counter records and have the following notation:
|
||||
|
||||
<P>
|
||||
<DIV ALIGN="CENTER">
|
||||
@ -2415,9 +2450,20 @@ Every CUDA stream, which is executed on a cuda-capable device and
|
||||
|
||||
<P>
|
||||
Due to an issue with CUPTI, the device is not always properly
|
||||
shown. The CUDA stream number is increasing, beginning with the default
|
||||
stream '1'. The stream number provided by CUPTI might not be evenly increasing.
|
||||
Only streams with traceable information will be written.
|
||||
shown (<TT>device</TT> is displayed as <TT>?</TT>). The CUDA stream number is
|
||||
increasing, beginning with the default stream <TT>1</TT>. The stream number
|
||||
provided by CUPTI might not be evenly increasing. CUDA streams without records
|
||||
will not be written.
|
||||
<P></P>
|
||||
|
||||
<P>
|
||||
If CUDA libraries are used, which create CUDA streams themselves,
|
||||
many CUDA threads (CUDA streams per device) can appear in a program trace.
|
||||
In that case, it may be useful to force reusing of CUDA streams (add option
|
||||
<TT>stream_reuse</TT> to <TT>VT_GPUTRACE</TT>). This enables VampirTrace to
|
||||
reuse an existing thread buffer of a destroyed CUDA stream and therewith
|
||||
reduces the number of separate CUDA threads (or streams) in the trace. The
|
||||
CUDA stream number will then be missing in the CUDA thread notation.
|
||||
<P></P>
|
||||
|
||||
<P>
|
||||
@ -2426,10 +2472,12 @@ As kernels and asynchronous memory copies are executed asynchronously
|
||||
a synchronizing CUDA API function call or the program exits.
|
||||
Every used CUDA device and its corresponding host thread has its own buffer
|
||||
(8192 bytes by default), when CUDA tracing is done via the CUDA runtime API wrapper.
|
||||
If the buffer is full, it will be flushed immediately.
|
||||
When using CUDA tracing via CUPTI every CUDA context creation initiates the
|
||||
allocation of an own buffer (65536 bytes by default).
|
||||
allocation of an own buffer (65536 bytes by default). If the buffer is full,
|
||||
further records will be dropped and a warning will be shown in <SPAN CLASS="textit">stderr</SPAN> output.
|
||||
The buffer size can be specified in bytes with the environment variable
|
||||
<TT>VT_CUDATRACE_BUFFER_SIZE</TT>.
|
||||
<TT>VT_CUDATRACE_BUFFER_SIZE</TT>.
|
||||
<P></P>
|
||||
|
||||
<P>
|
||||
@ -2467,13 +2515,13 @@ Several new region groups have been introduced:
|
||||
<H3><A NAME="SECTION00550100000000000000">
|
||||
Tracing CUDA Runtime API via CUPTI</A>
|
||||
</H3>
|
||||
Using CUPTI to trace the CUDA runtime API and GPU activities needs the
|
||||
environment variable <TT>VT_CUDATRACE_CUPTI</TT> to be set to <TT>yes</TT>.
|
||||
By default, the library wrapper will be used. If both tracing methods are
|
||||
configured during the VampirTrace build process, the CUDA runtime library
|
||||
should be preloaded to reduce tracing overhead (<TT>LD_PRELOAD=libcudart.so</TT>).
|
||||
Otherwise the library wrapper intercepts every CUDA runtime API call and makes
|
||||
a short but unnecessary check, whether it is enabled.
|
||||
To enable CUDA runtime API tracing via CUPTI, the option <TT>cupti</TT> has to be
|
||||
added to <TT>VT_GPUTRACE</TT>. By default, the library wrapper will be used.
|
||||
If both tracing methods are configured during the VampirTrace build process,
|
||||
the CUDA runtime library should be preloaded to reduce tracing overhead
|
||||
(the dynamic linker can use <TT>LD_PRELOAD=libcudart.so</TT>). Otherwise
|
||||
the library wrapper intercepts every CUDA runtime API call and makes a short
|
||||
but unnecessary check, whether it is enabled.
|
||||
<P></P>
|
||||
|
||||
<P>
|
||||
@ -3563,7 +3611,7 @@ default: automatically by configure.
|
||||
enable support for Dyninst instrumentation,
|
||||
default: enable if found by configure.
|
||||
<SPAN CLASS="textbf">Note:</SPAN> Requires Dyninst<A NAME="tex2html8"
|
||||
HREF="#foot1586"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1609"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> version 6.1 or higher!
|
||||
|
||||
<P>
|
||||
@ -3585,9 +3633,9 @@ enable support for automatic source code
|
||||
instrumentation by using TAU, default: enable if
|
||||
found by configure.
|
||||
<SPAN CLASS="textbf">Note:</SPAN> Requires PDToolkit<A NAME="tex2html9"
|
||||
HREF="#foot1587"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1610"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> or TAU<A NAME="tex2html10"
|
||||
HREF="#foot1588"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1611"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>!
|
||||
|
||||
<P>
|
||||
@ -4156,7 +4204,7 @@ give the path for JVMTI-include files, default:
|
||||
|
||||
<P>
|
||||
To enable support for generating wrappers for 3rd-party libraries, the C code parser CTool<A NAME="tex2html11"
|
||||
HREF="#foot1589"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1612"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is needed:
|
||||
|
||||
<P>
|
||||
@ -4570,11 +4618,19 @@ options:
|
||||
|
||||
-o, --output FILE Rewrite instrumented executable to specified pathname.
|
||||
|
||||
-f, --filter FILE Pathname of input filter file.
|
||||
|
||||
-s, --shlibs SHLIBS[,...]
|
||||
Comma-separated list of shared libraries which shall
|
||||
also be instrumented.
|
||||
|
||||
-f, --filter FILE Pathname of input filter file.
|
||||
--outer-loops Do instrument outer loops within functions.
|
||||
|
||||
--inner-loops Do instrument inner loops within outer loops.
|
||||
(implies --outer-loops)
|
||||
|
||||
--loop-iters Do instrument loop iterations.
|
||||
(implies --outer-loops)
|
||||
|
||||
--ignore-nodbg Don't instrument functions which have no debug
|
||||
information.
|
||||
@ -5278,7 +5334,7 @@ Since IOFSL servers can handle multiple clients, an N:M mapping of clients to se
|
||||
|
||||
<P>
|
||||
When using the IOFSL integration, all write requests in OTF are issued using the zoidfs API<A NAME="tex2html12"
|
||||
HREF="#foot3154"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot3199"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>. Those writes are handled by the IOFSL forwarding servers and aggregated into a single file using the atomic append feature. The offset in the multifile is returned to OTF and stored in a second file, the so called index file, in order to maintain the mapping between written blocks and streams. For any block of a stream written into the multifile, the index file contains the ID of the stream, the start of the block, and its length. This allows for an efficient reading of blocks since only the index file has to be scanned for entries for a given stream ID. Additionally, a large number of logical files (streams) can be stored using only two physical files.
|
||||
|
||||
<P>
|
||||
@ -5290,7 +5346,7 @@ Installation</A>
|
||||
<P>
|
||||
To use this setup, IOFSL and VampirTrace have to be compiled in this order.
|
||||
In the following sections, the directory <TT><install_dir></TT> should be replaced with a - possibly user-local - directory used for installation, e.g. <TT>$HOME/local</TT><A NAME="tex2html13"
|
||||
HREF="#foot3158"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot3203"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
The installation procedure for IOFSL is described at https://trac.mcs.anl.gov/projects/iofsl/wiki/Building.
|
||||
Currently the <TT>iofsl_vampir</TT> git branch is required.
|
||||
@ -5469,7 +5525,7 @@ They will be launched on dedicated compute nodes that are part of the batch Job
|
||||
PBS Options</A>
|
||||
</H4>
|
||||
It is important to reserve a sufficient number of processor cores. The number of cores requested must be large enough to contain the number of application cores plus the number of cores required for the IOFSL server instances. Each IOFSL server will run on a dedicated node<A NAME="tex2html14"
|
||||
HREF="#foot3202"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot3247"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>. Thus N_allocated ≥ ((N_IOFSL * 16) + N_Application) must hold.
|
||||
<P>
|
||||
Example using 64 server instances:
|
||||
@ -5566,10 +5622,10 @@ Configuring the Server</A>
|
||||
The server is configured using a configuration file.
|
||||
At server start-up, this file is provided using the <TT>-config</TT> argument.
|
||||
The Cray XK6 configuration file is provided in the package<A NAME="tex2html15"
|
||||
HREF="#foot3342"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot3387"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
For more information about the options available please refer to the IOFSL documentation<A NAME="tex2html16"
|
||||
HREF="#foot3343"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot3388"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
The most important option is the <TT>serverlist</TT> entry in the <TT>bmi</TT> section which takes a list of server addresses, e.g. :
|
||||
<PRE>
|
||||
@ -5975,99 +6031,99 @@ If you provide us with your additions afterwards we will consider merging them
|
||||
into the official VampirTrace package.
|
||||
<BR><HR><H4>Footnotes</H4>
|
||||
<DL>
|
||||
<DT><A NAME="foot1526">... (OTF)</A><A
|
||||
<DT><A NAME="foot1549">... (OTF)</A><A
|
||||
HREF="#tex2html1"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.tu-dresden.de/zih/otf
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1527">... tool </A><A
|
||||
<DT><A NAME="foot1550">... tool </A><A
|
||||
HREF="#tex2html2"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.vampir.eu
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1528">...
|
||||
<DT><A NAME="foot1551">...
|
||||
Open MPI </A><A
|
||||
HREF="#tex2html3"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.open-mpi.org/faq/?category=vampirtrace
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1552">... documentation </A><A
|
||||
<DT><A NAME="foot1575">... documentation </A><A
|
||||
HREF="#tex2html4"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.cs.uoregon.edu/Research/tau/docs/newguide/bk05ch02.html#d0e3770
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1553">...
|
||||
<DT><A NAME="foot1576">...
|
||||
Dyninst </A><A
|
||||
HREF="#tex2html5"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.dyninst.org
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1554">... library </A><A
|
||||
<DT><A NAME="foot1577">... library </A><A
|
||||
HREF="#tex2html6"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://sourceforge.net/projects/ctool
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1564">... CLAPACK</A><A
|
||||
<DT><A NAME="foot1587">... CLAPACK</A><A
|
||||
HREF="#tex2html7"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>www.netlib.org/clapack
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1586">... Dyninst </A><A
|
||||
<DT><A NAME="foot1609">... Dyninst </A><A
|
||||
HREF="#tex2html8"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.dyninst.org
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1587">... PDToolkit </A><A
|
||||
<DT><A NAME="foot1610">... PDToolkit </A><A
|
||||
HREF="#tex2html9"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.cs.uoregon.edu/research/pdt/home.php
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1588">... TAU </A><A
|
||||
<DT><A NAME="foot1611">... TAU </A><A
|
||||
HREF="#tex2html10"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://tau.uoregon.edu
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1589">... CTool </A><A
|
||||
<DT><A NAME="foot1612">... CTool </A><A
|
||||
HREF="#tex2html11"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://sourceforge.net/projects/ctool
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot3154">... API</A><A
|
||||
<DT><A NAME="foot3199">... API</A><A
|
||||
HREF="#tex2html12"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>The OTF master control file is written using POSIX I/O in any case.
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot3158">...$HOME/local</A><A
|
||||
<DT><A NAME="foot3203">...$HOME/local</A><A
|
||||
HREF="#tex2html13"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>The software packages can be installed in different directories.
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot3202">... node</A><A
|
||||
<DT><A NAME="foot3247">... node</A><A
|
||||
HREF="#tex2html14"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>The server makes use of all of the node's resources by multithreading and allocating large I/O buffers.
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot3342">... package</A><A
|
||||
<DT><A NAME="foot3387">... package</A><A
|
||||
HREF="#tex2html15"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD><TT>tools/vtiofsl/platform/crayxk6-iofwd.cf</TT>
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot3343">... documentation</A><A
|
||||
<DT><A NAME="foot3388">... documentation</A><A
|
||||
HREF="#tex2html16"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>https://trac.mcs.anl.gov/projects/iofsl/wiki/ConfigurationFile
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -572,8 +572,13 @@ int RFG_Filter_add( RFG_Filter* filter, const char* pattern, int32_t climit,
|
||||
/* add new filter assignment */
|
||||
|
||||
filter->assigns[filter->nassigns].climit = climit;
|
||||
filter->assigns[filter->nassigns].sbounds[0] = sbounds[0];
|
||||
filter->assigns[filter->nassigns].sbounds[1] = sbounds[1];
|
||||
filter->assigns[filter->nassigns].sbounds[0] = 1;
|
||||
filter->assigns[filter->nassigns].sbounds[1] = (uint32_t)-1;
|
||||
if( sbounds )
|
||||
{
|
||||
filter->assigns[filter->nassigns].sbounds[0] = sbounds[0];
|
||||
filter->assigns[filter->nassigns].sbounds[1] = sbounds[1];
|
||||
}
|
||||
filter->assigns[filter->nassigns].flags = flags;
|
||||
filter->assigns[filter->nassigns].pattern = strdup( pattern );
|
||||
filter->nassigns++;
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -26,7 +26,6 @@
|
||||
#include "BPatch_image.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -53,19 +52,23 @@ typedef enum
|
||||
struct ParamsS
|
||||
{
|
||||
ParamsS()
|
||||
: mode(MODE_CREATE), mutatee_pid(-1), verbose_level(1),
|
||||
detach(true), ignore_no_dbg(false), show_usage(false),
|
||||
show_version(false) {}
|
||||
: mode( MODE_CREATE ), mutatee_pid( -1 ), verbose_level( 1 ),
|
||||
detach( true ), outer_loops( false ), inner_loops( false ),
|
||||
loop_iters( false ), ignore_no_dbg( false ), show_usage( false ),
|
||||
show_version( false ) {}
|
||||
|
||||
MutationT mode; // mutation mode
|
||||
std::string mutatee; // mutatee executable name
|
||||
int mutatee_pid; // mutatee PID
|
||||
std::vector<std::string> mutatee_args; // mutatee arguments
|
||||
std::vector<std::string> shlibs; // shared libs. to be instrumented
|
||||
std::vector<std::string> shlibs; // shared libs. to instrument
|
||||
std::string filtfile; // pathname of filter file
|
||||
std::string outfile; // file name of binary to rewrite
|
||||
uint32_t verbose_level; // verbose level
|
||||
bool detach; // flag: detach from mutatee?
|
||||
bool outer_loops; // flag: instrument outer loops?
|
||||
bool inner_loops; // flag: instrument inner loops?
|
||||
bool loop_iters; // flag: instrument loop iterations?
|
||||
bool ignore_no_dbg; // flag: ignore funcs. without debug?
|
||||
bool show_usage; // flag: show usage text?
|
||||
bool show_version; // flag: show VampirTrace version?
|
||||
@ -91,66 +94,155 @@ public:
|
||||
private:
|
||||
|
||||
//
|
||||
// structure that contains context information about functions to
|
||||
// be instrumented
|
||||
// base structure for regions (= functions, loops, or loop iterations)
|
||||
// to instrument
|
||||
//
|
||||
struct InstFuncS
|
||||
struct RegionS
|
||||
{
|
||||
InstFuncS( const uint32_t & _index, const std::string & _name,
|
||||
const std::string & _file, const uint32_t & _lno,
|
||||
const BPatch_Vector<BPatch_point*> *& _entry_points,
|
||||
const BPatch_Vector<BPatch_point*> *& _exit_points )
|
||||
: index( _index ), name( _name ), file( _file ), lno( _lno ),
|
||||
entry_points( _entry_points ), exit_points( _exit_points ) {}
|
||||
//
|
||||
// structure for region source code location
|
||||
//
|
||||
struct SclS
|
||||
{
|
||||
SclS() : line_number( 0 ) {}
|
||||
|
||||
// function index within region id table
|
||||
// check whether source code location is valid
|
||||
bool valid() const
|
||||
{
|
||||
return ( line_number > 0 && file_name.length() > 0 );
|
||||
}
|
||||
|
||||
std::string file_name; // source file name
|
||||
uint32_t line_number; // line number within source file
|
||||
|
||||
};
|
||||
|
||||
//
|
||||
// structure for region instrumentation points
|
||||
//
|
||||
struct InstPointsS
|
||||
{
|
||||
InstPointsS() : entries( 0 ), exits( 0 ) {}
|
||||
|
||||
const BPatch_Vector<BPatch_point*> * entries; // entry points
|
||||
const BPatch_Vector<BPatch_point*> * exits; // exit points
|
||||
|
||||
};
|
||||
|
||||
// constructor
|
||||
RegionS( const std::string & _name, const SclS & _scl,
|
||||
const InstPointsS & _inst_points );
|
||||
|
||||
// destructor
|
||||
virtual ~RegionS();
|
||||
|
||||
// new operator to check number of created regions
|
||||
// (returns 0 if VT_MAX_DYNINST_REGIONS will be exceeded)
|
||||
static inline void * operator new( size_t size ) throw();
|
||||
|
||||
// the overloaded new operator calls malloc(), so we have to have a
|
||||
// delete operator which calls free()
|
||||
static inline void operator delete( void * ptr );
|
||||
|
||||
// counter of regions to instrument (max. VT_MAX_DYNINST_REGIONS)
|
||||
static uint32_t Count;
|
||||
|
||||
// region index
|
||||
uint32_t index;
|
||||
|
||||
// function name
|
||||
// region name
|
||||
std::string name;
|
||||
|
||||
// source file name and line number of function definition
|
||||
//
|
||||
std::string file;
|
||||
uint32_t lno;
|
||||
// region source code location
|
||||
SclS scl;
|
||||
|
||||
// function entry and exit points to be instrumented
|
||||
//
|
||||
const BPatch_Vector<BPatch_point*> * entry_points;
|
||||
const BPatch_Vector<BPatch_point*> * exit_points;
|
||||
// region instrumentation points
|
||||
InstPointsS inst_points;
|
||||
|
||||
};
|
||||
|
||||
//
|
||||
// structure for loop regions to instrument
|
||||
//
|
||||
struct LoopS : RegionS
|
||||
{
|
||||
//
|
||||
// type for loop iteration regions
|
||||
//
|
||||
typedef RegionS IterationT;
|
||||
|
||||
// constructor
|
||||
LoopS( const std::string & _name, const SclS & _scl,
|
||||
const InstPointsS & _inst_points, IterationT * _iteration = 0 )
|
||||
: RegionS( _name, _scl, _inst_points ), iteration( _iteration ) {}
|
||||
|
||||
// destructor
|
||||
~LoopS()
|
||||
{
|
||||
if( iteration )
|
||||
delete iteration;
|
||||
}
|
||||
|
||||
// loop iteration region to instrument
|
||||
IterationT * iteration;
|
||||
|
||||
};
|
||||
|
||||
//
|
||||
// structure for function regions to instrument
|
||||
//
|
||||
struct FunctionS : RegionS
|
||||
{
|
||||
// constructor
|
||||
FunctionS( const std::string & _name, const SclS & _scl,
|
||||
const InstPointsS & _inst_points,
|
||||
const std::vector<LoopS*> & _loops = std::vector<LoopS*>() )
|
||||
: RegionS( _name, _scl, _inst_points ), loops( _loops ) {}
|
||||
|
||||
// loops within the function
|
||||
std::vector<LoopS*> loops;
|
||||
|
||||
// destructor
|
||||
~FunctionS()
|
||||
{
|
||||
for( uint32_t i = 0; i < loops.size(); i++ )
|
||||
delete loops[i];
|
||||
}
|
||||
};
|
||||
|
||||
// create/attach to a process or open binary for rewriting
|
||||
bool initialize();
|
||||
|
||||
// continue execution of mutatee or rewrite binary
|
||||
bool finalize( bool & error );
|
||||
|
||||
// get functions to be instrumented
|
||||
bool getFunctions( std::vector<InstFuncS> & instFuncs );
|
||||
// get functions to instrument
|
||||
bool getFunctions( std::vector<FunctionS*> & funcRegions ) const;
|
||||
|
||||
// instrument a function entry
|
||||
bool instrumentFunctionEntry( const InstFuncS & instFunc );
|
||||
// instrument functions
|
||||
bool instrumentFunctions(
|
||||
const std::vector<FunctionS*> & funcRegions ) const;
|
||||
|
||||
// instrument a function exit
|
||||
bool instrumentFunctionExit( const InstFuncS & instFunc );
|
||||
// instrument a region entry
|
||||
inline bool instrumentRegionEntry( const RegionS * region,
|
||||
const bool isLoop ) const;
|
||||
|
||||
// read input filter file
|
||||
bool readFilter();
|
||||
// instrument a region exit
|
||||
inline bool instrumentRegionExit( const RegionS * region,
|
||||
const bool isLoop ) const;
|
||||
|
||||
// check whether module is excluded from instrumentation
|
||||
inline bool constraintModule( const std::string & name ) const;
|
||||
|
||||
// check whether function is excluded from instrumentation
|
||||
inline bool constraintFunction( const std::string & name ) const;
|
||||
// check whether region is excluded from instrumentation
|
||||
inline bool constraintRegion( const std::string & name ) const;
|
||||
|
||||
// check whether mutatee uses MPI
|
||||
inline bool isMPI() const;
|
||||
|
||||
// find certain function in mutatee
|
||||
inline bool findFunction( const std::string & name,
|
||||
BPatch_function *& func ) const;
|
||||
BPatch_function *& func ) const;
|
||||
|
||||
// entire Dyninst library object
|
||||
BPatch m_bpatch;
|
||||
@ -161,7 +253,7 @@ private:
|
||||
// mutatee's image object
|
||||
BPatch_image * m_appImage;
|
||||
|
||||
// instrumentation functions to be inserted at entry/exit points
|
||||
// instrumentation functions to insert at entry/exit points
|
||||
//
|
||||
BPatch_function * m_vtStartFunc;
|
||||
BPatch_function * m_vtEndFunc;
|
||||
|
@ -42,10 +42,6 @@
|
||||
<desc>Resource Usage Counter</desc>
|
||||
<parentid>count</parentid>
|
||||
</group>
|
||||
<group identifier="Dyninst" priority="200" descname="Dyninst">
|
||||
<desc>Dyninst</desc>
|
||||
<parentid>FiltGrp</parentid>
|
||||
</group>
|
||||
<group identifier="java" priority="100" descname="Java">
|
||||
<desc> </desc>
|
||||
<parentid>FiltGrp</parentid>
|
||||
@ -66,13 +62,17 @@
|
||||
<desc>Operation Mode</desc>
|
||||
<parentid>optFeat</parentid>
|
||||
</group>
|
||||
<group identifier="Dyninst" priority="500" descname="Binary instrumentation (Dyninst)">
|
||||
<desc>Binary instrumentation (Dyninst)</desc>
|
||||
<parentid>optFeat</parentid>
|
||||
</group>
|
||||
<group identifier="mcor" priority="600" descname="MPI Correctness">
|
||||
<desc>MPI Correctness</desc>
|
||||
<parentid>optFeat</parentid>
|
||||
</group>
|
||||
<group identifier="iofsl" priority="700" descname="I/O Forwarding (IOFSL)">
|
||||
<desc>IOFSL</desc>
|
||||
<parentid>optFeat</parentid>
|
||||
<parentid>gen</parentid>
|
||||
</group>
|
||||
<group identifier="snam" priority="000" descname="Symbol Names">
|
||||
<desc>Symbol Names</desc>
|
||||
@ -572,7 +572,7 @@
|
||||
<since major="5" minor="0" patch="0" /><available>@VT_SETUP_RUSAGE@</available>
|
||||
<anchor>VT_SETUP_VT_RUSAGE_INTV</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="100" descname="Additional libraries for Dyninst binary instrumentation" isadvanced="1" group="Dyninst"
|
||||
<env isdeprecated="0" priority="100" descname="Additional libraries to instrument" isadvanced="1" group="Dyninst"
|
||||
envname="VT_DYN_SHLIBS">
|
||||
<desc>Colon-separated list of shared libraries for Dyninst instrumentation.</desc>
|
||||
<valuedesc>
|
||||
@ -594,6 +594,39 @@
|
||||
<since major="5" minor="11" patch="0" /><available>@VT_SETUP_DYNINST@</available>
|
||||
<anchor>VT_SETUP_VT_DYN_IGNORE_NODBG</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="100" descname="Instrumentation of outer loops within functions" isadvanced="1" group="Dyninst"
|
||||
envname="VT_DYN_OUTER_LOOPS">
|
||||
<desc>Enable / disable instrumentation of outer loops within functions.</desc>
|
||||
<valuedesc>
|
||||
<value type="bool">
|
||||
<default>0</default>
|
||||
</value>
|
||||
</valuedesc>
|
||||
<since major="5" minor="13" patch="0" /><available>@VT_SETUP_DYNINST@</available>
|
||||
<anchor>VT_SETUP_VT_DYN_OUTER_LOOPS</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="100" descname="Instrumentation of inner loops within outer loops" isadvanced="1" group="Dyninst"
|
||||
envname="VT_DYN_INNER_LOOPS">
|
||||
<desc>Enable / disable instrumentation of inner loops within outer loops.</desc>
|
||||
<valuedesc>
|
||||
<value type="bool">
|
||||
<default>0</default>
|
||||
</value>
|
||||
</valuedesc>
|
||||
<since major="5" minor="13" patch="0" /><available>@VT_SETUP_DYNINST@</available>
|
||||
<anchor>VT_SETUP_VT_DYN_INNER_LOOPS</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="100" descname="Instrumentation of loop iterations" isadvanced="1" group="Dyninst"
|
||||
envname="VT_DYN_LOOP_ITERS">
|
||||
<desc>Enable / disable instrumentation of loop iterations.</desc>
|
||||
<valuedesc>
|
||||
<value type="bool">
|
||||
<default>0</default>
|
||||
</value>
|
||||
</valuedesc>
|
||||
<since major="5" minor="13" patch="0" /><available>@VT_SETUP_DYNINST@</available>
|
||||
<anchor>VT_SETUP_VT_DYN_LOOP_ITERS</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="100" descname="Detach Dyninst mutator-program from application process" isadvanced="1" group="Dyninst"
|
||||
envname="VT_DYN_DETACH">
|
||||
<desc>Whether to detach the Dyninst mutator-program (vtdyn) from the application process.</desc>
|
||||
@ -641,7 +674,7 @@
|
||||
<anchor>VT_SETUP_VT_JAVA_FILTER_SPEC</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="100" descname="Java class grouping" isadvanced="1" group="java"
|
||||
envname="VT_GROUP_CLASSES">
|
||||
envname="VT_JAVA_GROUP_CLASSES">
|
||||
<desc>Whether to create a group for each Java class automatically or not.</desc>
|
||||
<valuedesc>
|
||||
<value type="bool">
|
||||
@ -649,7 +682,7 @@
|
||||
</value>
|
||||
</valuedesc>
|
||||
<since major="5" minor="0" patch="0" /><available>@VT_SETUP_JAVA@</available>
|
||||
<anchor>VT_SETUP_VT_GROUP_CLASSES</anchor>
|
||||
<anchor>VT_SETUP_VT_JAVA_GROUP_CLASSES</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="100" descname="Check stack level balance when switching tracing on/off" isadvanced="1" group="FiltGrpMisc"
|
||||
envname="VT_ONOFF_CHECK_STACK_BALANCE">
|
||||
|
@ -79,6 +79,13 @@ static void showUsage( void );
|
||||
static bool shareUnifyControls( void );
|
||||
#endif // VT_MPI
|
||||
|
||||
// local variables
|
||||
//
|
||||
|
||||
// output stream for verbose messages
|
||||
// (stdout if vtunify is started from the command line, otherwise stderr)
|
||||
static FILE * verboseStream = stderr;
|
||||
|
||||
// global variables
|
||||
//
|
||||
|
||||
@ -207,6 +214,13 @@ VTUNIFY_MAIN( int argc, char ** argv )
|
||||
break;
|
||||
}
|
||||
|
||||
#ifndef VT_LIB
|
||||
// set output stream for verbose messages to stdout, if vtunify is
|
||||
// started from the command line
|
||||
if( !Params.autostart )
|
||||
verboseStream = stdout;
|
||||
#endif // VT_LIB
|
||||
|
||||
// register hook classes
|
||||
theHooks->registerHooks();
|
||||
|
||||
@ -1301,6 +1315,14 @@ parseCommandLine( int argc, char ** argv )
|
||||
{
|
||||
Params.verbose_level++;
|
||||
}
|
||||
#ifndef VT_LIB
|
||||
// --autostart (hidden)
|
||||
//
|
||||
else if( strcmp( argv[i], "--autostart" ) == 0 )
|
||||
{
|
||||
Params.autostart = true;
|
||||
}
|
||||
#endif // VT_LIB
|
||||
// input trace file prefix
|
||||
//
|
||||
else if( Params.in_file_prefix.length() == 0 )
|
||||
@ -1709,6 +1731,13 @@ shareParams()
|
||||
7, MPI_CHAR, MPI_COMM_WORLD, &size ) );
|
||||
buffer_size += size;
|
||||
|
||||
#ifndef VT_LIB
|
||||
// autostart
|
||||
//
|
||||
CALL_MPI( MPI_Pack_size( 1, MPI_CHAR, MPI_COMM_WORLD, &size ) );
|
||||
buffer_size += size;
|
||||
#endif // VT_LIB
|
||||
|
||||
#if defined(HAVE_IOFSL) && HAVE_IOFSL
|
||||
// iofsl_mode + iofsl_flags + iofsl_num_servers
|
||||
//
|
||||
@ -1821,6 +1850,12 @@ shareParams()
|
||||
CALL_MPI( MPI_Pack( &(Params.onlystats), 1, MPI_CHAR, buffer,
|
||||
buffer_size, &position, MPI_COMM_WORLD ) );
|
||||
|
||||
#ifndef VT_LIB
|
||||
// autostart
|
||||
CALL_MPI( MPI_Pack( &(Params.autostart), 1, MPI_CHAR, buffer,
|
||||
buffer_size, &position, MPI_COMM_WORLD ) );
|
||||
#endif // VT_LIB
|
||||
|
||||
#if defined(HAVE_IOFSL) && HAVE_IOFSL
|
||||
// iofsl_mode
|
||||
CALL_MPI( MPI_Pack( &(Params.iofsl_mode), 1, MPI_UNSIGNED, buffer,
|
||||
@ -1961,6 +1996,12 @@ shareParams()
|
||||
CALL_MPI( MPI_Unpack( buffer, buffer_size, &position, &(Params.onlystats),
|
||||
1, MPI_CHAR, MPI_COMM_WORLD ) );
|
||||
|
||||
#ifndef VT_LIB
|
||||
// autostart
|
||||
CALL_MPI( MPI_Unpack( buffer, buffer_size, &position, &(Params.autostart),
|
||||
1, MPI_CHAR, MPI_COMM_WORLD ) );
|
||||
#endif // VT_LIB
|
||||
|
||||
#if defined(HAVE_IOFSL) && HAVE_IOFSL
|
||||
// iofsl_mode
|
||||
CALL_MPI( MPI_Unpack( buffer, buffer_size, &position,
|
||||
@ -2535,7 +2576,7 @@ VPrint( uint8_t level, const char * fmt, ... )
|
||||
#endif // VT_UNIFY_VERBOSE_TIME_PREFIX
|
||||
|
||||
va_start( ap, fmt );
|
||||
vprintf( fmt, ap );
|
||||
vfprintf( verboseStream, fmt, ap );
|
||||
va_end( ap );
|
||||
} // MASTER
|
||||
}
|
||||
@ -2559,7 +2600,7 @@ PVPrint( uint8_t level, const char * fmt, ... )
|
||||
|
||||
va_start( ap, fmt );
|
||||
#if !(defined(VT_MPI) || (defined(HAVE_OMP) && HAVE_OMP))
|
||||
vprintf( fmt, ap );
|
||||
vfprintf( verboseStream, fmt, ap );
|
||||
#else // !(VT_MPI || HAVE_OMP)
|
||||
char msg[STRBUFSIZE] = "";
|
||||
# if defined(VT_MPI) && !(defined(HAVE_OMP) && HAVE_OMP)
|
||||
@ -2582,7 +2623,7 @@ PVPrint( uint8_t level, const char * fmt, ... )
|
||||
# if defined(HAVE_OMP) && HAVE_OMP
|
||||
# pragma omp critical
|
||||
# endif // HAVE_OMP
|
||||
printf( "%s", msg );
|
||||
fprintf( verboseStream, "%s", msg );
|
||||
#endif // !(VT_MPI || HAVE_OMP)
|
||||
va_end( ap );
|
||||
}
|
||||
|
@ -104,6 +104,10 @@ struct ParamsS
|
||||
showusage( false ), showversion( false ), showprogress( false ),
|
||||
bequiet( false ), onlystats( false )
|
||||
{
|
||||
#ifndef VT_LIB
|
||||
autostart = false;
|
||||
#endif // VT_LIB
|
||||
|
||||
#if defined(HAVE_ZLIB) && HAVE_ZLIB
|
||||
docompress = true;
|
||||
#endif // HAVE_ZLIB
|
||||
@ -150,6 +154,11 @@ struct ParamsS
|
||||
bool bequiet; // flag: print no messages?
|
||||
bool onlystats; // flag: unify only summarized information?
|
||||
|
||||
#ifndef VT_LIB
|
||||
bool autostart; // flag: vtunify started automatically after
|
||||
// program termination?
|
||||
#endif // VT_LIB
|
||||
|
||||
#if defined(HAVE_IOFSL) && HAVE_IOFSL
|
||||
// IOFSL parameters
|
||||
//
|
||||
|
@ -59,7 +59,8 @@ vtdata_DATA = \
|
||||
@CROSS_PREFIX@vtc++-wrapper-data.txt \
|
||||
@CROSS_PREFIX@vtfort-wrapper-data.txt \
|
||||
$(vtdata_DATA_vtnvcc) \
|
||||
$(vtdata_DATA_tauinst_spec)
|
||||
$(vtdata_DATA_tauinst_spec) \
|
||||
omp.h
|
||||
vtdatadir = $(datadir)
|
||||
|
||||
install-data-hook-always:
|
||||
@ -177,6 +178,7 @@ LIBS = $(LIBS_FOR_BUILD)
|
||||
CLEANFILES = $(CROSSPREFIXCLEANFILES)
|
||||
|
||||
EXTRA_DIST = \
|
||||
omp.h \
|
||||
TAUINST.SPEC \
|
||||
TAUINST_MACOS.SPEC
|
||||
|
||||
|
6
ompi/contrib/vt/vt/tools/vtwrapper/omp.h
Обычный файл
6
ompi/contrib/vt/vt/tools/vtwrapper/omp.h
Обычный файл
@ -0,0 +1,6 @@
|
||||
/**
|
||||
* This header file temporarily replaces the original omp.h when the VT compiler
|
||||
* wrapper is preprocessing a source file before it will be parsed by OPARI.
|
||||
* This is necessary to prevent multiple declarations of OpenMP functions,
|
||||
* types, etc., which result in various compiler errors.
|
||||
**/
|
@ -1344,10 +1344,14 @@ doWrap()
|
||||
|
||||
files_to_remove.push_back( cpp_file );
|
||||
|
||||
// add macro definition '_OPENMP' to preprocessor flags, if OpenMP
|
||||
// is enabled
|
||||
// add path to empty omp.h and macro definition '_OPENMP' to preprocessor
|
||||
// flags, if OpenMP is enabled
|
||||
if( Config.uses_openmp )
|
||||
Config.addPrepFlag( "-D_OPENMP" );
|
||||
{
|
||||
Config.addPrepFlag( std::string( "-I" ) +
|
||||
vt_installdirs_get( VT_INSTALLDIR_DATADIR ) +
|
||||
" -D_OPENMP" );
|
||||
}
|
||||
|
||||
// compose C preprocessor command
|
||||
//
|
||||
|
@ -26,6 +26,6 @@ tauinst_bin=@VT_WRAPPER_TAUINST_BIN@
|
||||
tauinst_opts=@VT_WRAPPER_NVCC_TAUINST_OPTS@
|
||||
tauinst_parse_bin=@VT_WRAPPER_NVCC_TAUINST_PARSE_BIN@
|
||||
tauinst_parse_opts=@VT_WRAPPER_NVCC_TAUINST_PARSE_OPTS@
|
||||
inst_avail=@VT_WRAPPER_AVAIL_INST@
|
||||
inst_default=@VT_WRAPPER_DEFAULT_INST@
|
||||
inst_avail=@VT_WRAPPER_NVCC_AVAIL_INST@
|
||||
inst_default=@VT_WRAPPER_NVCC_DEFAULT_INST@
|
||||
partype_default=@VT_WRAPPER_NVCC_DEFAULT_PARTYPE@
|
||||
|
@ -373,6 +373,7 @@ typedef struct vtcudaStream
|
||||
uint32_t tid; /**< VT thread id for this stream (unique) */
|
||||
cudaEvent_t lastEvt; /**< last written CUDA event (needed in flush) */
|
||||
uint64_t lastVTTime; /**< last written VampirTrace time */
|
||||
uint8_t destroyed; /**< Is stream destroyed? Ready for reuse? */
|
||||
struct vtcudaStream *next; /**< points to next cuda stream in list */
|
||||
}VTCUDAStrm;
|
||||
|
||||
@ -1483,26 +1484,44 @@ static VTCUDADevice* VTCUDAcheckThread(cudaStream_t cuStrm, uint32_t ptid,
|
||||
while(vtDev != NULL){
|
||||
if(vtDev->device == device && vtDev->ptid == ptid){
|
||||
/* the CUDA device is already listed -> stream 0 exists */
|
||||
VTCUDAStrm *curStrm, *ptrLastStrm;
|
||||
VTCUDAStrm *curStrm, *ptrLastStrm, *reusableStrm;
|
||||
reusableStrm = NULL;
|
||||
curStrm = vtDev->strmList;
|
||||
|
||||
CUDARTWRAP_UNLOCK();
|
||||
do{
|
||||
/* check for existing stream */
|
||||
if(cuStrm == curStrm->stream){
|
||||
*vtStrm = curStrm;
|
||||
time_check = vt_pform_wtime();
|
||||
vt_exit(ptid, &time_check);
|
||||
return vtDev;
|
||||
}
|
||||
|
||||
/* check for reusable stream */
|
||||
if(vt_gpu_stream_reuse && reusableStrm == NULL && curStrm->destroyed == 1){
|
||||
reusableStrm = curStrm;
|
||||
}
|
||||
|
||||
ptrLastStrm = curStrm;
|
||||
curStrm = curStrm->next;
|
||||
}while(curStrm != NULL);
|
||||
/* stream not found */
|
||||
|
||||
/* append newly created stream (stream 0 is probably used most, will
|
||||
therefore always be the first element in the list) */
|
||||
ptrLastStrm->next = VTCUDAcreateStream(vtDev, cuStrm);
|
||||
*vtStrm = ptrLastStrm->next;
|
||||
/* reuse a destroyed stream, if available */
|
||||
if(vt_gpu_stream_reuse && reusableStrm){
|
||||
vt_cntl_msg(2, "[CUDART] Reusing CUDA stream %d for stream %d",
|
||||
reusableStrm->stream, cuStrm);
|
||||
|
||||
reusableStrm->destroyed = 0;
|
||||
reusableStrm->stream = cuStrm;
|
||||
*vtStrm = reusableStrm;
|
||||
}else{
|
||||
/* append newly created stream (stream 0 is probably used most, will
|
||||
therefore always be the first element in the list) */
|
||||
ptrLastStrm->next = VTCUDAcreateStream(vtDev, cuStrm);
|
||||
*vtStrm = ptrLastStrm->next;
|
||||
}
|
||||
|
||||
time_check = vt_pform_wtime();
|
||||
vt_exit(ptid, &time_check);
|
||||
@ -1524,7 +1543,7 @@ static VTCUDADevice* VTCUDAcheckThread(cudaStream_t cuStrm, uint32_t ptid,
|
||||
|
||||
time_check = vt_pform_wtime();
|
||||
vt_exit(ptid, &time_check);
|
||||
|
||||
|
||||
return vtDev;
|
||||
}
|
||||
|
||||
@ -1587,6 +1606,7 @@ static VTCUDAMemcpy* addMemcpy2Buf(enum cudaMemcpyKind kind, int count,
|
||||
ptid = VT_MY_THREAD;
|
||||
vtDev = VTCUDAcheckThread(stream, ptid, &ptrStrm);
|
||||
|
||||
|
||||
/* check if there is enough buffer space */
|
||||
if(vtDev->buf_pos + sizeof(VTCUDAMemcpy) > vtDev->conf_stack){
|
||||
VTCUDAflush(vtDev, ptid);
|
||||
@ -2677,8 +2697,52 @@ cudaError_t cudaLaunch(const char *entry)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaThreadExit -- */
|
||||
/* -- cuda_runtime_api.h:cudaStreamDestroy -- */
|
||||
cudaError_t cudaStreamDestroy(cudaStream_t stream)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaStreamDestroy",
|
||||
cudaError_t , (cudaStream_t ),
|
||||
NULL, 0);
|
||||
|
||||
/* find the CUDA stream and mark it as destroyed */
|
||||
if(vt_cudart_trace_enabled){
|
||||
if(vt_gpu_stream_reuse){
|
||||
uint32_t ptid;
|
||||
VTCUDADevice *vtDev;
|
||||
VTCUDAStrm *vtStrm = NULL;
|
||||
|
||||
VT_CHECK_THREAD;
|
||||
ptid = VT_MY_THREAD;
|
||||
|
||||
vtDev = VTCUDAgetDevice(ptid);
|
||||
if(vtDev)
|
||||
vtStrm = vtDev->strmList;
|
||||
|
||||
while(vtStrm != NULL){
|
||||
|
||||
if(stream == vtStrm->stream){
|
||||
vtStrm->destroyed = 1;
|
||||
vt_cntl_msg(2, "cudaStreamDestroy() called, reusing stream %d", stream);
|
||||
break;
|
||||
}
|
||||
|
||||
vtStrm = vtStrm->next;
|
||||
}
|
||||
}
|
||||
|
||||
VT_LIBWRAP_FUNC_START(vt_cudart_lw); /* no extra if(trace_enabled) */
|
||||
}
|
||||
|
||||
ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (stream));
|
||||
|
||||
CUDARTWRAP_FUNC_END(vt_cudart_lw);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaThreadExit -- */
|
||||
cudaError_t cudaThreadExit()
|
||||
{
|
||||
cudaError_t ret;
|
||||
@ -2711,7 +2775,6 @@ cudaError_t cudaThreadExit()
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaThreadSynchronize -- */
|
||||
|
||||
cudaError_t cudaThreadSynchronize()
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
@ -533,24 +533,6 @@ cudaError_t cudaStreamCreate(cudaStream_t *pStream)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaStreamDestroy -- */
|
||||
cudaError_t cudaStreamDestroy(cudaStream_t stream)
|
||||
{
|
||||
cudaError_t ret;
|
||||
|
||||
CUDARTWRAP_FUNC_INIT(vt_cudart_lw, vt_cudart_lw_attr, "cudaStreamDestroy",
|
||||
cudaError_t , (cudaStream_t ),
|
||||
NULL, 0);
|
||||
|
||||
CUDARTWRAP_FUNC_START(vt_cudart_lw);
|
||||
|
||||
ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (stream));
|
||||
|
||||
CUDARTWRAP_FUNC_END(vt_cudart_lw);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* -- cuda_runtime_api.h:cudaStreamSynchronize -- */
|
||||
cudaError_t cudaStreamSynchronize(cudaStream_t stream)
|
||||
{
|
||||
|
@ -79,6 +79,7 @@ typedef struct vt_cuptiact_strm_st
|
||||
uint32_t strmID; /**< the CUDA stream */
|
||||
uint32_t vtThrdID; /**< VT thread id for this stream (unique) */
|
||||
uint64_t vtLastTime; /**< last written VampirTrace timestamp */
|
||||
uint8_t destroyed; /**< Is stream destroyed? Ready for reuse? */
|
||||
struct vt_cuptiact_strm_st *next;
|
||||
}vt_cuptiact_strm_t;
|
||||
|
||||
@ -315,18 +316,26 @@ static vt_cuptiact_strm_t* vt_cuptiact_createStream(vt_cuptiact_ctx_t *vtCtx,
|
||||
vt_error_msg("[CUPTI Activity] Could not allocate memory for stream!");
|
||||
vtStrm->strmID = strmID;
|
||||
vtStrm->vtLastTime = vt_gpu_init_time;
|
||||
vtStrm->destroyed = 0;
|
||||
vtStrm->next = NULL;
|
||||
|
||||
/* create VT-User-Thread with name and parent id and get its id */
|
||||
{
|
||||
char thread_name[16];
|
||||
|
||||
/* create VT-User-Thread with name and parent id and get its id */
|
||||
if(vtCtx->devID == VT_NO_ID){
|
||||
if(-1 == snprintf(thread_name, 15, "CUDA[?:%d]", strmID))
|
||||
vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
|
||||
char thread_name[16] = "CUDA";
|
||||
|
||||
if(vt_gpu_stream_reuse){
|
||||
if(vtCtx->devID != VT_NO_ID){
|
||||
if(-1 == snprintf(thread_name+4, 12, "[%d]", vtCtx->devID))
|
||||
vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
|
||||
}
|
||||
}else{
|
||||
if(-1 == snprintf(thread_name, 15, "CUDA[%d:%d]", vtCtx->devID, strmID))
|
||||
vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
|
||||
if(vtCtx->devID == VT_NO_ID){
|
||||
if(-1 == snprintf(thread_name+4, 12, "[?:%d]", strmID))
|
||||
vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
|
||||
}else{
|
||||
if(-1 == snprintf(thread_name+4, 12, "[%d:%d]", vtCtx->devID, strmID))
|
||||
vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
|
||||
}
|
||||
}
|
||||
|
||||
VT_CHECK_THREAD;
|
||||
@ -500,9 +509,10 @@ static vt_cuptiact_strm_t* vt_cuptiact_checkStream(vt_cuptiact_ctx_t* vtCtx,
|
||||
{
|
||||
vt_cuptiact_strm_t *currStrm = NULL;
|
||||
vt_cuptiact_strm_t *lastStrm = NULL;
|
||||
vt_cuptiact_strm_t *reusableStrm = NULL;
|
||||
|
||||
if(vtCtx == NULL){
|
||||
vt_warning("[CUPTI Activity] No context given!");
|
||||
vt_warning("[CUPTI Activity] No context given in vt_cuptiact_checkStream()!");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -511,14 +521,31 @@ static vt_cuptiact_strm_t* vt_cuptiact_checkStream(vt_cuptiact_ctx_t* vtCtx,
|
||||
currStrm = vtCtx->strmList;
|
||||
lastStrm = vtCtx->strmList;
|
||||
while(currStrm != NULL){
|
||||
/* check for existing stream */
|
||||
if(currStrm->strmID == strmID){
|
||||
/*VT_CUPTI_ACT_UNLOCK();*/
|
||||
return currStrm;
|
||||
}
|
||||
|
||||
/* check for reusable stream */
|
||||
if(vt_gpu_stream_reuse && reusableStrm == NULL && currStrm->destroyed == 1){
|
||||
reusableStrm = currStrm;
|
||||
}
|
||||
|
||||
lastStrm = currStrm;
|
||||
currStrm = currStrm->next;
|
||||
}
|
||||
|
||||
/* reuse a destroyed stream, if one is available */
|
||||
if(vt_gpu_stream_reuse && reusableStrm){
|
||||
vt_cntl_msg(2, "[CUPTI Activity] Reusing CUDA stream %d with stream %d",
|
||||
reusableStrm->strmID, strmID);
|
||||
reusableStrm->destroyed = 0;
|
||||
reusableStrm->strmID = strmID;
|
||||
|
||||
return reusableStrm;
|
||||
}
|
||||
|
||||
/*
|
||||
* If stream list is empty, the stream to be created is not the default
|
||||
* stream and GPU idle and memory copy tracing is enabled, then create
|
||||
@ -541,6 +568,30 @@ static vt_cuptiact_strm_t* vt_cuptiact_checkStream(vt_cuptiact_ctx_t* vtCtx,
|
||||
return currStrm;
|
||||
}
|
||||
|
||||
/*
 * Flag a CUDA stream of the given context as destroyed.
 *
 * Looks up the VampirTrace context for cuCtx and walks its stream list; the
 * first entry whose strmID matches gets its `destroyed` flag set to 1.
 * If no context is found a warning is issued and nothing is changed. If no
 * stream matches, the list is left untouched.
 *
 * @param cuCtx  CUDA context, which contains the stream
 * @param strmID the CUDA stream ID to be marked as destroyed
 */
void vt_cuptiact_markStreamAsDestroyed(CUcontext cuCtx, uint32_t strmID)
{
  vt_cuptiact_ctx_t *ctx = vt_cuptiact_getCtx(cuCtx);
  vt_cuptiact_strm_t *strm = NULL;

  if(NULL == ctx){
    vt_warning("[CUPTI Activity] No context given in "
               "vt_cuptiact_markStreamAsDestroyed()!");
    return;
  }

  /* the stream list is only traversed and modified while holding the lock */
  VT_CUPTI_ACT_LOCK();
  for(strm = ctx->strmList; strm != NULL; strm = strm->next){
    if(strm->strmID == strmID){
      strm->destroyed = 1;
      break; /* only the first match is marked */
    }
  }
  VT_CUPTI_ACT_UNLOCK();
}
|
||||
|
||||
void vt_cuptiact_flushCtxActivities(CUcontext cuCtx)
|
||||
{
|
||||
CUptiResult status;
|
||||
|
@ -59,6 +59,15 @@ EXTERN void vt_cuptiact_addContext(CUcontext cuCtx, CUdevice cuDev);
|
||||
*/
|
||||
EXTERN void vt_cuptiact_flushCtxActivities(CUcontext cuCtx);
|
||||
|
||||
/*
|
||||
* Mark a CUDA stream as destroyed, so that it can be reused afterwards.
|
||||
*
|
||||
* @param cuCtx CUDA context, which contains the stream
|
||||
* @param strmID the CUDA stream ID to be marked as destroyed
|
||||
*
|
||||
*/
|
||||
EXTERN void vt_cuptiact_markStreamAsDestroyed(CUcontext cuCtx, uint32_t strmID);
|
||||
|
||||
|
||||
EXTERN void vt_cuptiact_writeMalloc(uint32_t ctxID, CUcontext cuCtx,
|
||||
void *devPtr, size_t size);
|
||||
|
@ -161,6 +161,9 @@ uint32_t vt_cupti_cid_threadsPerKernel = VT_NO_ID;
|
||||
static uint8_t vt_cupticb_initialized = 0;
|
||||
static uint8_t vt_cupticb_finalized = 0;
|
||||
|
||||
/* VampirTrace region ID for synchronization of host and CUDA device*/
|
||||
static uint32_t vt_cupticb_rid_sync = VT_NO_ID;
|
||||
|
||||
#if !defined(VT_CUPTI_ACTIVITY)
|
||||
|
||||
/* list of VampirTrace CUDA contexts */
|
||||
@ -174,9 +177,6 @@ static vt_cupticb_ctx_t* vt_cupticb_ctxList = NULL;
|
||||
* 2 show synchronization in extra region group to get host wait time
|
||||
*/
|
||||
static uint8_t vt_cupticb_syncLevel = 3;
|
||||
|
||||
/* VampirTrace region ID for synchronization of host and CUDA device*/
|
||||
static uint32_t vt_cupticb_rid_sync = VT_NO_ID;
|
||||
#endif /* !defined(VT_CUPTI_ACTIVITY) */
|
||||
|
||||
/**************** The callback functions to be registered *********************/
|
||||
@ -1091,38 +1091,85 @@ void vt_cupticb_resource(CUpti_CallbackId cbid,
|
||||
/********************** CUDA memory allocation ******************************/
|
||||
case CUPTI_CBID_RESOURCE_CONTEXT_CREATED: {
|
||||
vt_cntl_msg(2, "[CUPTI Callbacks] Creating context %d", resData->context);
|
||||
if(vt_cupticb_trace_driverAPI)
|
||||
|
||||
/* add the context without tracing CUDA driver API calls, if enabled */
|
||||
if(vt_cupticb_trace_driverAPI){
|
||||
cuptiEnableDomain(0, vt_cupticb_subscriber, CUPTI_CB_DOMAIN_DRIVER_API);
|
||||
vt_cuptiact_addContext(resData->context, (CUdevice)-1);
|
||||
if(vt_cupticb_trace_driverAPI)
|
||||
vt_cuptiact_addContext(resData->context, (CUdevice)-1);
|
||||
cuptiEnableDomain(1, vt_cupticb_subscriber, CUPTI_CB_DOMAIN_DRIVER_API);
|
||||
}else{
|
||||
vt_cuptiact_addContext(resData->context, (CUdevice)-1);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING: {
|
||||
vt_cntl_msg(2, "[CUPTI Callbacks] Destroying context");
|
||||
|
||||
/* Only flush the activities of the context. The user code has to ensure,
|
||||
that the context is synchronized */
|
||||
vt_cuptiact_flushCtxActivities(resData->context);
|
||||
|
||||
break;
|
||||
}
|
||||
/*
|
||||
|
||||
case CUPTI_CBID_RESOURCE_STREAM_CREATED: {
|
||||
resData->context;
|
||||
resData->resourceHandle.stream;
|
||||
vt_cntl_msg(1, "[CUPTI Callbacks] Creating stream");
|
||||
if(vt_gpu_stream_reuse){
|
||||
uint32_t strmID;
|
||||
|
||||
VT_CUPTI_CALL(cuptiGetStreamId(resData->context,
|
||||
resData->resourceHandle.stream,
|
||||
&strmID),
|
||||
"cuptiGetStreamId");
|
||||
|
||||
vt_cntl_msg(2, "[CUPTI Callbacks] Creating stream %d (context %d)",
|
||||
strmID, resData->context);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING: {
|
||||
resData->context;
|
||||
resData->resourceHandle.stream;
|
||||
vt_cntl_msg(1, "[CUPTI Callbacks] Destroying stream");
|
||||
case CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING: {
|
||||
if(vt_gpu_stream_reuse){
|
||||
uint32_t ptid, strmID;
|
||||
uint64_t time;
|
||||
|
||||
VT_CHECK_THREAD;
|
||||
ptid = VT_MY_THREAD;
|
||||
|
||||
time = vt_pform_wtime();
|
||||
vt_enter(ptid, &time, vt_cupticb_rid_sync);
|
||||
|
||||
/* implicitly flush context activities via cuCtxSynchronize() */
|
||||
if(vt_cupticb_trace_driverAPI){
|
||||
cuptiEnableDomain(0, vt_cupticb_subscriber, CUPTI_CB_DOMAIN_DRIVER_API);
|
||||
CHECK_CU_ERROR(cuCtxSynchronize(), NULL);
|
||||
cuptiEnableDomain(1, vt_cupticb_subscriber, CUPTI_CB_DOMAIN_DRIVER_API);
|
||||
}else{
|
||||
CHECK_CU_ERROR(cuCtxSynchronize(), NULL);
|
||||
}
|
||||
|
||||
time = vt_pform_wtime();
|
||||
vt_exit(ptid, &time);
|
||||
|
||||
/* get the stream id from stream type */
|
||||
VT_CUPTI_CALL(cuptiGetStreamId(resData->context,
|
||||
resData->resourceHandle.stream,
|
||||
&strmID),
|
||||
"cuptiGetStreamId");
|
||||
|
||||
/* mark the stream as destroyed to be available for reuse */
|
||||
vt_cuptiact_markStreamAsDestroyed(resData->context,
|
||||
strmID);
|
||||
|
||||
vt_cntl_msg(2, "[CUPTI Callbacks] Destroying stream %d (context %d)",
|
||||
strmID, resData->context);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
*/
|
||||
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
@ -1688,11 +1735,16 @@ void vt_cupti_callback_init()
|
||||
vt_cupti_cgid_cuda_kernel, 0);
|
||||
}
|
||||
|
||||
#if !defined(VT_CUPTI_ACTIVITY)
|
||||
#if defined(VT_CUPTI_ACTIVITY)
|
||||
if(vt_gpu_stream_reuse){
|
||||
vt_cupticb_rid_sync = vt_def_region(VT_MASTER_THREAD, "cudaSynchronize",
|
||||
VT_NO_ID, VT_NO_LNO, VT_NO_LNO, "CUDA_SYNC", VT_FUNCTION);
|
||||
}
|
||||
#else
|
||||
if(vt_gpu_trace_kernels > 0 || vt_gpu_trace_mcpy){
|
||||
vt_cupticb_syncLevel = (uint8_t)vt_env_cudatrace_sync();
|
||||
vt_cupticb_rid_sync = vt_def_region(VT_MASTER_THREAD, "cudaSynchronize",
|
||||
VT_NO_ID, VT_NO_LNO, VT_NO_LNO, "CUDA_SYNC", VT_FUNCTION);
|
||||
VT_NO_ID, VT_NO_LNO, VT_NO_LNO, "CUDA_SYNC", VT_FUNCTION);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -37,7 +37,8 @@ static uint32_t* rtab = NULL; /* region id lookup table */
|
||||
* Register new region
|
||||
*/
|
||||
|
||||
static void register_region(uint32_t* rid, char* func, char* file, int lno)
|
||||
static uint32_t register_region(const char* name, const char* file,
|
||||
uint32_t lno, uint32_t loop)
|
||||
{
|
||||
uint32_t fid;
|
||||
|
||||
@ -54,11 +55,12 @@ static void register_region(uint32_t* rid, char* func, char* file, int lno)
|
||||
}
|
||||
|
||||
/* Register region and store region identifier */
|
||||
*rid = vt_def_region(VT_CURRENT_THREAD, func, fid, lno, VT_NO_LNO, NULL,
|
||||
VT_FUNCTION);
|
||||
return vt_def_region(VT_CURRENT_THREAD, name, fid, lno, VT_NO_LNO, NULL,
|
||||
loop ? VT_LOOP : VT_FUNCTION);
|
||||
}
|
||||
|
||||
void VT_Dyn_start(uint32_t index, char* name, char* fname, int lno);
|
||||
void VT_Dyn_start(uint32_t index, const char* name, const char* fname,
|
||||
uint32_t lno, uint32_t loop);
|
||||
void VT_Dyn_end(uint32_t index);
|
||||
void VT_Dyn_attach(void);
|
||||
void VT_Dyn_finalize(void);
|
||||
@ -67,7 +69,8 @@ void VT_Dyn_finalize(void);
|
||||
* This function is called at the entry of each function
|
||||
*/
|
||||
|
||||
void VT_Dyn_start(uint32_t index, char* name, char* fname, int lno)
|
||||
void VT_Dyn_start(uint32_t index, const char* name, const char* fname,
|
||||
uint32_t lno, uint32_t loop)
|
||||
{
|
||||
uint64_t time;
|
||||
uint32_t* rid;
|
||||
@ -107,10 +110,10 @@ void VT_Dyn_start(uint32_t index, char* name, char* fname, int lno)
|
||||
#if (defined(VT_MT) || defined(VT_HYB))
|
||||
VTTHRD_LOCK_IDS();
|
||||
if ( *rid == 0 )
|
||||
register_region(rid, name, fname, lno);
|
||||
*rid = register_region(name, fname, lno, loop);
|
||||
VTTHRD_UNLOCK_IDS();
|
||||
#else /* VT_MT || VT_HYB */
|
||||
register_region(rid, name, fname, lno);
|
||||
*rid = register_region(name, fname, lno, loop);
|
||||
#endif /* VT_MT || VT_HYB */
|
||||
}
|
||||
|
||||
@ -235,12 +238,17 @@ void VT_Dyn_attach()
|
||||
shlibs_arg[strlen(shlibs_arg)-1] = '\0';
|
||||
}
|
||||
|
||||
snprintf(cmd, sizeof(cmd)-1, "%s/vtdyn %s %s %s %s %s %s %s %s -p %i %s",
|
||||
snprintf(cmd,
|
||||
sizeof(cmd)-1, "%s/vtdyn %s %s %s %s %s %s %s %s %s %s %s "
|
||||
"-p %i %s",
|
||||
vt_installdirs_get(VT_INSTALLDIR_BINDIR),
|
||||
(vt_env_verbose() == 0) ? "-q" : "",
|
||||
(vt_env_verbose() >= 2) ? "-v" : "",
|
||||
filter ? "-f" : "", filter ? filter : "",
|
||||
shlibs_arg ? "-s" : "", shlibs_arg ? shlibs_arg : "",
|
||||
(vt_env_dyn_outer_loops()) ? "--outer-loops" : "",
|
||||
(vt_env_dyn_inner_loops()) ? "--inner-loops" : "",
|
||||
(vt_env_dyn_loop_iters()) ? "--loop-iters" : "",
|
||||
(vt_env_dyn_ignore_nodbg()) ? "--ignore-nodbg" : "",
|
||||
(vt_env_dyn_detach()) ? "" : "--nodetach",
|
||||
mutatee_pid,
|
||||
|
@ -201,6 +201,72 @@ char* vt_env_dyn_shlibs()
|
||||
return dyn_shlibs;
|
||||
}
|
||||
|
||||
/*
 * Query the VT_DYN_OUTER_LOOPS environment variable.
 *
 * While the function-local static still holds -1, the variable is read:
 * an unset or empty value yields 0, anything else is logged and handed to
 * parse_bool(). The stored result is returned.
 */
int vt_env_dyn_outer_loops()
{
  static int dyn_outer_loops = -1;

  if (dyn_outer_loops == -1)
  {
    char* env = getenv("VT_DYN_OUTER_LOOPS");

    if (env == NULL || strlen(env) == 0)
    {
      /* not set (or set to an empty string) */
      dyn_outer_loops = 0;
    }
    else
    {
      vt_cntl_msg(2, "VT_DYN_OUTER_LOOPS=%s", env);

      dyn_outer_loops = parse_bool(env);
    }
  }

  return dyn_outer_loops;
}
|
||||
|
||||
/*
 * Query the VT_DYN_INNER_LOOPS environment variable.
 *
 * While the function-local static still holds -1, the variable is read:
 * an unset or empty value yields 0, anything else is logged and handed to
 * parse_bool(). The stored result is returned.
 */
int vt_env_dyn_inner_loops()
{
  static int dyn_inner_loops = -1;

  if (dyn_inner_loops == -1)
  {
    char* env = getenv("VT_DYN_INNER_LOOPS");

    if (env == NULL || strlen(env) == 0)
    {
      /* not set (or set to an empty string) */
      dyn_inner_loops = 0;
    }
    else
    {
      vt_cntl_msg(2, "VT_DYN_INNER_LOOPS=%s", env);

      dyn_inner_loops = parse_bool(env);
    }
  }

  return dyn_inner_loops;
}
|
||||
|
||||
/*
 * Query the VT_DYN_LOOP_ITERS environment variable.
 *
 * While the function-local static still holds -1, the variable is read:
 * an unset or empty value yields 0, anything else is logged and handed to
 * parse_bool(). The stored result is returned.
 */
int vt_env_dyn_loop_iters()
{
  static int dyn_loop_iters = -1;

  if (dyn_loop_iters == -1)
  {
    char* env = getenv("VT_DYN_LOOP_ITERS");

    if (env == NULL || strlen(env) == 0)
    {
      /* not set (or set to an empty string) */
      dyn_loop_iters = 0;
    }
    else
    {
      vt_cntl_msg(2, "VT_DYN_LOOP_ITERS=%s", env);

      dyn_loop_iters = parse_bool(env);
    }
  }

  return dyn_loop_iters;
}
|
||||
|
||||
int vt_env_dyn_ignore_nodbg()
|
||||
{
|
||||
static int dyn_ignore_nodbg = -1;
|
||||
|
@ -25,6 +25,9 @@
|
||||
|
||||
EXTERN char* vt_env_apppath(void);
|
||||
EXTERN char* vt_env_dyn_shlibs(void);
|
||||
EXTERN int vt_env_dyn_outer_loops(void);
|
||||
EXTERN int vt_env_dyn_inner_loops(void);
|
||||
EXTERN int vt_env_dyn_loop_iters(void);
|
||||
EXTERN int vt_env_dyn_ignore_nodbg(void);
|
||||
EXTERN int vt_env_dyn_detach(void);
|
||||
EXTERN char* vt_env_gnu_nm(void);
|
||||
|
@ -28,6 +28,8 @@ uint8_t vt_gpu_trace_idle = 0;
|
||||
|
||||
uint8_t vt_gpu_trace_mcpy = 0;
|
||||
|
||||
uint8_t vt_gpu_stream_reuse = 0;
|
||||
|
||||
uint8_t vt_gpu_trace_memusage = 0;
|
||||
|
||||
uint8_t vt_gpu_debug = 0;
|
||||
@ -80,6 +82,10 @@ void vt_gpu_init(void)
|
||||
vt_def_counter_group(VT_MASTER_THREAD, "GPU_MEMORY_USAGE"),
|
||||
0);
|
||||
}
|
||||
|
||||
/* disable stream reuse if neither kernels nor memory copies are enabled */
|
||||
if(vt_gpu_stream_reuse && !(vt_gpu_trace_kernels > 0 || vt_gpu_trace_mcpy))
|
||||
vt_gpu_stream_reuse = 0;
|
||||
|
||||
vt_gpu_initialized = 1;
|
||||
}
|
||||
@ -151,6 +157,9 @@ uint32_t vt_gpu_get_config(void)
|
||||
}else if(strcmp(feature, "memcpy") == 0){
|
||||
vt_gpu_config |= VT_GPU_TRACE_MEMCPY;
|
||||
vt_gpu_trace_mcpy = 1;
|
||||
}else if(strcmp(feature, "stream_reuse") == 0){
|
||||
vt_gpu_config |= VT_GPU_TRACE_STREAM_REUSE;
|
||||
vt_gpu_stream_reuse = 1;
|
||||
}else if(strcmp(feature, "memusage") == 0){
|
||||
vt_gpu_config |= VT_GPU_TRACE_MEMUSAGE;
|
||||
vt_gpu_trace_memusage = 1;
|
||||
|
@ -37,6 +37,7 @@
|
||||
#define VT_GPU_TRACE_MEMUSAGE (1 << 8)
|
||||
#define VT_GPU_TRACE_DEBUG (1 << 9)
|
||||
#define VT_GPU_TRACE_ERROR (1 << 10)
|
||||
#define VT_GPU_TRACE_STREAM_REUSE (1 << 11)
|
||||
#define VT_GPU_TRACE_DEFAULT \
|
||||
(VT_GPU_TRACE_CUDA | VT_GPU_TRACE_RUNTIME_API | VT_GPU_TRACE_OPENCL | \
|
||||
VT_GPU_TRACE_KERNEL | VT_GPU_TRACE_MEMCPY)
|
||||
@ -163,6 +164,11 @@ EXTERN uint8_t vt_gpu_trace_idle;
|
||||
*/
|
||||
EXTERN uint8_t vt_gpu_trace_mcpy;
|
||||
|
||||
/*
|
||||
* flag: Reuse destroyed GPU streams?
|
||||
*/
|
||||
EXTERN uint8_t vt_gpu_stream_reuse;
|
||||
|
||||
/*
|
||||
* flag: trace GPU memory usage (allocation, free)?
|
||||
*/
|
||||
|
@ -231,8 +231,8 @@ void vt_request_free(struct VTRequest* req)
|
||||
}
|
||||
}
|
||||
|
||||
void vt_check_request(uint64_t* time, struct VTRequest* req, MPI_Status *status,
|
||||
uint8_t record_event)
|
||||
void vt_check_request(uint32_t tid, uint64_t* time, struct VTRequest* req,
|
||||
MPI_Status *status, uint8_t record_event)
|
||||
{
|
||||
if (!req ||
|
||||
((req->flags & ERF_IS_PERSISTENT) && !(req->flags & ERF_IS_ACTIVE)))
|
||||
@ -247,7 +247,7 @@ void vt_check_request(uint64_t* time, struct VTRequest* req, MPI_Status *status,
|
||||
VT_MPI_INT count, sz;
|
||||
PMPI_Type_size(req->datatype, &sz);
|
||||
PMPI_Get_count(status, req->datatype, &count);
|
||||
vt_mpi_recv(VT_CURRENT_THREAD, time,
|
||||
vt_mpi_recv(tid, time,
|
||||
VT_RANK_TO_PE(status->MPI_SOURCE, req->comm),
|
||||
VT_COMM_ID(req->comm), status->MPI_TAG, count * sz);
|
||||
}
|
||||
@ -259,7 +259,7 @@ void vt_check_request(uint64_t* time, struct VTRequest* req, MPI_Status *status,
|
||||
PMPI_Get_count(status, req->datatype, &count);
|
||||
if (count == MPI_UNDEFINED)
|
||||
count = 0;
|
||||
vt_ioend(VT_CURRENT_THREAD, time, req->fileid, req->matchingid, req->handleid, req->fileop,
|
||||
vt_ioend(tid, time, req->fileid, req->matchingid, req->handleid, req->fileop,
|
||||
(uint64_t)count*(uint64_t)sz);
|
||||
}
|
||||
|
||||
|
@ -59,8 +59,9 @@ EXTERN void vt_iorequest_create( MPI_Request request,
|
||||
uint32_t flags );
|
||||
EXTERN struct VTRequest* vt_request_get(MPI_Request request);
|
||||
EXTERN void vt_request_free(struct VTRequest* req);
|
||||
EXTERN void vt_check_request(uint64_t* time, struct VTRequest* req,
|
||||
MPI_Status *status, uint8_t record_event);
|
||||
EXTERN void vt_check_request(uint32_t tid, uint64_t* time,
|
||||
struct VTRequest* req, MPI_Status *status,
|
||||
uint8_t record_event);
|
||||
EXTERN void vt_save_request_array(MPI_Request *arr_req, int arr_req_size);
|
||||
EXTERN struct VTRequest* vt_saved_request_get(int i);
|
||||
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -163,6 +163,10 @@ uint32_t VTThrd_create(const char* tname, uint32_t ptid, uint8_t is_virtual)
|
||||
/* set the virtual thread flag */
|
||||
thrd->is_virtual = is_virtual;
|
||||
|
||||
#if (defined (VT_MPI) || defined (VT_HYB))
|
||||
thrd->mpi_tracing_enabled = vt_env_mpitrace();
|
||||
#endif /* VT_MPI || VT_HYB */
|
||||
|
||||
#if defined(VT_GETCPU)
|
||||
thrd->cpuid_val = (uint32_t)-1;
|
||||
#endif /* VT_GETCPU */
|
||||
|
@ -92,8 +92,10 @@ typedef struct
|
||||
|
||||
#if (defined (VT_MPI) || defined (VT_HYB))
|
||||
|
||||
uint64_t mpicoll_next_matchingid; /**< matching id counter for MPI collective
|
||||
operations. */
|
||||
uint8_t mpi_tracing_enabled; /**< actual mode of MPI tracing
|
||||
operation */
|
||||
uint64_t mpicoll_next_matchingid; /**< matching id counter for MPI collective
|
||||
operations */
|
||||
|
||||
#endif /* VT_MPI || VT_HYB */
|
||||
|
||||
@ -108,8 +110,9 @@ typedef struct
|
||||
|
||||
#if (defined (VT_IOWRAP) || (defined(HAVE_MPI2_IO) && HAVE_MPI2_IO))
|
||||
|
||||
uint64_t io_next_matchingid;
|
||||
uint64_t io_next_handle;
|
||||
uint64_t io_next_matchingid; /**< matching id counter for I/O
|
||||
operations */
|
||||
uint64_t io_next_handle; /**< handle id counter for I/O operations */
|
||||
|
||||
#endif
|
||||
|
||||
@ -203,30 +206,42 @@ typedef struct
|
||||
#endif /* VT_DISABLE_RFG */
|
||||
|
||||
#if (defined (VT_MPI) || defined (VT_HYB))
|
||||
/* matching id counter for MPI coll. ops. */
|
||||
|
||||
/* actual mode of MPI tracing operation */
|
||||
#define VTTHRD_MPI_TRACING_ENABLED(thrd) \
|
||||
(thrd->mpi_tracing_enabled)
|
||||
|
||||
/* increment matching id counter for MPI collective operations */
|
||||
#define VTTHRD_MPICOLLOP_NEXT_MATCHINGID(thrd) \
|
||||
(thrd->mpicoll_next_matchingid++)
|
||||
|
||||
#endif /* VT_MPI || VT_HYB */
|
||||
|
||||
#if (defined (VT_IOWRAP))
|
||||
|
||||
/* save enabled/disabled state of I/O tracing when switching off temporarily */
|
||||
/* save value of enabled flag during suspend */
|
||||
#define VTTHRD_IO_TRACING_STATE(thrd) \
|
||||
(thrd->io_tracing_state)
|
||||
|
||||
/* save how often suspend was called */
|
||||
#define VTTHRD_IO_TRACING_SUSPEND_CNT(thrd) \
|
||||
(thrd->io_tracing_suspend_cnt)
|
||||
|
||||
/* flag: is I/O tracing enabled? */
|
||||
/* actual mode of I/O tracing operation */
|
||||
#define VTTHRD_IO_TRACING_ENABLED(thrd) \
|
||||
(thrd->io_tracing_enabled)
|
||||
|
||||
#endif /* VT_IOWRAP */
|
||||
|
||||
#if (defined (VT_IOWRAP) || (defined(HAVE_MPI2_IO) && HAVE_MPI2_IO))
|
||||
|
||||
/* increment matching id counter for I/O operations */
|
||||
#define VTTHRD_IO_NEXT_MATCHINGID(thrd) \
|
||||
(thrd->io_next_matchingid++)
|
||||
#define VTTHRD_IO_NEXT_HANDLE(thrd) \
|
||||
(thrd->io_next_handle++)
|
||||
|
||||
/* increment handle id counter for I/O operations */
|
||||
#define VTTHRD_IO_NEXT_HANDLE(thrd) (thrd->io_next_handle++)
|
||||
|
||||
#endif /* VT_IOWRAP || (HAVE_MPI2_IO && HAVE_MPI2_IO) */
|
||||
|
||||
#if (defined (VT_GETCPU))
|
||||
|
@ -132,12 +132,6 @@ uint8_t vt_my_trace_is_master = 1; /* 1st process on local node? */
|
||||
uint8_t vt_my_trace_is_disabled = 0; /* process disabled? */
|
||||
uint8_t vt_my_trace_is_first_avail = 0; /* 1st not disabled process? */
|
||||
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
/* is requested MPI thread support level MPI_THREAD_SERIALIZED?
|
||||
(not yet supported; no MPI communication events will be recorded) */
|
||||
uint8_t vt_mpi_thread_serialized = 0;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
/* unique file id */
|
||||
int vt_my_funique = 0;
|
||||
|
||||
@ -950,6 +944,9 @@ static void unify_traces(void)
|
||||
}
|
||||
argc++;
|
||||
|
||||
#if !defined(VT_MPIUNIFYLIB)
|
||||
argv[argc++] = strdup("--autostart");
|
||||
#endif /* VT_MPIUNIFYLIB */
|
||||
#if defined(HAVE_ZLIB) && HAVE_ZLIB
|
||||
if (!vt_env_compression()) argv[argc++] = strdup("--nocompress");
|
||||
#endif /* HAVE_ZLIB */
|
||||
@ -1939,7 +1936,7 @@ void vt_update_counter(uint32_t tid, uint64_t* time)
|
||||
update_counter(tid, time);
|
||||
}
|
||||
|
||||
void vt_mpi_init()
|
||||
void vt_mpi_init(uint8_t multithreaded)
|
||||
{
|
||||
#if (defined(VT_MPI) || defined(VT_HYB))
|
||||
|
||||
@ -2040,6 +2037,20 @@ void vt_mpi_init()
|
||||
vt_my_trace_is_master = (uint8_t)(host_rank == 0);
|
||||
}
|
||||
|
||||
#ifdef VT_UNIMCI
|
||||
/* disable UniMCI if MPI is initialized with an unsupported level of MPI
|
||||
thread support (e.g. MPI_THREAD_SERIALIZED, MPI_THREAD_MULTIPLE) */
|
||||
if (vt_env_mpicheck() && multithreaded)
|
||||
{
|
||||
vt_unimci_finalize();
|
||||
if (myrank == 0)
|
||||
{
|
||||
vt_warning("MPI correctness checking disabled due to not yet supported "
|
||||
"level of MPI thread support.");
|
||||
}
|
||||
}
|
||||
#endif /* VT_UNIMCI */
|
||||
|
||||
/* first clock synchronization if necessary */
|
||||
#if TIMER_IS_GLOBAL == 0
|
||||
if (vt_num_traces > 1)
|
||||
@ -3083,10 +3094,6 @@ void vt_next_async_time(uint32_t tid, uint32_t kid, uint64_t atime)
|
||||
void vt_mpi_send(uint32_t tid, uint64_t* time, uint32_t dpid, uint32_t cid,
|
||||
uint32_t tag, uint32_t sent)
|
||||
{
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
if (vt_mpi_thread_serialized) return;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) != VT_TRACE_ON) return;
|
||||
@ -3103,10 +3110,6 @@ void vt_mpi_send(uint32_t tid, uint64_t* time, uint32_t dpid, uint32_t cid,
|
||||
void vt_mpi_recv(uint32_t tid, uint64_t* time, uint32_t spid, uint32_t cid,
|
||||
uint32_t tag, uint32_t recvd)
|
||||
{
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
if (vt_mpi_thread_serialized) return;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) != VT_TRACE_ON) return;
|
||||
@ -3124,10 +3127,6 @@ void vt_mpi_collexit(uint32_t tid, uint64_t* time, uint64_t* etime,
|
||||
uint32_t rid, uint32_t rpid, uint32_t cid, void* comm,
|
||||
uint32_t sent, uint32_t recvd)
|
||||
{
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
if (vt_mpi_thread_serialized) return;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) == VT_TRACE_ON)
|
||||
@ -3154,10 +3153,6 @@ void vt_mpi_collbegin(uint32_t tid, uint64_t* time, uint32_t rid, uint64_t mid,
|
||||
uint32_t rpid, uint32_t cid, uint64_t sent,
|
||||
uint64_t recvd)
|
||||
{
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
if (vt_mpi_thread_serialized) return;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) != VT_TRACE_ON) return;
|
||||
@ -3178,10 +3173,6 @@ void vt_mpi_collbegin(uint32_t tid, uint64_t* time, uint32_t rid, uint64_t mid,
|
||||
void vt_mpi_collend(uint32_t tid, uint64_t* time, uint64_t mid, void* comm,
|
||||
uint8_t was_recorded)
|
||||
{
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
if (vt_mpi_thread_serialized) return;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
if (was_recorded && (VTTHRD_TRACE_STATUS(VTThrdv[tid]) == VT_TRACE_ON))
|
||||
@ -3201,10 +3192,6 @@ void vt_mpi_collend(uint32_t tid, uint64_t* time, uint64_t mid, void* comm,
|
||||
void vt_mpi_rma_put(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid,
|
||||
uint32_t tag, uint64_t sent)
|
||||
{
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
if (vt_mpi_thread_serialized) return;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) != VT_TRACE_ON) return;
|
||||
@ -3222,10 +3209,6 @@ void vt_mpi_rma_put(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid,
|
||||
void vt_mpi_rma_putre(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid,
|
||||
uint32_t tag, uint64_t sent)
|
||||
{
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
if (vt_mpi_thread_serialized) return;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) != VT_TRACE_ON) return;
|
||||
@ -3243,10 +3226,6 @@ void vt_mpi_rma_putre(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid,
|
||||
void vt_mpi_rma_get(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid,
|
||||
uint32_t tag, uint64_t recvd)
|
||||
{
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
if (vt_mpi_thread_serialized) return;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) != VT_TRACE_ON) return;
|
||||
@ -3263,10 +3242,6 @@ void vt_mpi_rma_get(uint32_t tid, uint64_t* time, uint32_t tpid, uint32_t cid,
|
||||
|
||||
void vt_mpi_rma_end(uint32_t tid, uint64_t* time, uint32_t cid, uint32_t tag)
|
||||
{
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
if (vt_mpi_thread_serialized) return;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
GET_THREAD_ID(tid);
|
||||
|
||||
if (VTTHRD_TRACE_STATUS(VTThrdv[tid]) != VT_TRACE_ON) return;
|
||||
|
@ -120,7 +120,7 @@ EXTERN void vt_update_counter(uint32_t tid, uint64_t* time);
|
||||
* This function has to be called immediately after initializing the
|
||||
* communication middle-ware, e.g. after MPI_Init().
|
||||
*/
|
||||
EXTERN void vt_mpi_init(void);
|
||||
EXTERN void vt_mpi_init(uint8_t multithreaded);
|
||||
|
||||
/**
|
||||
* TODO: Description
|
||||
@ -793,12 +793,6 @@ EXTERN uint8_t vt_my_trace_is_master; /** 1st process on local node? */
|
||||
EXTERN uint8_t vt_my_trace_is_disabled; /** process disabled? */
|
||||
EXTERN uint8_t vt_my_trace_is_first_avail; /** 1st not disabled process? */
|
||||
|
||||
#if defined(HAVE_MPI2_THREAD) && HAVE_MPI2_THREAD
|
||||
/** is requested MPI thread support level MPI_THREAD_SERIALIZED?
|
||||
(not yet supported; no MPI communication events will be recorded) */
|
||||
EXTERN uint8_t vt_mpi_thread_serialized;
|
||||
#endif /* HAVE_MPI2_THREAD */
|
||||
|
||||
/** unique file id */
|
||||
EXTERN int vt_my_funique;
|
||||
|
||||
|
@ -54,6 +54,8 @@ void vt_unimci_init()
|
||||
|
||||
void vt_unimci_finalize()
|
||||
{
|
||||
if( vt_unimci_is_initialized )
|
||||
vt_unimci_is_initialized = 0;
|
||||
}
|
||||
|
||||
void vt_unimci_check_msg(uint8_t record, uint64_t* time)
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user