Changes to OTF:
- general: - corrected OTF version number - otfprofile: - removed leading '=' from CSV lines to make it loadable into spreadsheets (e.g. Open Office) - fixed process naming in CSV output of collective operation statistics Changes to VT: - configure: - added *_FOR_BUILD variables to CrayXE's default configure options; required for cross-building - VT libs: - fixed GPU communication, due to new process ID splitting - fixed parsing of PAPI native events in VT_METRICS; use strtok_r instead of strtok which is successively called in PAPI_event_name_to_code - added VT_METRICS_SEP to definition comments (-> Vampir's trace info) - Docu: - fixed link to TAU Reference Guide This commit was SVN r26137.
Этот коммит содержится в:
родитель
524de80eaa
Коммит
03ea0245f0
@ -343,14 +343,17 @@ How to install and configure VampirTrace
|
|||||||
* Maybe you also need to set additional commands and flags for the back-end
|
* Maybe you also need to set additional commands and flags for the back-end
|
||||||
(e.g. RANLIB, AR, MPICC, CXXFLAGS).
|
(e.g. RANLIB, AR, MPICC, CXXFLAGS).
|
||||||
|
|
||||||
For example, this configure command line works for an NEC SX6 system with an X86_64
|
Examples:
|
||||||
based front-end:
|
BlueGene/P:
|
||||||
|
% ./configure --host=powerpc64-ibm-linux-gnu
|
||||||
|
|
||||||
./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 MPICC=sxmpicc
|
Cray XK6:
|
||||||
AR=sxar RANLIB="sxar st" CC_FOR_BUILD=cc CXX_FOR_BUILD=c++
|
% ./configure --host=x86_64-cray-linux-gnu
|
||||||
--host=sx6-nec-superux14.1
|
CC_FOR_BUILD=craycc
|
||||||
--with-cross-prefix=sx
|
CXX_FOR_BUILD=crayc++
|
||||||
--with-otf-lib=-lotf
|
|
||||||
|
NEC SX6:
|
||||||
|
% ./configure --host=sx6-nec-superux14.1
|
||||||
|
|
||||||
|
|
||||||
Set up the Environment
|
Set up the Environment
|
||||||
|
@ -12,5 +12,9 @@ if test x"$XTPE_COMPILE_TARGET" != x; then
|
|||||||
FFLAGS="$FFLAGS -target=$XTPE_COMPILE_TARGET"
|
FFLAGS="$FFLAGS -target=$XTPE_COMPILE_TARGET"
|
||||||
FCFLAGS="$FCFLAGS -target=$XTPE_COMPILE_TARGET"
|
FCFLAGS="$FCFLAGS -target=$XTPE_COMPILE_TARGET"
|
||||||
fi
|
fi
|
||||||
|
CC_FOR_BUILD="gcc"
|
||||||
|
CXX_FOR_BUILD="g++"
|
||||||
|
CFLAGS_FOR_BUILD="-O3"
|
||||||
|
CXXFLAGS_FOR_BUILD="-O3"
|
||||||
enable_shared="no"
|
enable_shared="no"
|
||||||
with_mpich2="yes"
|
with_mpich2="yes"
|
||||||
|
@ -314,20 +314,20 @@ OpenMP events, and performance counters.
|
|||||||
<P>
|
<P>
|
||||||
After a successful tracing run, VampirTrace writes all collected data to a
|
After a successful tracing run, VampirTrace writes all collected data to a
|
||||||
trace file in the Open Trace Format (OTF)<A NAME="tex2html1"
|
trace file in the Open Trace Format (OTF)<A NAME="tex2html1"
|
||||||
HREF="#foot1523"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1530"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||||
As a result, the information is available for post-mortem analysis and
|
As a result, the information is available for post-mortem analysis and
|
||||||
visualization by various tools.
|
visualization by various tools.
|
||||||
Most notably, VampirTrace provides the input data for the Vampir analysis
|
Most notably, VampirTrace provides the input data for the Vampir analysis
|
||||||
and visualization tool<A NAME="tex2html2"
|
and visualization tool<A NAME="tex2html2"
|
||||||
HREF="#foot1524"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1531"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
VampirTrace is included in OpenMPI 1.3 and later versions.
|
VampirTrace is included in OpenMPI 1.3 and later versions.
|
||||||
If not disabled explicitly, VampirTrace is built automatically when installing
|
If not disabled explicitly, VampirTrace is built automatically when installing
|
||||||
OpenMPI<A NAME="tex2html3"
|
OpenMPI<A NAME="tex2html3"
|
||||||
HREF="#foot1525"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1532"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
@ -1143,7 +1143,7 @@ in a single file, that
|
|||||||
The names in between may contain wildcards as ``?'', ``*', and ``#'', each entry gets a new line.
|
The names in between may contain wildcards as ``?'', ``*', and ``#'', each entry gets a new line.
|
||||||
The lists end with <TT>END[_FILE]_<INCLUDE|EXCLUDE>_LIST</TT>. For further information on selective
|
The lists end with <TT>END[_FILE]_<INCLUDE|EXCLUDE>_LIST</TT>. For further information on selective
|
||||||
profiling have a look at the TAU documentation<A NAME="tex2html4"
|
profiling have a look at the TAU documentation<A NAME="tex2html4"
|
||||||
HREF="#foot1549"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1556"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||||
To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
|
To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
|
||||||
<PRE>
|
<PRE>
|
||||||
@ -1160,7 +1160,7 @@ Binary Instrumentation Using Dyninst
|
|||||||
The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to
|
The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to
|
||||||
instrument the application during runtime (binary instrumentation), by using
|
instrument the application during runtime (binary instrumentation), by using
|
||||||
Dyninst<A NAME="tex2html5"
|
Dyninst<A NAME="tex2html5"
|
||||||
HREF="#foot1550"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1557"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||||
Recompiling is not necessary for this kind of instrumentation,
|
Recompiling is not necessary for this kind of instrumentation,
|
||||||
but relinking:
|
but relinking:
|
||||||
@ -1311,7 +1311,7 @@ Tracing Calls to 3rd-Party Libraries
|
|||||||
VampirTrace is also capable to trace calls to third party libraries, which come with
|
VampirTrace is also capable to trace calls to third party libraries, which come with
|
||||||
at least one C header file even without the library's source code. If VampirTrace was
|
at least one C header file even without the library's source code. If VampirTrace was
|
||||||
built with support for library tracing (the CTool library<A NAME="tex2html6"
|
built with support for library tracing (the CTool library<A NAME="tex2html6"
|
||||||
HREF="#foot1551"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1558"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is required), the tool <TT>vtlibwrapgen</TT> can be used to
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is required), the tool <TT>vtlibwrapgen</TT> can be used to
|
||||||
generate a wrapper library to intercept each call to the actual library functions.
|
generate a wrapper library to intercept each call to the actual library functions.
|
||||||
This wrapper library can be linked to the application or used in combination with the
|
This wrapper library can be linked to the application or used in combination with the
|
||||||
@ -1903,7 +1903,7 @@ for the enhanced timer synchronization:
|
|||||||
|
|
||||||
<UL>
|
<UL>
|
||||||
<LI>CLAPACK <A NAME="tex2html7"
|
<LI>CLAPACK <A NAME="tex2html7"
|
||||||
HREF="#foot1561"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1568"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
|
||||||
</LI>
|
</LI>
|
||||||
<LI>AMD ACML
|
<LI>AMD ACML
|
||||||
@ -3441,7 +3441,7 @@ default: automatically by configure.
|
|||||||
enable support for Dyninst instrumentation,
|
enable support for Dyninst instrumentation,
|
||||||
default: enable if found by configure.
|
default: enable if found by configure.
|
||||||
<SPAN CLASS="textbf">Note:</SPAN> Requires Dyninst<A NAME="tex2html8"
|
<SPAN CLASS="textbf">Note:</SPAN> Requires Dyninst<A NAME="tex2html8"
|
||||||
HREF="#foot1589"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1596"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> version 6.1 or higher!
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> version 6.1 or higher!
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
@ -3463,9 +3463,9 @@ enable support for automatic source code
|
|||||||
instrumentation by using TAU, default: enable if
|
instrumentation by using TAU, default: enable if
|
||||||
found by configure.
|
found by configure.
|
||||||
<SPAN CLASS="textbf">Note:</SPAN> Requires PDToolkit<A NAME="tex2html9"
|
<SPAN CLASS="textbf">Note:</SPAN> Requires PDToolkit<A NAME="tex2html9"
|
||||||
HREF="#foot1590"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1597"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> or TAU<A NAME="tex2html10"
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> or TAU<A NAME="tex2html10"
|
||||||
HREF="#foot1591"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1598"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>!
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>!
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
@ -4034,7 +4034,7 @@ give the path for JVMTI-include files, default:
|
|||||||
|
|
||||||
<P>
|
<P>
|
||||||
To enable support for generating wrapper for 3th-Party libraries the C code parser CTool<A NAME="tex2html11"
|
To enable support for generating wrapper for 3th-Party libraries the C code parser CTool<A NAME="tex2html11"
|
||||||
HREF="#foot1592"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1599"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is needed:
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is needed:
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
@ -4165,15 +4165,32 @@ are shown below:
|
|||||||
<LI>Maybe you also need to set additional commands and flags for the back-end (e.g. <TT>RANLIB</TT>, <TT>AR</TT>, <TT>MPICC</TT>, <TT>CXXFLAGS</TT>).
|
<LI>Maybe you also need to set additional commands and flags for the back-end (e.g. <TT>RANLIB</TT>, <TT>AR</TT>, <TT>MPICC</TT>, <TT>CXXFLAGS</TT>).
|
||||||
</LI>
|
</LI>
|
||||||
</UL>
|
</UL>
|
||||||
For example, this <TT>configure</TT> command line works for an NEC SX6 system with an X86_64 based front-end:
|
Examples:
|
||||||
|
|
||||||
|
<P>
|
||||||
|
BlueGene/P:
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
<PRE>
|
<PRE>
|
||||||
% ./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 MPICC=sxmpicc
|
% ./configure --host=powerpc64-ibm-linux-gnu
|
||||||
AR=sxar RANLIB="sxar st" CC_FOR_BUILD=cc CXX_FOR_BUILD=c++
|
</PRE>
|
||||||
--host=sx6-nec-superux14.1
|
|
||||||
--with-cross-prefix=sx
|
<P>
|
||||||
--with-otf-lib=-lotf
|
Cray XK6:
|
||||||
|
|
||||||
|
<P>
|
||||||
|
<PRE>
|
||||||
|
% ./configure --host=x86_64-cray-linux-gnu
|
||||||
|
CC_FOR_BUILD=craycc
|
||||||
|
CXX_FOR_BUILD=crayc++
|
||||||
|
</PRE>
|
||||||
|
|
||||||
|
<P>
|
||||||
|
NEC SX6:
|
||||||
|
|
||||||
|
<P>
|
||||||
|
<PRE>
|
||||||
|
% ./configure --host=sx6-nec-superux14.1
|
||||||
</PRE>
|
</PRE>
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
@ -5227,69 +5244,69 @@ If you provide us with your additions afterwards we will consider merging them
|
|||||||
into the official VampirTrace package.
|
into the official VampirTrace package.
|
||||||
<BR><HR><H4>Footnotes</H4>
|
<BR><HR><H4>Footnotes</H4>
|
||||||
<DL>
|
<DL>
|
||||||
<DT><A NAME="foot1523">... (OTF)</A><A
|
<DT><A NAME="foot1530">... (OTF)</A><A
|
||||||
HREF="#tex2html1"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html1"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>http://www.tu-dresden.de/zih/otf
|
<DD>http://www.tu-dresden.de/zih/otf
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1524">... tool </A><A
|
<DT><A NAME="foot1531">... tool </A><A
|
||||||
HREF="#tex2html2"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html2"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>http://www.vampir.eu
|
<DD>http://www.vampir.eu
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1525">...
|
<DT><A NAME="foot1532">...
|
||||||
Open MPI </A><A
|
Open MPI </A><A
|
||||||
HREF="#tex2html3"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html3"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>http://www.open-mpi.org/faq/?category=vampirtrace
|
<DD>http://www.open-mpi.org/faq/?category=vampirtrace
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1549">... documentation </A><A
|
<DT><A NAME="foot1556">... documentation </A><A
|
||||||
HREF="#tex2html4"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html4"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling
|
<DD>http://www.cs.uoregon.edu/Research/tau/docs/newguide/bk05ch02.html#d0e3770
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1550">...
|
<DT><A NAME="foot1557">...
|
||||||
Dyninst </A><A
|
Dyninst </A><A
|
||||||
HREF="#tex2html5"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html5"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>http://www.dyninst.org
|
<DD>http://www.dyninst.org
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1551">... library </A><A
|
<DT><A NAME="foot1558">... library </A><A
|
||||||
HREF="#tex2html6"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html6"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>http://sourceforge.net/projects/ctool
|
<DD>http://sourceforge.net/projects/ctool
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1561">... CLAPACK</A><A
|
<DT><A NAME="foot1568">... CLAPACK</A><A
|
||||||
HREF="#tex2html7"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html7"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>www.netlib.org/clapack
|
<DD>www.netlib.org/clapack
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1589">... Dyninst </A><A
|
<DT><A NAME="foot1596">... Dyninst </A><A
|
||||||
HREF="#tex2html8"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html8"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>http://www.dyninst.org
|
<DD>http://www.dyninst.org
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1590">... PDToolkit </A><A
|
<DT><A NAME="foot1597">... PDToolkit </A><A
|
||||||
HREF="#tex2html9"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html9"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>http://www.cs.uoregon.edu/research/pdt/home.php
|
<DD>http://www.cs.uoregon.edu/research/pdt/home.php
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1591">... TAU </A><A
|
<DT><A NAME="foot1598">... TAU </A><A
|
||||||
HREF="#tex2html10"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html10"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>http://tau.uoregon.edu
|
<DD>http://tau.uoregon.edu
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1592">... CTool </A><A
|
<DT><A NAME="foot1599">... CTool </A><A
|
||||||
HREF="#tex2html11"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#tex2html11"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD>http://sourceforge.net/projects/ctool
|
<DD>http://sourceforge.net/projects/ctool
|
||||||
|
Двоичные данные
ompi/contrib/vt/vt/doc/UserManual.pdf
Двоичные данные
ompi/contrib/vt/vt/doc/UserManual.pdf
Двоичный файл не отображается.
@ -1,3 +1,13 @@
|
|||||||
|
1.10.2openmpi
|
||||||
|
- otfdump:
|
||||||
|
- don't abort when reading events fails - the input tracefile
|
||||||
|
might only have statistics
|
||||||
|
- otfprofile:
|
||||||
|
- removed leading '=' from CSV lines to make it loadable into
|
||||||
|
spreadsheets (e.g. Open Office)
|
||||||
|
- fixed process naming in CSV output of collective operation
|
||||||
|
statistics
|
||||||
|
|
||||||
1.10.1openmpi
|
1.10.1openmpi
|
||||||
- fixed build issues in otfprofile[-mpi]:
|
- fixed build issues in otfprofile[-mpi]:
|
||||||
- added missing header includes for WEXITSTATUS
|
- added missing header includes for WEXITSTATUS
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
major=1
|
major=1
|
||||||
minor=10
|
minor=10
|
||||||
sub=1
|
sub=2
|
||||||
|
|
||||||
# string is used for alpha, beta, or release tags. If it is non-empty, it will
|
# string is used for alpha, beta, or release tags. If it is non-empty, it will
|
||||||
# be appended to the version number.
|
# be appended to the version number.
|
||||||
@ -48,5 +48,5 @@ string=openmpi
|
|||||||
# release, age must be incremented. Otherwise, reset age
|
# release, age must be incremented. Otherwise, reset age
|
||||||
# to '0'.
|
# to '0'.
|
||||||
|
|
||||||
library=4:1:3
|
library=4:2:3
|
||||||
|
|
||||||
|
@ -9,8 +9,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfcompress", "tools\otfcom
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfdump", "tools\otfdump\otfdump_vs08.vcproj", "{508A823C-8EAB-4C56-9DFE-4D85B0D24491}"
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfdump", "tools\otfdump\otfdump_vs08.vcproj", "{508A823C-8EAB-4C56-9DFE-4D85B0D24491}"
|
||||||
EndProject
|
EndProject
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfmerge", "tools\otfmerge\otfmerge_vs08.vcproj", "{1B9223C8-DE20-4538-8214-776C6017D22B}"
|
|
||||||
EndProject
|
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|Win32 = Debug|Win32
|
Debug|Win32 = Debug|Win32
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
|
|
||||||
#define OTF_VERSION_MAJOR 1
|
#define OTF_VERSION_MAJOR 1
|
||||||
#define OTF_VERSION_MINOR 10
|
#define OTF_VERSION_MINOR 10
|
||||||
#define OTF_VERSION_SUB 1
|
#define OTF_VERSION_SUB 2
|
||||||
#define OTF_VERSION_STRING "openmpi"
|
#define OTF_VERSION_STRING "openmpi"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -611,7 +611,7 @@ int main ( int argc, const char** argv ) {
|
|||||||
|
|
||||||
read = OTF_Reader_readDefinitions( reader, handlers );
|
read = OTF_Reader_readDefinitions( reader, handlers );
|
||||||
if( read == OTF_READ_ERROR ) {
|
if( read == OTF_READ_ERROR ) {
|
||||||
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
|
fprintf(stderr,"An error occurred while reading definitions. The tracefile seems to be damaged. Abort.\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -624,8 +624,7 @@ int main ( int argc, const char** argv ) {
|
|||||||
|
|
||||||
read = OTF_Reader_readEvents( reader, handlers );
|
read = OTF_Reader_readEvents( reader, handlers );
|
||||||
if( read == OTF_READ_ERROR ) {
|
if( read == OTF_READ_ERROR ) {
|
||||||
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
|
fprintf(stderr,"An error occurred while reading events. Maybe the tracefile has no events or it is damaged. Continue.\n");
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -637,7 +636,7 @@ int main ( int argc, const char** argv ) {
|
|||||||
|
|
||||||
read = OTF_Reader_readStatistics( reader, handlers );
|
read = OTF_Reader_readStatistics( reader, handlers );
|
||||||
if( read == OTF_READ_ERROR ) {
|
if( read == OTF_READ_ERROR ) {
|
||||||
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
|
fprintf(stderr,"An error occurred while reading statistics. The tracefile seems to be damaged. Abort.\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -649,7 +648,7 @@ int main ( int argc, const char** argv ) {
|
|||||||
}
|
}
|
||||||
read = OTF_Reader_readSnapshots( reader, handlers );
|
read = OTF_Reader_readSnapshots( reader, handlers );
|
||||||
if( read == OTF_READ_ERROR ) {
|
if( read == OTF_READ_ERROR ) {
|
||||||
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
|
fprintf(stderr,"An error occurred while reading snapshots. The tracefile seems to be damaged. Abort.\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -661,7 +660,7 @@ int main ( int argc, const char** argv ) {
|
|||||||
}
|
}
|
||||||
read = OTF_Reader_readMarkers( reader, handlers );
|
read = OTF_Reader_readMarkers( reader, handlers );
|
||||||
if( read == OTF_READ_ERROR ) {
|
if( read == OTF_READ_ERROR ) {
|
||||||
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
|
fprintf(stderr,"An error occurred while reading markers. The tracefile seems to be damaged. Abort.\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -94,7 +94,7 @@ static void write_func_data( AllData& alldata, ofstream& csvFile,
|
|||||||
" appending function data to file: %s\n",
|
" appending function data to file: %s\n",
|
||||||
csvFileName.c_str() );
|
csvFileName.c_str() );
|
||||||
|
|
||||||
static const string LINE_PREFIX= "==FUNCTION==";
|
static const string LINE_PREFIX= "FUNCTION";
|
||||||
|
|
||||||
if ( 0 == alldata.myRank ) {
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
@ -145,7 +145,7 @@ static void write_counter_data( AllData& alldata, ofstream& csvFile,
|
|||||||
" appending counter data to file: %s\n",
|
" appending counter data to file: %s\n",
|
||||||
csvFileName.c_str() );
|
csvFileName.c_str() );
|
||||||
|
|
||||||
static const string LINE_PREFIX= "==COUNTER==";
|
static const string LINE_PREFIX= "COUNTER";
|
||||||
|
|
||||||
if ( 0 == alldata.myRank ) {
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
@ -218,7 +218,7 @@ static void write_p2p_data( AllData& alldata, ofstream& csvFile,
|
|||||||
" appending P2P message data to file: %s\n",
|
" appending P2P message data to file: %s\n",
|
||||||
csvFileName.c_str() );
|
csvFileName.c_str() );
|
||||||
|
|
||||||
static const string LINE_PREFIX= "==P2P==";
|
static const string LINE_PREFIX= "P2P";
|
||||||
|
|
||||||
if ( 0 == alldata.myRank ) {
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
@ -270,7 +270,7 @@ static void write_collop_data( AllData& alldata, ofstream& csvFile,
|
|||||||
" appending collective op. data to file: %s\n",
|
" appending collective op. data to file: %s\n",
|
||||||
csvFileName.c_str() );
|
csvFileName.c_str() );
|
||||||
|
|
||||||
static const string LINE_PREFIX= "==COLLOP==";
|
static const string LINE_PREFIX= "COLLOP";
|
||||||
|
|
||||||
static map< uint64_t, string > op_class_names;
|
static map< uint64_t, string > op_class_names;
|
||||||
if ( op_class_names.empty() ) {
|
if ( op_class_names.empty() ) {
|
||||||
@ -313,7 +313,7 @@ static void write_collop_data( AllData& alldata, ofstream& csvFile,
|
|||||||
assert( 0 != op_class_name.length() );
|
assert( 0 != op_class_name.length() );
|
||||||
|
|
||||||
csvFile << LINE_PREFIX << ';'
|
csvFile << LINE_PREFIX << ';'
|
||||||
<< proc_id << ';'
|
<< proc_name << ';'
|
||||||
<< op_class_name << ';'
|
<< op_class_name << ';'
|
||||||
<< count_send << ';'
|
<< count_send << ';'
|
||||||
<< count_recv << ';'
|
<< count_recv << ';'
|
||||||
|
@ -92,13 +92,13 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
|
|||||||
} \
|
} \
|
||||||
if(do_traceE){ \
|
if(do_traceE){ \
|
||||||
if(_kind == cudaMemcpyHostToDevice){ \
|
if(_kind == cudaMemcpyHostToDevice){ \
|
||||||
vt_mpi_rma_put(_ptid, &time, strmID * 65536 + vt_my_trace, \
|
vt_mpi_rma_put(_ptid, &time, VT_GPU_RANK_ID(strmID), \
|
||||||
vt_gpu_commCID, 0, (uint64_t)_bytes); \
|
vt_gpu_commCID, 0, (uint64_t)_bytes); \
|
||||||
}else if(_kind == cudaMemcpyDeviceToHost){ \
|
}else if(_kind == cudaMemcpyDeviceToHost){ \
|
||||||
vt_mpi_rma_get(_ptid, &time, strmID * 65536 + vt_my_trace, \
|
vt_mpi_rma_get(_ptid, &time, VT_GPU_RANK_ID(strmID), \
|
||||||
vt_gpu_commCID, 0, (uint64_t)_bytes); \
|
vt_gpu_commCID, 0, (uint64_t)_bytes); \
|
||||||
}else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \
|
}else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \
|
||||||
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, \
|
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID), \
|
||||||
vt_gpu_commCID, 0, (uint64_t)_bytes); \
|
vt_gpu_commCID, 0, (uint64_t)_bytes); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
@ -142,7 +142,7 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
|
|||||||
\
|
\
|
||||||
if(do_traceE){\
|
if(do_traceE){\
|
||||||
vt_mpi_rma_get(vtSrcDev->strmList->tid, &time, \
|
vt_mpi_rma_get(vtSrcDev->strmList->tid, &time, \
|
||||||
vtDstDev->strmList->tid * 65536 + vt_my_trace,\
|
VT_GPU_RANK_ID(vtDstDev->strmList->tid),\
|
||||||
vt_gpu_commCID, 0, _bytes);\
|
vt_gpu_commCID, 0, _bytes);\
|
||||||
}\
|
}\
|
||||||
\
|
\
|
||||||
@ -274,13 +274,13 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
|
|||||||
VT_CUDART_CALL(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!"); \
|
VT_CUDART_CALL(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!"); \
|
||||||
if(syncLevel > 1){time = vt_pform_wtime(); vt_exit(ptid, &time);} \
|
if(syncLevel > 1){time = vt_pform_wtime(); vt_exit(ptid, &time);} \
|
||||||
if(_kind == cudaMemcpyHostToDevice){ \
|
if(_kind == cudaMemcpyHostToDevice){ \
|
||||||
vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace, \
|
vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID), \
|
||||||
vt_gpu_commCID, 0, _bytes); \
|
vt_gpu_commCID, 0, _bytes); \
|
||||||
}else if(_kind == cudaMemcpyDeviceToHost){ \
|
}else if(_kind == cudaMemcpyDeviceToHost){ \
|
||||||
vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace, \
|
vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID), \
|
||||||
vt_gpu_commCID, 0, _bytes); \
|
vt_gpu_commCID, 0, _bytes); \
|
||||||
}else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \
|
}else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \
|
||||||
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, \
|
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID), \
|
||||||
vt_gpu_commCID, 0, _bytes); \
|
vt_gpu_commCID, 0, _bytes); \
|
||||||
CUDARTWRAP_LOCK(); \
|
CUDARTWRAP_LOCK(); \
|
||||||
vt_gpu_prop[strmID] |= VTGPU_GPU_COMM; \
|
vt_gpu_prop[strmID] |= VTGPU_GPU_COMM; \
|
||||||
@ -1192,13 +1192,13 @@ static void VTCUDAflush(VTCUDADevice *vtDev, uint32_t ptid)
|
|||||||
}*/
|
}*/
|
||||||
|
|
||||||
if(mcpy->kind == cudaMemcpyHostToDevice){
|
if(mcpy->kind == cudaMemcpyHostToDevice){
|
||||||
vt_mpi_rma_get(tid, &strttime, mcpy->pid * 65536 + vt_my_trace,
|
vt_mpi_rma_get(tid, &strttime, VT_GPU_RANK_ID(mcpy->pid),
|
||||||
vt_gpu_commCID, 0, mcpy->byteCount);
|
vt_gpu_commCID, 0, mcpy->byteCount);
|
||||||
}else if(mcpy->kind == cudaMemcpyDeviceToHost){
|
}else if(mcpy->kind == cudaMemcpyDeviceToHost){
|
||||||
vt_mpi_rma_put(tid, &strttime, mcpy->pid * 65536 + vt_my_trace,
|
vt_mpi_rma_put(tid, &strttime, VT_GPU_RANK_ID(mcpy->pid),
|
||||||
vt_gpu_commCID, 0, mcpy->byteCount);
|
vt_gpu_commCID, 0, mcpy->byteCount);
|
||||||
}else if(mcpy->kind == cudaMemcpyDeviceToDevice){
|
}else if(mcpy->kind == cudaMemcpyDeviceToDevice){
|
||||||
vt_mpi_rma_get(tid, &strttime, tid * 65536 + vt_my_trace,
|
vt_mpi_rma_get(tid, &strttime, VT_GPU_RANK_ID(tid),
|
||||||
vt_gpu_commCID, 0, mcpy->byteCount);
|
vt_gpu_commCID, 0, mcpy->byteCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -113,7 +113,7 @@ static uint8_t vt_cuptiact_finalized = 0;
|
|||||||
static uint8_t *vt_cuptiact_global_buffer = NULL;*/
|
static uint8_t *vt_cuptiact_global_buffer = NULL;*/
|
||||||
|
|
||||||
/* size of the activity buffer */
|
/* size of the activity buffer */
|
||||||
static size_t vt_cuptiact_bufSize = VTGPU_DEFAULT_BSIZE;
|
static size_t vt_cuptiact_bufSize = VT_CUPTI_ACT_DEFAULT_BSIZE;
|
||||||
|
|
||||||
/* cupti activity specific kernel counter IDs */
|
/* cupti activity specific kernel counter IDs */
|
||||||
static uint32_t vt_cuptiact_cid_knStaticSharedMem = VT_NO_ID;
|
static uint32_t vt_cuptiact_cid_knStaticSharedMem = VT_NO_ID;
|
||||||
@ -224,9 +224,9 @@ void vt_cupti_activity_init()
|
|||||||
|
|
||||||
void vt_cupti_activity_finalize()
|
void vt_cupti_activity_finalize()
|
||||||
{
|
{
|
||||||
if(!vt_cuptiact_finalized){
|
if(!vt_cuptiact_finalized && vt_cuptiact_initialized){
|
||||||
VT_CUPTI_ACT_LOCK();
|
VT_CUPTI_ACT_LOCK();
|
||||||
if(!vt_cuptiact_finalized){
|
if(!vt_cuptiact_finalized && vt_cuptiact_initialized){
|
||||||
vt_cntl_msg(2, "[CUPTI Activity] Finalizing ... ");
|
vt_cntl_msg(2, "[CUPTI Activity] Finalizing ... ");
|
||||||
|
|
||||||
vt_cuptiact_finalized = 1;
|
vt_cuptiact_finalized = 1;
|
||||||
@ -476,7 +476,8 @@ static vt_cuptiact_ctx_t* vt_cuptiact_getCtx(CUcontext cuCtx)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Destroy a VampirTrace CUPTI Activity context.
|
* Check for a VampirTrace activity stream by stream ID. If it does not exist,
|
||||||
|
* create it.
|
||||||
*
|
*
|
||||||
* @param vtCtx VampirTrace CUPTI Activity context
|
* @param vtCtx VampirTrace CUPTI Activity context
|
||||||
* @param strmID the CUDA stream ID provided by CUPTI callback API
|
* @param strmID the CUDA stream ID provided by CUPTI callback API
|
||||||
@ -682,19 +683,19 @@ static void vt_cuptiact_writeKernelRecord(CUpti_ActivityKernel *kernel,
|
|||||||
|
|
||||||
/* if current activity's start time is before last written timestamp */
|
/* if current activity's start time is before last written timestamp */
|
||||||
if(start < vtStrm->vtLastTime){
|
if(start < vtStrm->vtLastTime){
|
||||||
vt_warning("[CUPTI Activity] Kernel start time < last written timestamp!");
|
vt_warning("[CUPTI Activity] Kernel: start time < last written timestamp!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check if time between start and stop is increasing */
|
/* check if time between start and stop is increasing */
|
||||||
if(stop < start){
|
if(stop < start){
|
||||||
vt_warning("[CUPTI Activity] Kernel start time > kernel stop time!");
|
vt_warning("[CUPTI Activity] Kernel: start time > stop time!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check if synchronization stop time is before kernel stop time */
|
/* check if synchronization stop time is before kernel stop time */
|
||||||
if(vtCtx->sync.hostStop < stop){
|
if(vtCtx->sync.hostStop < stop){
|
||||||
vt_warning("[CUPTI Activity] Sync stop time < kernel stop time!");
|
vt_warning("[CUPTI Activity] Kernel: sync stop time < stop time!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -805,19 +806,19 @@ static void vt_cuptiact_writeMemcpyRecord(CUpti_ActivityMemcpy *mcpy,
|
|||||||
|
|
||||||
/* if current activity's start time is before last written timestamp */
|
/* if current activity's start time is before last written timestamp */
|
||||||
if(start < vtStrm->vtLastTime){
|
if(start < vtStrm->vtLastTime){
|
||||||
vt_cntl_msg(1, "[CUPTI Activity] Memcpy start time < last written timestamp!");
|
vt_cntl_msg(1, "[CUPTI Activity] Memcpy: start time < last written timestamp!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check if time between start and stop is increasing */
|
/* check if time between start and stop is increasing */
|
||||||
if(stop < start){
|
if(stop < start){
|
||||||
vt_warning("[CUPTI Activity] Memcpy start time > kernel stop time!");
|
vt_warning("[CUPTI Activity] Memcpy: start time > stop time!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* check if synchronization stop time is before kernel stop time */
|
/* check if synchronization stop time is before kernel stop time */
|
||||||
if(vtCtx->sync.hostStop < stop){
|
if(vtCtx->sync.hostStop < stop){
|
||||||
vt_warning("[CUPTI Activity] Synchronization stop time < kernel stop time!");
|
vt_warning("[CUPTI Activity] Memcpy: sync stop time < stop time!");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -848,13 +849,13 @@ static void vt_cuptiact_writeMemcpyRecord(CUpti_ActivityMemcpy *mcpy,
|
|||||||
vt_warning("MCPYexit: %llu (%d)", stop, vtThrdID);
|
vt_warning("MCPYexit: %llu (%d)", stop, vtThrdID);
|
||||||
*/
|
*/
|
||||||
if(kind == VT_GPU_HOST2DEV){
|
if(kind == VT_GPU_HOST2DEV){
|
||||||
vt_mpi_rma_get(vtThrdID, &start, vtCtx->ptid * 65536 + vt_my_trace,
|
vt_mpi_rma_get(vtThrdID, &start, VT_GPU_RANK_ID(vtCtx->ptid),
|
||||||
vt_gpu_commCID, 0, mcpy->bytes);
|
vt_gpu_commCID, 0, mcpy->bytes);
|
||||||
}else if(kind == VT_GPU_DEV2HOST){
|
}else if(kind == VT_GPU_DEV2HOST){
|
||||||
vt_mpi_rma_put(vtThrdID, &start, vtCtx->ptid * 65536 + vt_my_trace,
|
vt_mpi_rma_put(vtThrdID, &start, VT_GPU_RANK_ID(vtCtx->ptid),
|
||||||
vt_gpu_commCID, 0, mcpy->bytes);
|
vt_gpu_commCID, 0, mcpy->bytes);
|
||||||
}else if(kind == VT_GPU_DEV2DEV){
|
}else if(kind == VT_GPU_DEV2DEV){
|
||||||
vt_mpi_rma_get(vtThrdID, &start, vtThrdID * 65536 + vt_my_trace,
|
vt_mpi_rma_get(vtThrdID, &start, VT_GPU_RANK_ID(vtThrdID),
|
||||||
vt_gpu_commCID, 0, mcpy->bytes);
|
vt_gpu_commCID, 0, mcpy->bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1464,13 +1464,13 @@ static void vt_cupticb_handle_cudart_memcpy(
|
|||||||
|
|
||||||
/*time = vt_pform_wtime();*/
|
/*time = vt_pform_wtime();*/
|
||||||
if(kind == cudaMemcpyHostToDevice){
|
if(kind == cudaMemcpyHostToDevice){
|
||||||
vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace,
|
vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID),
|
||||||
vt_gpu_commCID, 0, bytes);
|
vt_gpu_commCID, 0, bytes);
|
||||||
}else if(kind == cudaMemcpyDeviceToHost){
|
}else if(kind == cudaMemcpyDeviceToHost){
|
||||||
vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace,
|
vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID),
|
||||||
vt_gpu_commCID, 0, bytes);
|
vt_gpu_commCID, 0, bytes);
|
||||||
}else if(kind == cudaMemcpyDeviceToDevice){
|
}else if(kind == cudaMemcpyDeviceToDevice){
|
||||||
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace,
|
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID),
|
||||||
vt_gpu_commCID, 0, bytes);
|
vt_gpu_commCID, 0, bytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1528,13 +1528,13 @@ static void vt_cupticb_handle_cudart_mcpyAsync(const CUpti_CallbackData *cbInfo,
|
|||||||
|
|
||||||
time = vt_pform_wtime();
|
time = vt_pform_wtime();
|
||||||
if(kind == cudaMemcpyHostToDevice){
|
if(kind == cudaMemcpyHostToDevice){
|
||||||
vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace,
|
vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID),
|
||||||
vt_gpu_commCID, 0, bytes);
|
vt_gpu_commCID, 0, bytes);
|
||||||
}else if(kind == cudaMemcpyDeviceToHost){
|
}else if(kind == cudaMemcpyDeviceToHost){
|
||||||
vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace,
|
vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID),
|
||||||
vt_gpu_commCID, 0, bytes);
|
vt_gpu_commCID, 0, bytes);
|
||||||
}else if(kind == cudaMemcpyDeviceToDevice){
|
}else if(kind == cudaMemcpyDeviceToDevice){
|
||||||
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace,
|
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID),
|
||||||
vt_gpu_commCID, 0, bytes);
|
vt_gpu_commCID, 0, bytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1692,12 +1692,14 @@ void vt_cupti_callback_init()
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* set callback for CUDA runtime API functions */
|
/* set callback for CUDA runtime API functions */
|
||||||
#if (defined(CUPTI_API_VERSION) && (CUPTI_API_VERSION >= 2))
|
#if defined(VT_CUPTI_ACTIVITY)
|
||||||
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy ||
|
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy ||
|
||||||
vt_cupti_trace_gpu_mem > 0){
|
vt_cupti_trace_gpu_mem > 0){
|
||||||
vt_cupti_set_callback(vt_cupticb_all_ptr,
|
vt_cupti_set_callback(vt_cupticb_all_ptr,
|
||||||
CUPTI_CB_DOMAIN_RESOURCE,
|
CUPTI_CB_DOMAIN_RESOURCE,
|
||||||
CUPTI_RUNTIME_TRACE_CBID_INVALID);
|
CUPTI_RUNTIME_TRACE_CBID_INVALID);
|
||||||
|
|
||||||
|
vt_cupti_activity_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy){
|
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy){
|
||||||
@ -1737,14 +1739,6 @@ void vt_cupti_callback_init()
|
|||||||
memset(vt_cupticb_cudaApiFuncTab, VT_NO_ID,
|
memset(vt_cupticb_cudaApiFuncTab, VT_NO_ID,
|
||||||
VT_CUPTICB_CUDA_API_FUNC_MAX * sizeof(uint32_t));
|
VT_CUPTICB_CUDA_API_FUNC_MAX * sizeof(uint32_t));
|
||||||
|
|
||||||
/* use CUPTI activity for kernel and memcpy tracing, if it is available */
|
|
||||||
#if defined(VT_CUPTI_ACTIVITY)
|
|
||||||
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy ||
|
|
||||||
vt_cupti_trace_gpu_mem > 0){
|
|
||||||
vt_cupti_activity_init();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* register the finalize function of VampirTrace CUPTI to be called before
|
/* register the finalize function of VampirTrace CUPTI to be called before
|
||||||
* the program exits */
|
* the program exits */
|
||||||
atexit(vt_cupti_callback_finalize);
|
atexit(vt_cupti_callback_finalize);
|
||||||
@ -1769,7 +1763,10 @@ void vt_cupti_callback_finalize()
|
|||||||
vt_cntl_msg(2, "[CUPTI Callbacks] Finalizing ... ");
|
vt_cntl_msg(2, "[CUPTI Callbacks] Finalizing ... ");
|
||||||
|
|
||||||
#if defined(VT_CUPTI_ACTIVITY)
|
#if defined(VT_CUPTI_ACTIVITY)
|
||||||
|
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy ||
|
||||||
|
vt_cupti_trace_gpu_mem > 0){
|
||||||
vt_cupti_activity_finalize();
|
vt_cupti_activity_finalize();
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
VT_CUPTI_CALL(cuptiUnsubscribe(vt_cupticb_subscriber),
|
VT_CUPTI_CALL(cuptiUnsubscribe(vt_cupticb_subscriber),
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
# define EXTERN extern
|
# define EXTERN extern
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "vt_defs.h"
|
||||||
#include "vt_inttypes.h" /* VampirTrace integer types */
|
#include "vt_inttypes.h" /* VampirTrace integer types */
|
||||||
#include "vt_thrd.h" /* thread creation for GPU kernels */
|
#include "vt_thrd.h" /* thread creation for GPU kernels */
|
||||||
#include "vt_trc.h" /* VampirTrace events */
|
#include "vt_trc.h" /* VampirTrace events */
|
||||||
@ -39,6 +40,12 @@
|
|||||||
/* performance counter available? */
|
/* performance counter available? */
|
||||||
#define VTGPU_NO_PC 0x04 /* no performance counter for this thread available */
|
#define VTGPU_NO_PC 0x04 /* no performance counter for this thread available */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the rank ID for a given VampirTrace thread ID.
|
||||||
|
* The MPI RMA functions take the rank ID instead of the VampirTrace process ID!
|
||||||
|
*/
|
||||||
|
#define VT_GPU_RANK_ID(thread_id) \
|
||||||
|
(VT_PROCESS_ID(vt_my_trace, thread_id)-1)
|
||||||
|
|
||||||
#if (defined(VT_CUDARTWRAP) || defined(VT_CUPTI))
|
#if (defined(VT_CUDARTWRAP) || defined(VT_CUPTI))
|
||||||
/*
|
/*
|
||||||
|
@ -239,7 +239,7 @@ static metricmap_t* vt_metricmap_init(metmap_t match)
|
|||||||
return mapv;
|
return mapv;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void metricv_add(char* name, int code)
|
static void metricv_add(char* name, int code, uint32_t props)
|
||||||
{
|
{
|
||||||
if (nmetrics >= VT_METRIC_MAXNUM) {
|
if (nmetrics >= VT_METRIC_MAXNUM) {
|
||||||
vt_error_msg("Number of counters exceeds VampirTrace allowed maximum "
|
vt_error_msg("Number of counters exceeds VampirTrace allowed maximum "
|
||||||
@ -248,7 +248,7 @@ static void metricv_add(char* name, int code)
|
|||||||
metricv[nmetrics] = (struct metric*)malloc(sizeof(struct metric));
|
metricv[nmetrics] = (struct metric*)malloc(sizeof(struct metric));
|
||||||
metricv[nmetrics]->name = strdup(name);
|
metricv[nmetrics]->name = strdup(name);
|
||||||
metricv[nmetrics]->descr[0] = '\0';
|
metricv[nmetrics]->descr[0] = '\0';
|
||||||
metricv[nmetrics]->props = VT_CNTR_ACC;
|
metricv[nmetrics]->props = props;
|
||||||
metricv[nmetrics]->papi_code = code;
|
metricv[nmetrics]->papi_code = code;
|
||||||
nmetrics++;
|
nmetrics++;
|
||||||
}
|
}
|
||||||
@ -399,7 +399,7 @@ int vt_metric_open()
|
|||||||
char* env_sep;
|
char* env_sep;
|
||||||
char* var;
|
char* var;
|
||||||
char* token;
|
char* token;
|
||||||
int forceprop;
|
char* saveptr;
|
||||||
PAPI_event_info_t info;
|
PAPI_event_info_t info;
|
||||||
metricmap_t* mapv = NULL;
|
metricmap_t* mapv = NULL;
|
||||||
metricmap_t* map;
|
metricmap_t* map;
|
||||||
@ -433,15 +433,19 @@ int vt_metric_open()
|
|||||||
vt_cntl_msg(2, "VT_METRICS=%s", var);
|
vt_cntl_msg(2, "VT_METRICS=%s", var);
|
||||||
|
|
||||||
/* read metrics from specification string */
|
/* read metrics from specification string */
|
||||||
token = strtok(var, env_sep);
|
token = strtok_r(var, env_sep, &saveptr);
|
||||||
while ( token && (nmetrics < VT_METRIC_MAXNUM) ) {
|
while ( token && (nmetrics < VT_METRIC_MAXNUM) ) {
|
||||||
|
/* set counter properties */
|
||||||
|
uint32_t props;
|
||||||
if (token[0]=='!')
|
if (token[0]=='!')
|
||||||
{
|
{
|
||||||
forceprop=1;
|
props = VT_CNTR_ABS | VT_CNTR_NEXT;
|
||||||
token++;
|
token++;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
forceprop=0;
|
{
|
||||||
|
props = VT_CNTR_ACC;
|
||||||
|
}
|
||||||
/* search metricmap for a suitable definition */
|
/* search metricmap for a suitable definition */
|
||||||
map = mapv;
|
map = mapv;
|
||||||
/*printf("Token%d: <%s>\n", nmetrics, token);*/
|
/*printf("Token%d: <%s>\n", nmetrics, token);*/
|
||||||
@ -471,9 +475,9 @@ int vt_metric_open()
|
|||||||
/*printf("Event %s *N/A*\n", component);*/
|
/*printf("Event %s *N/A*\n", component);*/
|
||||||
got_valid_match = 0;
|
got_valid_match = 0;
|
||||||
} else if ((k==0) && (len==0)) { /* use provided event name */
|
} else if ((k==0) && (len==0)) { /* use provided event name */
|
||||||
metricv_add(token, code);
|
metricv_add(token, code, props);
|
||||||
} else { /* use alias component name */
|
} else { /* use alias component name */
|
||||||
metricv_add(component, code);
|
metricv_add(component, code, props);
|
||||||
}
|
}
|
||||||
k++;
|
k++;
|
||||||
} while (got_valid_match && (len > 0));
|
} while (got_valid_match && (len > 0));
|
||||||
@ -499,12 +503,10 @@ int vt_metric_open()
|
|||||||
if (retval != PAPI_OK)
|
if (retval != PAPI_OK)
|
||||||
vt_error_msg("Metric <%s> not available\n", component);
|
vt_error_msg("Metric <%s> not available\n", component);
|
||||||
|
|
||||||
metricv_add(component, code);
|
metricv_add(component, code, props);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (forceprop)
|
token = strtok_r(NULL, env_sep, &saveptr);
|
||||||
metricv[nmetrics-1]->props = VT_CNTR_ABS | VT_CNTR_NEXT;
|
|
||||||
token = strtok(NULL, env_sep);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*printf("nmetrics=%d\n", nmetrics);*/
|
/*printf("nmetrics=%d\n", nmetrics);*/
|
||||||
|
@ -488,6 +488,9 @@ static void write_def_header(void)
|
|||||||
/* VT_METRICS */
|
/* VT_METRICS */
|
||||||
vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_METRICS: %s",
|
vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_METRICS: %s",
|
||||||
vt_env_metrics() ? vt_env_metrics() : "<not set>");
|
vt_env_metrics() ? vt_env_metrics() : "<not set>");
|
||||||
|
/* VT_METRICS_SEP */
|
||||||
|
vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_METRICS_SEP: %s",
|
||||||
|
vt_env_metrics_sep());
|
||||||
#endif /* VT_METR */
|
#endif /* VT_METR */
|
||||||
|
|
||||||
#if defined(VT_RUSAGE)
|
#if defined(VT_RUSAGE)
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user