Changes to OTF:
- general: - corrected OTF version number - otfprofile: - removed leading '=' from CSV lines to make it loadable into spreadsheets (e.g. Open Office) - fixed process naming in CSV output of collective operation statistics Changes to VT: - configure: - added *_FOR_BUILD variables to CrayXE's default configure options; required for cross-building - VT libs: - fixed GPU communication, due to new process ID splitting - fixed parsing of PAPI native events in VT_METRICS; use strtok_r instead of strtok which is successively called in PAPI_event_name_to_code - added VT_METRICS_SEP to definition comments (-> Vampir's trace info) - Docu: - fixed link to TAU Reference Guide This commit was SVN r26137.
Этот коммит содержится в:
родитель
524de80eaa
Коммит
03ea0245f0
@ -343,14 +343,17 @@ How to install and configure VampirTrace
|
||||
* Maybe you also need to set additional commands and flags for the back-end
|
||||
(e.g. RANLIB, AR, MPICC, CXXFLAGS).
|
||||
|
||||
For example, this configure command line works for an NEC SX6 system with an X86_64
|
||||
based front-end:
|
||||
Examples:
|
||||
BlueGene/P:
|
||||
% ./configure --host=powerpc64-ibm-linux-gnu
|
||||
|
||||
./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 MPICC=sxmpicc
|
||||
AR=sxar RANLIB="sxar st" CC_FOR_BUILD=cc CXX_FOR_BUILD=c++
|
||||
--host=sx6-nec-superux14.1
|
||||
--with-cross-prefix=sx
|
||||
--with-otf-lib=-lotf
|
||||
Cray XK6:
|
||||
% ./configure --host=x86_64-cray-linux-gnu
|
||||
CC_FOR_BUILD=craycc
|
||||
CXX_FOR_BUILD=crayc++
|
||||
|
||||
NEC SX6:
|
||||
% ./configure --host=sx6-nec-superux14.1
|
||||
|
||||
|
||||
Set up the Environment
|
||||
|
@ -12,5 +12,9 @@ if test x"$XTPE_COMPILE_TARGET" != x; then
|
||||
FFLAGS="$FFLAGS -target=$XTPE_COMPILE_TARGET"
|
||||
FCFLAGS="$FCFLAGS -target=$XTPE_COMPILE_TARGET"
|
||||
fi
|
||||
CC_FOR_BUILD="gcc"
|
||||
CXX_FOR_BUILD="g++"
|
||||
CFLAGS_FOR_BUILD="-O3"
|
||||
CXXFLAGS_FOR_BUILD="-O3"
|
||||
enable_shared="no"
|
||||
with_mpich2="yes"
|
||||
|
@ -314,20 +314,20 @@ OpenMP events, and performance counters.
|
||||
<P>
|
||||
After a successful tracing run, VampirTrace writes all collected data to a
|
||||
trace file in the Open Trace Format (OTF)<A NAME="tex2html1"
|
||||
HREF="#foot1523"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1530"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
As a result, the information is available for post-mortem analysis and
|
||||
visualization by various tools.
|
||||
Most notably, VampirTrace provides the input data for the Vampir analysis
|
||||
and visualization tool<A NAME="tex2html2"
|
||||
HREF="#foot1524"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1531"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
|
||||
<P>
|
||||
VampirTrace is included in OpenMPI 1.3 and later versions.
|
||||
If not disabled explicitly, VampirTrace is built automatically when installing
|
||||
OpenMPI<A NAME="tex2html3"
|
||||
HREF="#foot1525"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1532"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
|
||||
<P>
|
||||
@ -1143,7 +1143,7 @@ in a single file, that
|
||||
The names in between may contain wildcards such as ``?'', ``*'', and ``#''; each entry gets a new line.
|
||||
The lists end with <TT>END[_FILE]_<INCLUDE|EXCLUDE>_LIST</TT>. For further information on selective
|
||||
profiling have a look at the TAU documentation<A NAME="tex2html4"
|
||||
HREF="#foot1549"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1556"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
|
||||
<PRE>
|
||||
@ -1160,7 +1160,7 @@ Binary Instrumentation Using Dyninst
|
||||
The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to
|
||||
instrument the application during runtime (binary instrumentation), by using
|
||||
Dyninst<A NAME="tex2html5"
|
||||
HREF="#foot1550"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1557"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
Recompiling is not necessary for this kind of instrumentation,
|
||||
but relinking:
|
||||
@ -1311,7 +1311,7 @@ Tracing Calls to 3rd-Party Libraries
|
||||
VampirTrace is also capable of tracing calls to third party libraries, which come with
|
||||
at least one C header file even without the library's source code. If VampirTrace was
|
||||
built with support for library tracing (the CTool library<A NAME="tex2html6"
|
||||
HREF="#foot1551"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1558"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is required), the tool <TT>vtlibwrapgen</TT> can be used to
|
||||
generate a wrapper library to intercept each call to the actual library functions.
|
||||
This wrapper library can be linked to the application or used in combination with the
|
||||
@ -1903,7 +1903,7 @@ for the enhanced timer synchronization:
|
||||
|
||||
<UL>
|
||||
<LI>CLAPACK <A NAME="tex2html7"
|
||||
HREF="#foot1561"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1568"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
|
||||
</LI>
|
||||
<LI>AMD ACML
|
||||
@ -3441,7 +3441,7 @@ default: automatically by configure.
|
||||
enable support for Dyninst instrumentation,
|
||||
default: enable if found by configure.
|
||||
<SPAN CLASS="textbf">Note:</SPAN> Requires Dyninst<A NAME="tex2html8"
|
||||
HREF="#foot1589"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1596"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> version 6.1 or higher!
|
||||
|
||||
<P>
|
||||
@ -3463,9 +3463,9 @@ enable support for automatic source code
|
||||
instrumentation by using TAU, default: enable if
|
||||
found by configure.
|
||||
<SPAN CLASS="textbf">Note:</SPAN> Requires PDToolkit<A NAME="tex2html9"
|
||||
HREF="#foot1590"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1597"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> or TAU<A NAME="tex2html10"
|
||||
HREF="#foot1591"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1598"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>!
|
||||
|
||||
<P>
|
||||
@ -4034,7 +4034,7 @@ give the path for JVMTI-include files, default:
|
||||
|
||||
<P>
|
||||
To enable support for generating wrappers for 3rd-party libraries the C code parser CTool<A NAME="tex2html11"
|
||||
HREF="#foot1592"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1599"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is needed:
|
||||
|
||||
<P>
|
||||
@ -4165,15 +4165,32 @@ are shown below:
|
||||
<LI>Maybe you also need to set additional commands and flags for the back-end (e.g. <TT>RANLIB</TT>, <TT>AR</TT>, <TT>MPICC</TT>, <TT>CXXFLAGS</TT>).
|
||||
</LI>
|
||||
</UL>
|
||||
For example, this <TT>configure</TT> command line works for an NEC SX6 system with an X86_64 based front-end:
|
||||
Examples:
|
||||
|
||||
<P>
|
||||
BlueGene/P:
|
||||
|
||||
<P>
|
||||
<PRE>
|
||||
% ./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 MPICC=sxmpicc
|
||||
AR=sxar RANLIB="sxar st" CC_FOR_BUILD=cc CXX_FOR_BUILD=c++
|
||||
--host=sx6-nec-superux14.1
|
||||
--with-cross-prefix=sx
|
||||
--with-otf-lib=-lotf
|
||||
% ./configure --host=powerpc64-ibm-linux-gnu
|
||||
</PRE>
|
||||
|
||||
<P>
|
||||
Cray XK6:
|
||||
|
||||
<P>
|
||||
<PRE>
|
||||
% ./configure --host=x86_64-cray-linux-gnu
|
||||
CC_FOR_BUILD=craycc
|
||||
CXX_FOR_BUILD=crayc++
|
||||
</PRE>
|
||||
|
||||
<P>
|
||||
NEC SX6:
|
||||
|
||||
<P>
|
||||
<PRE>
|
||||
% ./configure --host=sx6-nec-superux14.1
|
||||
</PRE>
|
||||
|
||||
<P>
|
||||
@ -5227,69 +5244,69 @@ If you provide us with your additions afterwards we will consider merging them
|
||||
into the official VampirTrace package.
|
||||
<BR><HR><H4>Footnotes</H4>
|
||||
<DL>
|
||||
<DT><A NAME="foot1523">... (OTF)</A><A
|
||||
<DT><A NAME="foot1530">... (OTF)</A><A
|
||||
HREF="#tex2html1"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.tu-dresden.de/zih/otf
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1524">... tool </A><A
|
||||
<DT><A NAME="foot1531">... tool </A><A
|
||||
HREF="#tex2html2"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.vampir.eu
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1525">...
|
||||
<DT><A NAME="foot1532">...
|
||||
Open MPI </A><A
|
||||
HREF="#tex2html3"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.open-mpi.org/faq/?category=vampirtrace
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1549">... documentation </A><A
|
||||
<DT><A NAME="foot1556">... documentation </A><A
|
||||
HREF="#tex2html4"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling
|
||||
<DD>http://www.cs.uoregon.edu/Research/tau/docs/newguide/bk05ch02.html#d0e3770
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1550">...
|
||||
<DT><A NAME="foot1557">...
|
||||
Dyninst </A><A
|
||||
HREF="#tex2html5"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.dyninst.org
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1551">... library </A><A
|
||||
<DT><A NAME="foot1558">... library </A><A
|
||||
HREF="#tex2html6"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://sourceforge.net/projects/ctool
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1561">... CLAPACK</A><A
|
||||
<DT><A NAME="foot1568">... CLAPACK</A><A
|
||||
HREF="#tex2html7"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>www.netlib.org/clapack
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1589">... Dyninst </A><A
|
||||
<DT><A NAME="foot1596">... Dyninst </A><A
|
||||
HREF="#tex2html8"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.dyninst.org
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1590">... PDToolkit </A><A
|
||||
<DT><A NAME="foot1597">... PDToolkit </A><A
|
||||
HREF="#tex2html9"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://www.cs.uoregon.edu/research/pdt/home.php
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1591">... TAU </A><A
|
||||
<DT><A NAME="foot1598">... TAU </A><A
|
||||
HREF="#tex2html10"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://tau.uoregon.edu
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1592">... CTool </A><A
|
||||
<DT><A NAME="foot1599">... CTool </A><A
|
||||
HREF="#tex2html11"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD>http://sourceforge.net/projects/ctool
|
||||
|
Двоичные данные
ompi/contrib/vt/vt/doc/UserManual.pdf
Двоичные данные
ompi/contrib/vt/vt/doc/UserManual.pdf
Двоичный файл не отображается.
@ -1,3 +1,13 @@
|
||||
1.10.2openmpi
|
||||
- otfdump:
|
||||
- don't abort when reading events fails - the input tracefile
|
||||
might only have statistics
|
||||
- otfprofile:
|
||||
- removed leading '=' from CSV lines to make it loadable into
|
||||
spreadsheets (e.g. Open Office)
|
||||
- fixed process naming in CSV output of collective operation
|
||||
statistics
|
||||
|
||||
1.10.1openmpi
|
||||
- fixed build issues in otfprofile[-mpi]:
|
||||
- added missing header includes for WEXITSTATUS
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
major=1
|
||||
minor=10
|
||||
sub=1
|
||||
sub=2
|
||||
|
||||
# string is used for alpha, beta, or release tags. If it is non-empty, it will
|
||||
# be appended to the version number.
|
||||
@ -48,5 +48,5 @@ string=openmpi
|
||||
# release, age must be incremented. Otherwise, reset age
|
||||
# to '0'.
|
||||
|
||||
library=4:1:3
|
||||
library=4:2:3
|
||||
|
||||
|
@ -9,8 +9,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfcompress", "tools\otfcom
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfdump", "tools\otfdump\otfdump_vs08.vcproj", "{508A823C-8EAB-4C56-9DFE-4D85B0D24491}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfmerge", "tools\otfmerge\otfmerge_vs08.vcproj", "{1B9223C8-DE20-4538-8214-776C6017D22B}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
|
@ -18,7 +18,7 @@
|
||||
|
||||
#define OTF_VERSION_MAJOR 1
|
||||
#define OTF_VERSION_MINOR 10
|
||||
#define OTF_VERSION_SUB 1
|
||||
#define OTF_VERSION_SUB 2
|
||||
#define OTF_VERSION_STRING "openmpi"
|
||||
|
||||
/**
|
||||
|
@ -611,7 +611,7 @@ int main ( int argc, const char** argv ) {
|
||||
|
||||
read = OTF_Reader_readDefinitions( reader, handlers );
|
||||
if( read == OTF_READ_ERROR ) {
|
||||
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
|
||||
fprintf(stderr,"An error occurred while reading definitions. The tracefile seems to be damaged. Abort.\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -624,8 +624,7 @@ int main ( int argc, const char** argv ) {
|
||||
|
||||
read = OTF_Reader_readEvents( reader, handlers );
|
||||
if( read == OTF_READ_ERROR ) {
|
||||
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
|
||||
return 1;
|
||||
fprintf(stderr,"An error occurred while reading events. Maybe the tracefile has no events or it is damaged. Continue.\n");
|
||||
}
|
||||
}
|
||||
|
||||
@ -637,7 +636,7 @@ int main ( int argc, const char** argv ) {
|
||||
|
||||
read = OTF_Reader_readStatistics( reader, handlers );
|
||||
if( read == OTF_READ_ERROR ) {
|
||||
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
|
||||
fprintf(stderr,"An error occurred while reading statistics. The tracefile seems to be damaged. Abort.\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -649,7 +648,7 @@ int main ( int argc, const char** argv ) {
|
||||
}
|
||||
read = OTF_Reader_readSnapshots( reader, handlers );
|
||||
if( read == OTF_READ_ERROR ) {
|
||||
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
|
||||
fprintf(stderr,"An error occurred while reading snapshots. The tracefile seems to be damaged. Abort.\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@ -661,7 +660,7 @@ int main ( int argc, const char** argv ) {
|
||||
}
|
||||
read = OTF_Reader_readMarkers( reader, handlers );
|
||||
if( read == OTF_READ_ERROR ) {
|
||||
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
|
||||
fprintf(stderr,"An error occurred while reading markers. The tracefile seems to be damaged. Abort.\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -94,7 +94,7 @@ static void write_func_data( AllData& alldata, ofstream& csvFile,
|
||||
" appending function data to file: %s\n",
|
||||
csvFileName.c_str() );
|
||||
|
||||
static const string LINE_PREFIX= "==FUNCTION==";
|
||||
static const string LINE_PREFIX= "FUNCTION";
|
||||
|
||||
if ( 0 == alldata.myRank ) {
|
||||
|
||||
@ -145,7 +145,7 @@ static void write_counter_data( AllData& alldata, ofstream& csvFile,
|
||||
" appending counter data to file: %s\n",
|
||||
csvFileName.c_str() );
|
||||
|
||||
static const string LINE_PREFIX= "==COUNTER==";
|
||||
static const string LINE_PREFIX= "COUNTER";
|
||||
|
||||
if ( 0 == alldata.myRank ) {
|
||||
|
||||
@ -218,7 +218,7 @@ static void write_p2p_data( AllData& alldata, ofstream& csvFile,
|
||||
" appending P2P message data to file: %s\n",
|
||||
csvFileName.c_str() );
|
||||
|
||||
static const string LINE_PREFIX= "==P2P==";
|
||||
static const string LINE_PREFIX= "P2P";
|
||||
|
||||
if ( 0 == alldata.myRank ) {
|
||||
|
||||
@ -270,7 +270,7 @@ static void write_collop_data( AllData& alldata, ofstream& csvFile,
|
||||
" appending collective op. data to file: %s\n",
|
||||
csvFileName.c_str() );
|
||||
|
||||
static const string LINE_PREFIX= "==COLLOP==";
|
||||
static const string LINE_PREFIX= "COLLOP";
|
||||
|
||||
static map< uint64_t, string > op_class_names;
|
||||
if ( op_class_names.empty() ) {
|
||||
@ -313,7 +313,7 @@ static void write_collop_data( AllData& alldata, ofstream& csvFile,
|
||||
assert( 0 != op_class_name.length() );
|
||||
|
||||
csvFile << LINE_PREFIX << ';'
|
||||
<< proc_id << ';'
|
||||
<< proc_name << ';'
|
||||
<< op_class_name << ';'
|
||||
<< count_send << ';'
|
||||
<< count_recv << ';'
|
||||
|
@ -92,13 +92,13 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
|
||||
} \
|
||||
if(do_traceE){ \
|
||||
if(_kind == cudaMemcpyHostToDevice){ \
|
||||
vt_mpi_rma_put(_ptid, &time, strmID * 65536 + vt_my_trace, \
|
||||
vt_mpi_rma_put(_ptid, &time, VT_GPU_RANK_ID(strmID), \
|
||||
vt_gpu_commCID, 0, (uint64_t)_bytes); \
|
||||
}else if(_kind == cudaMemcpyDeviceToHost){ \
|
||||
vt_mpi_rma_get(_ptid, &time, strmID * 65536 + vt_my_trace, \
|
||||
vt_mpi_rma_get(_ptid, &time, VT_GPU_RANK_ID(strmID), \
|
||||
vt_gpu_commCID, 0, (uint64_t)_bytes); \
|
||||
}else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \
|
||||
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, \
|
||||
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID), \
|
||||
vt_gpu_commCID, 0, (uint64_t)_bytes); \
|
||||
} \
|
||||
} \
|
||||
@ -142,7 +142,7 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
|
||||
\
|
||||
if(do_traceE){\
|
||||
vt_mpi_rma_get(vtSrcDev->strmList->tid, &time, \
|
||||
vtDstDev->strmList->tid * 65536 + vt_my_trace,\
|
||||
VT_GPU_RANK_ID(vtDstDev->strmList->tid),\
|
||||
vt_gpu_commCID, 0, _bytes);\
|
||||
}\
|
||||
\
|
||||
@ -274,13 +274,13 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
|
||||
VT_CUDART_CALL(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!"); \
|
||||
if(syncLevel > 1){time = vt_pform_wtime(); vt_exit(ptid, &time);} \
|
||||
if(_kind == cudaMemcpyHostToDevice){ \
|
||||
vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace, \
|
||||
vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID), \
|
||||
vt_gpu_commCID, 0, _bytes); \
|
||||
}else if(_kind == cudaMemcpyDeviceToHost){ \
|
||||
vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace, \
|
||||
vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID), \
|
||||
vt_gpu_commCID, 0, _bytes); \
|
||||
}else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \
|
||||
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, \
|
||||
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID), \
|
||||
vt_gpu_commCID, 0, _bytes); \
|
||||
CUDARTWRAP_LOCK(); \
|
||||
vt_gpu_prop[strmID] |= VTGPU_GPU_COMM; \
|
||||
@ -1192,13 +1192,13 @@ static void VTCUDAflush(VTCUDADevice *vtDev, uint32_t ptid)
|
||||
}*/
|
||||
|
||||
if(mcpy->kind == cudaMemcpyHostToDevice){
|
||||
vt_mpi_rma_get(tid, &strttime, mcpy->pid * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_get(tid, &strttime, VT_GPU_RANK_ID(mcpy->pid),
|
||||
vt_gpu_commCID, 0, mcpy->byteCount);
|
||||
}else if(mcpy->kind == cudaMemcpyDeviceToHost){
|
||||
vt_mpi_rma_put(tid, &strttime, mcpy->pid * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_put(tid, &strttime, VT_GPU_RANK_ID(mcpy->pid),
|
||||
vt_gpu_commCID, 0, mcpy->byteCount);
|
||||
}else if(mcpy->kind == cudaMemcpyDeviceToDevice){
|
||||
vt_mpi_rma_get(tid, &strttime, tid * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_get(tid, &strttime, VT_GPU_RANK_ID(tid),
|
||||
vt_gpu_commCID, 0, mcpy->byteCount);
|
||||
}
|
||||
|
||||
|
@ -113,7 +113,7 @@ static uint8_t vt_cuptiact_finalized = 0;
|
||||
static uint8_t *vt_cuptiact_global_buffer = NULL;*/
|
||||
|
||||
/* size of the activity buffer */
|
||||
static size_t vt_cuptiact_bufSize = VTGPU_DEFAULT_BSIZE;
|
||||
static size_t vt_cuptiact_bufSize = VT_CUPTI_ACT_DEFAULT_BSIZE;
|
||||
|
||||
/* cupti activity specific kernel counter IDs */
|
||||
static uint32_t vt_cuptiact_cid_knStaticSharedMem = VT_NO_ID;
|
||||
@ -224,9 +224,9 @@ void vt_cupti_activity_init()
|
||||
|
||||
void vt_cupti_activity_finalize()
|
||||
{
|
||||
if(!vt_cuptiact_finalized){
|
||||
if(!vt_cuptiact_finalized && vt_cuptiact_initialized){
|
||||
VT_CUPTI_ACT_LOCK();
|
||||
if(!vt_cuptiact_finalized){
|
||||
if(!vt_cuptiact_finalized && vt_cuptiact_initialized){
|
||||
vt_cntl_msg(2, "[CUPTI Activity] Finalizing ... ");
|
||||
|
||||
vt_cuptiact_finalized = 1;
|
||||
@ -476,7 +476,8 @@ static vt_cuptiact_ctx_t* vt_cuptiact_getCtx(CUcontext cuCtx)
|
||||
}
|
||||
|
||||
/*
|
||||
* Destroy a VampirTrace CUPTI Activity context.
|
||||
* Check for a VampirTrace activity stream by stream ID. If it does not exist,
|
||||
* create it.
|
||||
*
|
||||
* @param vtCtx VampirTrace CUPTI Activity context
|
||||
* @param strmID the CUDA stream ID provided by CUPTI callback API
|
||||
@ -682,19 +683,19 @@ static void vt_cuptiact_writeKernelRecord(CUpti_ActivityKernel *kernel,
|
||||
|
||||
/* if current activity's start time is before last written timestamp */
|
||||
if(start < vtStrm->vtLastTime){
|
||||
vt_warning("[CUPTI Activity] Kernel start time < last written timestamp!");
|
||||
vt_warning("[CUPTI Activity] Kernel: start time < last written timestamp!");
|
||||
return;
|
||||
}
|
||||
|
||||
/* check if time between start and stop is increasing */
|
||||
if(stop < start){
|
||||
vt_warning("[CUPTI Activity] Kernel start time > kernel stop time!");
|
||||
vt_warning("[CUPTI Activity] Kernel: start time > stop time!");
|
||||
return;
|
||||
}
|
||||
|
||||
/* check if synchronization stop time is before kernel stop time */
|
||||
if(vtCtx->sync.hostStop < stop){
|
||||
vt_warning("[CUPTI Activity] Sync stop time < kernel stop time!");
|
||||
vt_warning("[CUPTI Activity] Kernel: sync stop time < stop time!");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -805,19 +806,19 @@ static void vt_cuptiact_writeMemcpyRecord(CUpti_ActivityMemcpy *mcpy,
|
||||
|
||||
/* if current activity's start time is before last written timestamp */
|
||||
if(start < vtStrm->vtLastTime){
|
||||
vt_cntl_msg(1, "[CUPTI Activity] Memcpy start time < last written timestamp!");
|
||||
vt_cntl_msg(1, "[CUPTI Activity] Memcpy: start time < last written timestamp!");
|
||||
return;
|
||||
}
|
||||
|
||||
/* check if time between start and stop is increasing */
|
||||
if(stop < start){
|
||||
vt_warning("[CUPTI Activity] Memcpy start time > kernel stop time!");
|
||||
vt_warning("[CUPTI Activity] Memcpy: start time > stop time!");
|
||||
return;
|
||||
}
|
||||
|
||||
/* check if synchronization stop time is before kernel stop time */
|
||||
if(vtCtx->sync.hostStop < stop){
|
||||
vt_warning("[CUPTI Activity] Synchronization stop time < kernel stop time!");
|
||||
vt_warning("[CUPTI Activity] Memcpy: sync stop time < stop time!");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -848,13 +849,13 @@ static void vt_cuptiact_writeMemcpyRecord(CUpti_ActivityMemcpy *mcpy,
|
||||
vt_warning("MCPYexit: %llu (%d)", stop, vtThrdID);
|
||||
*/
|
||||
if(kind == VT_GPU_HOST2DEV){
|
||||
vt_mpi_rma_get(vtThrdID, &start, vtCtx->ptid * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_get(vtThrdID, &start, VT_GPU_RANK_ID(vtCtx->ptid),
|
||||
vt_gpu_commCID, 0, mcpy->bytes);
|
||||
}else if(kind == VT_GPU_DEV2HOST){
|
||||
vt_mpi_rma_put(vtThrdID, &start, vtCtx->ptid * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_put(vtThrdID, &start, VT_GPU_RANK_ID(vtCtx->ptid),
|
||||
vt_gpu_commCID, 0, mcpy->bytes);
|
||||
}else if(kind == VT_GPU_DEV2DEV){
|
||||
vt_mpi_rma_get(vtThrdID, &start, vtThrdID * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_get(vtThrdID, &start, VT_GPU_RANK_ID(vtThrdID),
|
||||
vt_gpu_commCID, 0, mcpy->bytes);
|
||||
}
|
||||
|
||||
|
@ -1464,13 +1464,13 @@ static void vt_cupticb_handle_cudart_memcpy(
|
||||
|
||||
/*time = vt_pform_wtime();*/
|
||||
if(kind == cudaMemcpyHostToDevice){
|
||||
vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID),
|
||||
vt_gpu_commCID, 0, bytes);
|
||||
}else if(kind == cudaMemcpyDeviceToHost){
|
||||
vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID),
|
||||
vt_gpu_commCID, 0, bytes);
|
||||
}else if(kind == cudaMemcpyDeviceToDevice){
|
||||
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID),
|
||||
vt_gpu_commCID, 0, bytes);
|
||||
}
|
||||
}
|
||||
@ -1528,13 +1528,13 @@ static void vt_cupticb_handle_cudart_mcpyAsync(const CUpti_CallbackData *cbInfo,
|
||||
|
||||
time = vt_pform_wtime();
|
||||
if(kind == cudaMemcpyHostToDevice){
|
||||
vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID),
|
||||
vt_gpu_commCID, 0, bytes);
|
||||
}else if(kind == cudaMemcpyDeviceToHost){
|
||||
vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID),
|
||||
vt_gpu_commCID, 0, bytes);
|
||||
}else if(kind == cudaMemcpyDeviceToDevice){
|
||||
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace,
|
||||
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID),
|
||||
vt_gpu_commCID, 0, bytes);
|
||||
}
|
||||
}
|
||||
@ -1692,12 +1692,14 @@ void vt_cupti_callback_init()
|
||||
#endif
|
||||
|
||||
/* set callback for CUDA runtime API functions */
|
||||
#if (defined(CUPTI_API_VERSION) && (CUPTI_API_VERSION >= 2))
|
||||
#if defined(VT_CUPTI_ACTIVITY)
|
||||
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy ||
|
||||
vt_cupti_trace_gpu_mem > 0){
|
||||
vt_cupti_set_callback(vt_cupticb_all_ptr,
|
||||
CUPTI_CB_DOMAIN_RESOURCE,
|
||||
CUPTI_RUNTIME_TRACE_CBID_INVALID);
|
||||
|
||||
vt_cupti_activity_init();
|
||||
}
|
||||
|
||||
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy){
|
||||
@ -1725,26 +1727,18 @@ void vt_cupti_callback_init()
|
||||
CUPTI_CB_DOMAIN_DRIVER_API,
|
||||
CUPTI_DRIVER_TRACE_CBID_cuCtxCreate);
|
||||
*/
|
||||
#else
|
||||
#else
|
||||
if(vt_cupticb_trace_runtimeAPI){
|
||||
vt_cupti_set_callback(vt_cupticb_cudart_ptr,
|
||||
CUPTI_CB_DOMAIN_RUNTIME_API,
|
||||
CUPTI_RUNTIME_TRACE_CBID_INVALID);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* reset the hash table for CUDA API functions */
|
||||
memset(vt_cupticb_cudaApiFuncTab, VT_NO_ID,
|
||||
VT_CUPTICB_CUDA_API_FUNC_MAX * sizeof(uint32_t));
|
||||
|
||||
/* use CUPTI activity for kernel and memcpy tracing, if it is available */
|
||||
#if defined(VT_CUPTI_ACTIVITY)
|
||||
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy ||
|
||||
vt_cupti_trace_gpu_mem > 0){
|
||||
vt_cupti_activity_init();
|
||||
}
|
||||
#endif
|
||||
|
||||
/* register the finalize function of VampirTrace CUPTI to be called before
|
||||
* the program exits */
|
||||
atexit(vt_cupti_callback_finalize);
|
||||
@ -1769,7 +1763,10 @@ void vt_cupti_callback_finalize()
|
||||
vt_cntl_msg(2, "[CUPTI Callbacks] Finalizing ... ");
|
||||
|
||||
#if defined(VT_CUPTI_ACTIVITY)
|
||||
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy ||
|
||||
vt_cupti_trace_gpu_mem > 0){
|
||||
vt_cupti_activity_finalize();
|
||||
}
|
||||
#endif
|
||||
|
||||
VT_CUPTI_CALL(cuptiUnsubscribe(vt_cupticb_subscriber),
|
||||
|
@ -19,6 +19,7 @@
|
||||
# define EXTERN extern
|
||||
#endif
|
||||
|
||||
#include "vt_defs.h"
|
||||
#include "vt_inttypes.h" /* VampirTrace integer types */
|
||||
#include "vt_thrd.h" /* thread creation for GPU kernels */
|
||||
#include "vt_trc.h" /* VampirTrace events */
|
||||
@ -39,6 +40,12 @@
|
||||
/* performance counter available? */
|
||||
#define VTGPU_NO_PC 0x04 /* no performance counter for this thread available */
|
||||
|
||||
/*
|
||||
* Get the rank ID for a given VampirTrace thread ID.
|
||||
* The MPI RMA functions take the rank ID instead of the VampirTrace process ID!
|
||||
*/
|
||||
#define VT_GPU_RANK_ID(thread_id) \
|
||||
(VT_PROCESS_ID(vt_my_trace, thread_id)-1)
|
||||
|
||||
#if (defined(VT_CUDARTWRAP) || defined(VT_CUPTI))
|
||||
/*
|
||||
|
@ -239,7 +239,7 @@ static metricmap_t* vt_metricmap_init(metmap_t match)
|
||||
return mapv;
|
||||
}
|
||||
|
||||
static void metricv_add(char* name, int code)
|
||||
static void metricv_add(char* name, int code, uint32_t props)
|
||||
{
|
||||
if (nmetrics >= VT_METRIC_MAXNUM) {
|
||||
vt_error_msg("Number of counters exceeds VampirTrace allowed maximum "
|
||||
@ -248,7 +248,7 @@ static void metricv_add(char* name, int code)
|
||||
metricv[nmetrics] = (struct metric*)malloc(sizeof(struct metric));
|
||||
metricv[nmetrics]->name = strdup(name);
|
||||
metricv[nmetrics]->descr[0] = '\0';
|
||||
metricv[nmetrics]->props = VT_CNTR_ACC;
|
||||
metricv[nmetrics]->props = props;
|
||||
metricv[nmetrics]->papi_code = code;
|
||||
nmetrics++;
|
||||
}
|
||||
@ -399,7 +399,7 @@ int vt_metric_open()
|
||||
char* env_sep;
|
||||
char* var;
|
||||
char* token;
|
||||
int forceprop;
|
||||
char* saveptr;
|
||||
PAPI_event_info_t info;
|
||||
metricmap_t* mapv = NULL;
|
||||
metricmap_t* map;
|
||||
@ -433,15 +433,19 @@ int vt_metric_open()
|
||||
vt_cntl_msg(2, "VT_METRICS=%s", var);
|
||||
|
||||
/* read metrics from specification string */
|
||||
token = strtok(var, env_sep);
|
||||
token = strtok_r(var, env_sep, &saveptr);
|
||||
while ( token && (nmetrics < VT_METRIC_MAXNUM) ) {
|
||||
/* set counter properties */
|
||||
uint32_t props;
|
||||
if (token[0]=='!')
|
||||
{
|
||||
forceprop=1;
|
||||
props = VT_CNTR_ABS | VT_CNTR_NEXT;
|
||||
token++;
|
||||
}
|
||||
else
|
||||
forceprop=0;
|
||||
{
|
||||
props = VT_CNTR_ACC;
|
||||
}
|
||||
/* search metricmap for a suitable definition */
|
||||
map = mapv;
|
||||
/*printf("Token%d: <%s>\n", nmetrics, token);*/
|
||||
@ -471,9 +475,9 @@ int vt_metric_open()
|
||||
/*printf("Event %s *N/A*\n", component);*/
|
||||
got_valid_match = 0;
|
||||
} else if ((k==0) && (len==0)) { /* use provided event name */
|
||||
metricv_add(token, code);
|
||||
metricv_add(token, code, props);
|
||||
} else { /* use alias component name */
|
||||
metricv_add(component, code);
|
||||
metricv_add(component, code, props);
|
||||
}
|
||||
k++;
|
||||
} while (got_valid_match && (len > 0));
|
||||
@ -499,12 +503,10 @@ int vt_metric_open()
|
||||
if (retval != PAPI_OK)
|
||||
vt_error_msg("Metric <%s> not available\n", component);
|
||||
|
||||
metricv_add(component, code);
|
||||
metricv_add(component, code, props);
|
||||
}
|
||||
|
||||
if (forceprop)
|
||||
metricv[nmetrics-1]->props = VT_CNTR_ABS | VT_CNTR_NEXT;
|
||||
token = strtok(NULL, env_sep);
|
||||
token = strtok_r(NULL, env_sep, &saveptr);
|
||||
}
|
||||
|
||||
/*printf("nmetrics=%d\n", nmetrics);*/
|
||||
|
@ -488,6 +488,9 @@ static void write_def_header(void)
|
||||
/* VT_METRICS */
|
||||
vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_METRICS: %s",
|
||||
vt_env_metrics() ? vt_env_metrics() : "<not set>");
|
||||
/* VT_METRICS_SEP */
|
||||
vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_METRICS_SEP: %s",
|
||||
vt_env_metrics_sep());
|
||||
#endif /* VT_METR */
|
||||
|
||||
#if defined(VT_RUSAGE)
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user