1
1
- general:
		- corrected OTF version number
	- otfprofile:
		- removed leading '=' from CSV lines to make it loadable into spreadsheets (e.g. Open Office)
		- fixed process naming in CSV output of collective operation statistics
Changes to VT:
	- configure:
		- added *_FOR_BUILD variables to CrayXE's default configure options; required for cross-building
	- VT libs:
		- fixed GPU communication, due to new process ID splitting
		- fixed parsing of PAPI native events in VT_METRICS; use strtok_r instead of strtok, which is successively called in PAPI_event_name_to_code
		- added VT_METRICS_SEP to definition comments (-> Vampir's trace info)
	- Docu:
		- fixed link to TAU Reference Guide

This commit was SVN r26137.
Этот коммит содержится в:
Matthias Jurenz 2012-03-14 12:36:28 +00:00
родитель 524de80eaa
Коммит 03ea0245f0
16 изменённых файлов: 223 добавлений и 182 удалений

Просмотреть файл

@ -343,14 +343,17 @@ How to install and configure VampirTrace
* Maybe you also need to set additional commands and flags for the back-end
(e.g. RANLIB, AR, MPICC, CXXFLAGS).
For example, this configure command line works for an NEC SX6 system with an X86_64
based front-end:
Examples:
BlueGene/P:
% ./configure --host=powerpc64-ibm-linux-gnu
./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 MPICC=sxmpicc
AR=sxar RANLIB="sxar st" CC_FOR_BUILD=cc CXX_FOR_BUILD=c++
--host=sx6-nec-superux14.1
--with-cross-prefix=sx
--with-otf-lib=-lotf
Cray XK6:
% ./configure --host=x86_64-cray-linux-gnu
CC_FOR_BUILD=craycc
CXX_FOR_BUILD=crayc++
NEC SX6:
% ./configure --host=sx6-nec-superux14.1
Set up the Environment

Просмотреть файл

@ -12,5 +12,9 @@ if test x"$XTPE_COMPILE_TARGET" != x; then
FFLAGS="$FFLAGS -target=$XTPE_COMPILE_TARGET"
FCFLAGS="$FCFLAGS -target=$XTPE_COMPILE_TARGET"
fi
CC_FOR_BUILD="gcc"
CXX_FOR_BUILD="g++"
CFLAGS_FOR_BUILD="-O3"
CXXFLAGS_FOR_BUILD="-O3"
enable_shared="no"
with_mpich2="yes"

Просмотреть файл

@ -314,20 +314,20 @@ OpenMP events, and performance counters.
<P>
After a successful tracing run, VampirTrace writes all collected data to a
trace file in the Open Trace Format (OTF)<A NAME="tex2html1"
HREF="#foot1523"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1530"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
As a result, the information is available for post-mortem analysis and
visualization by various tools.
Most notably, VampirTrace provides the input data for the Vampir analysis
and visualization tool<A NAME="tex2html2"
HREF="#foot1524"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1531"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
<P>
VampirTrace is included in OpenMPI&nbsp;1.3 and later versions.
If not disabled explicitly, VampirTrace is built automatically when installing
OpenMPI<A NAME="tex2html3"
HREF="#foot1525"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1532"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
<P>
@ -1143,7 +1143,7 @@ in a single file, that
The names in between may contain wildcards such as ``?'', ``*'', and ``#''; each entry gets a new line.
The lists end with <TT>END[_FILE]_&lt;INCLUDE|EXCLUDE&gt;_LIST</TT>. For further information on selective
profiling have a look at the TAU documentation<A NAME="tex2html4"
HREF="#foot1549"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1556"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
<PRE>
@ -1160,7 +1160,7 @@ Binary Instrumentation Using Dyninst
The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to
instrument the application during runtime (binary instrumentation), by using
Dyninst<A NAME="tex2html5"
HREF="#foot1550"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1557"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
Recompiling is not necessary for this kind of instrumentation,
but relinking:
@ -1311,7 +1311,7 @@ Tracing Calls to 3rd-Party Libraries
VampirTrace is also capable of tracing calls to third-party libraries, which come with
at least one C header file even without the library's source code. If VampirTrace was
built with support for library tracing (the CTool library<A NAME="tex2html6"
HREF="#foot1551"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1558"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is required), the tool <TT>vtlibwrapgen</TT> can be used to
generate a wrapper library to intercept each call to the actual library functions.
This wrapper library can be linked to the application or used in combination with the
@ -1903,7 +1903,7 @@ for the enhanced timer synchronization:
<UL>
<LI>CLAPACK <A NAME="tex2html7"
HREF="#foot1561"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1568"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
</LI>
<LI>AMD ACML
@ -3441,7 +3441,7 @@ default: automatically by configure.
enable support for Dyninst instrumentation,
default: enable if found by configure.
<SPAN CLASS="textbf">Note:</SPAN> Requires Dyninst<A NAME="tex2html8"
HREF="#foot1589"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1596"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> version 6.1 or higher!
<P>
@ -3463,9 +3463,9 @@ enable support for automatic source code
instrumentation by using TAU, default: enable if
found by configure.
<SPAN CLASS="textbf">Note:</SPAN> Requires PDToolkit<A NAME="tex2html9"
HREF="#foot1590"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1597"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> or TAU<A NAME="tex2html10"
HREF="#foot1591"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1598"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>!
<P>
@ -4034,7 +4034,7 @@ give the path for JVMTI-include files, default:
<P>
To enable support for generating wrappers for 3rd-party libraries the C code parser CTool<A NAME="tex2html11"
HREF="#foot1592"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1599"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A> is needed:
<P>
@ -4165,15 +4165,32 @@ are shown below:
<LI>Maybe you also need to set additional commands and flags for the back-end (e.g.&nbsp;<TT>RANLIB</TT>, <TT>AR</TT>, <TT>MPICC</TT>, <TT>CXXFLAGS</TT>).
</LI>
</UL>
For example, this <TT>configure</TT> command line works for an NEC SX6 system with an X86_64 based front-end:
Examples:
<P>
BlueGene/P:
<P>
<PRE>
% ./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 MPICC=sxmpicc
AR=sxar RANLIB="sxar st" CC_FOR_BUILD=cc CXX_FOR_BUILD=c++
--host=sx6-nec-superux14.1
--with-cross-prefix=sx
--with-otf-lib=-lotf
% ./configure --host=powerpc64-ibm-linux-gnu
</PRE>
<P>
Cray XK6:
<P>
<PRE>
% ./configure --host=x86_64-cray-linux-gnu
CC_FOR_BUILD=craycc
CXX_FOR_BUILD=crayc++
</PRE>
<P>
NEC SX6:
<P>
<PRE>
% ./configure --host=sx6-nec-superux14.1
</PRE>
<P>
@ -5227,69 +5244,69 @@ If you provide us with your additions afterwards we will consider merging them
into the official VampirTrace package.
<BR><HR><H4>Footnotes</H4>
<DL>
<DT><A NAME="foot1523">... (OTF)</A><A
<DT><A NAME="foot1530">... (OTF)</A><A
HREF="#tex2html1"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>http://www.tu-dresden.de/zih/otf
</DD>
<DT><A NAME="foot1524">... tool </A><A
<DT><A NAME="foot1531">... tool </A><A
HREF="#tex2html2"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>http://www.vampir.eu
</DD>
<DT><A NAME="foot1525">...
<DT><A NAME="foot1532">...
Open MPI </A><A
HREF="#tex2html3"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>http://www.open-mpi.org/faq/?category=vampirtrace
</DD>
<DT><A NAME="foot1549">... documentation </A><A
<DT><A NAME="foot1556">... documentation </A><A
HREF="#tex2html4"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling
<DD>http://www.cs.uoregon.edu/Research/tau/docs/newguide/bk05ch02.html#d0e3770
</DD>
<DT><A NAME="foot1550">...
<DT><A NAME="foot1557">...
Dyninst </A><A
HREF="#tex2html5"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>http://www.dyninst.org
</DD>
<DT><A NAME="foot1551">... library </A><A
<DT><A NAME="foot1558">... library </A><A
HREF="#tex2html6"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>http://sourceforge.net/projects/ctool
</DD>
<DT><A NAME="foot1561">... CLAPACK</A><A
<DT><A NAME="foot1568">... CLAPACK</A><A
HREF="#tex2html7"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>www.netlib.org/clapack
</DD>
<DT><A NAME="foot1589">... Dyninst </A><A
<DT><A NAME="foot1596">... Dyninst </A><A
HREF="#tex2html8"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>http://www.dyninst.org
</DD>
<DT><A NAME="foot1590">... PDToolkit </A><A
<DT><A NAME="foot1597">... PDToolkit </A><A
HREF="#tex2html9"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>http://www.cs.uoregon.edu/research/pdt/home.php
</DD>
<DT><A NAME="foot1591">... TAU </A><A
<DT><A NAME="foot1598">... TAU </A><A
HREF="#tex2html10"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>http://tau.uoregon.edu
</DD>
<DT><A NAME="foot1592">... CTool </A><A
<DT><A NAME="foot1599">... CTool </A><A
HREF="#tex2html11"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD>http://sourceforge.net/projects/ctool

Двоичные данные
ompi/contrib/vt/vt/doc/UserManual.pdf

Двоичный файл не отображается.

Просмотреть файл

@ -1,3 +1,13 @@
1.10.2openmpi
- otfdump:
- don't abort when reading events fails - the input tracefile
might only have statistics
- otfprofile:
- removed leading '=' from CSV lines to make it loadable into
spreadsheets (e.g. Open Office)
- fixed process naming in CSV output of collective operation
statistics
1.10.1openmpi
- fixed build issues in otfprofile[-mpi]:
- added missing header includes for WEXITSTATUS

Просмотреть файл

@ -7,7 +7,7 @@
major=1
minor=10
sub=1
sub=2
# string is used for alpha, beta, or release tags. If it is non-empty, it will
# be appended to the version number.
@ -48,5 +48,5 @@ string=openmpi
# release, age must be incremented. Otherwise, reset age
# to '0'.
library=4:1:3
library=4:2:3

Просмотреть файл

@ -9,8 +9,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfcompress", "tools\otfcom
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfdump", "tools\otfdump\otfdump_vs08.vcproj", "{508A823C-8EAB-4C56-9DFE-4D85B0D24491}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfmerge", "tools\otfmerge\otfmerge_vs08.vcproj", "{1B9223C8-DE20-4538-8214-776C6017D22B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32

Просмотреть файл

@ -18,7 +18,7 @@
#define OTF_VERSION_MAJOR 1
#define OTF_VERSION_MINOR 10
#define OTF_VERSION_SUB 1
#define OTF_VERSION_SUB 2
#define OTF_VERSION_STRING "openmpi"
/**

Просмотреть файл

@ -611,7 +611,7 @@ int main ( int argc, const char** argv ) {
read = OTF_Reader_readDefinitions( reader, handlers );
if( read == OTF_READ_ERROR ) {
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
fprintf(stderr,"An error occurred while reading definitions. The tracefile seems to be damaged. Abort.\n");
return 1;
}
}
@ -624,8 +624,7 @@ int main ( int argc, const char** argv ) {
read = OTF_Reader_readEvents( reader, handlers );
if( read == OTF_READ_ERROR ) {
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
return 1;
fprintf(stderr,"An error occurred while reading events. Maybe the tracefile has no events or it is damaged. Continue.\n");
}
}
@ -637,7 +636,7 @@ int main ( int argc, const char** argv ) {
read = OTF_Reader_readStatistics( reader, handlers );
if( read == OTF_READ_ERROR ) {
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
fprintf(stderr,"An error occurred while reading statistics. The tracefile seems to be damaged. Abort.\n");
return 1;
}
}
@ -649,7 +648,7 @@ int main ( int argc, const char** argv ) {
}
read = OTF_Reader_readSnapshots( reader, handlers );
if( read == OTF_READ_ERROR ) {
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
fprintf(stderr,"An error occurred while reading snapshots. The tracefile seems to be damaged. Abort.\n");
return 1;
}
}
@ -661,7 +660,7 @@ int main ( int argc, const char** argv ) {
}
read = OTF_Reader_readMarkers( reader, handlers );
if( read == OTF_READ_ERROR ) {
fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n");
fprintf(stderr,"An error occurred while reading markers. The tracefile seems to be damaged. Abort.\n");
return 1;
}
}

Просмотреть файл

@ -94,7 +94,7 @@ static void write_func_data( AllData& alldata, ofstream& csvFile,
" appending function data to file: %s\n",
csvFileName.c_str() );
static const string LINE_PREFIX= "==FUNCTION==";
static const string LINE_PREFIX= "FUNCTION";
if ( 0 == alldata.myRank ) {
@ -145,7 +145,7 @@ static void write_counter_data( AllData& alldata, ofstream& csvFile,
" appending counter data to file: %s\n",
csvFileName.c_str() );
static const string LINE_PREFIX= "==COUNTER==";
static const string LINE_PREFIX= "COUNTER";
if ( 0 == alldata.myRank ) {
@ -218,7 +218,7 @@ static void write_p2p_data( AllData& alldata, ofstream& csvFile,
" appending P2P message data to file: %s\n",
csvFileName.c_str() );
static const string LINE_PREFIX= "==P2P==";
static const string LINE_PREFIX= "P2P";
if ( 0 == alldata.myRank ) {
@ -270,7 +270,7 @@ static void write_collop_data( AllData& alldata, ofstream& csvFile,
" appending collective op. data to file: %s\n",
csvFileName.c_str() );
static const string LINE_PREFIX= "==COLLOP==";
static const string LINE_PREFIX= "COLLOP";
static map< uint64_t, string > op_class_names;
if ( op_class_names.empty() ) {
@ -313,7 +313,7 @@ static void write_collop_data( AllData& alldata, ofstream& csvFile,
assert( 0 != op_class_name.length() );
csvFile << LINE_PREFIX << ';'
<< proc_id << ';'
<< proc_name << ';'
<< op_class_name << ';'
<< count_send << ';'
<< count_recv << ';'

Просмотреть файл

@ -92,13 +92,13 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
} \
if(do_traceE){ \
if(_kind == cudaMemcpyHostToDevice){ \
vt_mpi_rma_put(_ptid, &time, strmID * 65536 + vt_my_trace, \
vt_mpi_rma_put(_ptid, &time, VT_GPU_RANK_ID(strmID), \
vt_gpu_commCID, 0, (uint64_t)_bytes); \
}else if(_kind == cudaMemcpyDeviceToHost){ \
vt_mpi_rma_get(_ptid, &time, strmID * 65536 + vt_my_trace, \
vt_mpi_rma_get(_ptid, &time, VT_GPU_RANK_ID(strmID), \
vt_gpu_commCID, 0, (uint64_t)_bytes); \
}else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, \
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID), \
vt_gpu_commCID, 0, (uint64_t)_bytes); \
} \
} \
@ -142,7 +142,7 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
\
if(do_traceE){\
vt_mpi_rma_get(vtSrcDev->strmList->tid, &time, \
vtDstDev->strmList->tid * 65536 + vt_my_trace,\
VT_GPU_RANK_ID(vtDstDev->strmList->tid),\
vt_gpu_commCID, 0, _bytes);\
}\
\
@ -274,13 +274,13 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
VT_CUDART_CALL(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!"); \
if(syncLevel > 1){time = vt_pform_wtime(); vt_exit(ptid, &time);} \
if(_kind == cudaMemcpyHostToDevice){ \
vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace, \
vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID), \
vt_gpu_commCID, 0, _bytes); \
}else if(_kind == cudaMemcpyDeviceToHost){ \
vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace, \
vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID), \
vt_gpu_commCID, 0, _bytes); \
}else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, \
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID), \
vt_gpu_commCID, 0, _bytes); \
CUDARTWRAP_LOCK(); \
vt_gpu_prop[strmID] |= VTGPU_GPU_COMM; \
@ -1192,13 +1192,13 @@ static void VTCUDAflush(VTCUDADevice *vtDev, uint32_t ptid)
}*/
if(mcpy->kind == cudaMemcpyHostToDevice){
vt_mpi_rma_get(tid, &strttime, mcpy->pid * 65536 + vt_my_trace,
vt_mpi_rma_get(tid, &strttime, VT_GPU_RANK_ID(mcpy->pid),
vt_gpu_commCID, 0, mcpy->byteCount);
}else if(mcpy->kind == cudaMemcpyDeviceToHost){
vt_mpi_rma_put(tid, &strttime, mcpy->pid * 65536 + vt_my_trace,
vt_mpi_rma_put(tid, &strttime, VT_GPU_RANK_ID(mcpy->pid),
vt_gpu_commCID, 0, mcpy->byteCount);
}else if(mcpy->kind == cudaMemcpyDeviceToDevice){
vt_mpi_rma_get(tid, &strttime, tid * 65536 + vt_my_trace,
vt_mpi_rma_get(tid, &strttime, VT_GPU_RANK_ID(tid),
vt_gpu_commCID, 0, mcpy->byteCount);
}

Просмотреть файл

@ -113,7 +113,7 @@ static uint8_t vt_cuptiact_finalized = 0;
static uint8_t *vt_cuptiact_global_buffer = NULL;*/
/* size of the activity buffer */
static size_t vt_cuptiact_bufSize = VTGPU_DEFAULT_BSIZE;
static size_t vt_cuptiact_bufSize = VT_CUPTI_ACT_DEFAULT_BSIZE;
/* cupti activity specific kernel counter IDs */
static uint32_t vt_cuptiact_cid_knStaticSharedMem = VT_NO_ID;
@ -224,9 +224,9 @@ void vt_cupti_activity_init()
void vt_cupti_activity_finalize()
{
if(!vt_cuptiact_finalized){
if(!vt_cuptiact_finalized && vt_cuptiact_initialized){
VT_CUPTI_ACT_LOCK();
if(!vt_cuptiact_finalized){
if(!vt_cuptiact_finalized && vt_cuptiact_initialized){
vt_cntl_msg(2, "[CUPTI Activity] Finalizing ... ");
vt_cuptiact_finalized = 1;
@ -476,7 +476,8 @@ static vt_cuptiact_ctx_t* vt_cuptiact_getCtx(CUcontext cuCtx)
}
/*
* Destroy a VampirTrace CUPTI Activity context.
* Check for a VampirTrace activity stream by stream ID. If it does not exist,
* create it.
*
* @param vtCtx VampirTrace CUPTI Activity context
* @param strmID the CUDA stream ID provided by CUPTI callback API
@ -682,19 +683,19 @@ static void vt_cuptiact_writeKernelRecord(CUpti_ActivityKernel *kernel,
/* if current activity's start time is before last written timestamp */
if(start < vtStrm->vtLastTime){
vt_warning("[CUPTI Activity] Kernel start time < last written timestamp!");
vt_warning("[CUPTI Activity] Kernel: start time < last written timestamp!");
return;
}
/* check if time between start and stop is increasing */
if(stop < start){
vt_warning("[CUPTI Activity] Kernel start time > kernel stop time!");
vt_warning("[CUPTI Activity] Kernel: start time > stop time!");
return;
}
/* check if synchronization stop time is before kernel stop time */
if(vtCtx->sync.hostStop < stop){
vt_warning("[CUPTI Activity] Sync stop time < kernel stop time!");
vt_warning("[CUPTI Activity] Kernel: sync stop time < stop time!");
return;
}
@ -805,19 +806,19 @@ static void vt_cuptiact_writeMemcpyRecord(CUpti_ActivityMemcpy *mcpy,
/* if current activity's start time is before last written timestamp */
if(start < vtStrm->vtLastTime){
vt_cntl_msg(1, "[CUPTI Activity] Memcpy start time < last written timestamp!");
vt_cntl_msg(1, "[CUPTI Activity] Memcpy: start time < last written timestamp!");
return;
}
/* check if time between start and stop is increasing */
if(stop < start){
vt_warning("[CUPTI Activity] Memcpy start time > kernel stop time!");
vt_warning("[CUPTI Activity] Memcpy: start time > stop time!");
return;
}
/* check if synchronization stop time is before memcpy stop time */
if(vtCtx->sync.hostStop < stop){
vt_warning("[CUPTI Activity] Synchronization stop time < kernel stop time!");
vt_warning("[CUPTI Activity] Memcpy: sync stop time < stop time!");
return;
}
@ -848,13 +849,13 @@ static void vt_cuptiact_writeMemcpyRecord(CUpti_ActivityMemcpy *mcpy,
vt_warning("MCPYexit: %llu (%d)", stop, vtThrdID);
*/
if(kind == VT_GPU_HOST2DEV){
vt_mpi_rma_get(vtThrdID, &start, vtCtx->ptid * 65536 + vt_my_trace,
vt_mpi_rma_get(vtThrdID, &start, VT_GPU_RANK_ID(vtCtx->ptid),
vt_gpu_commCID, 0, mcpy->bytes);
}else if(kind == VT_GPU_DEV2HOST){
vt_mpi_rma_put(vtThrdID, &start, vtCtx->ptid * 65536 + vt_my_trace,
vt_mpi_rma_put(vtThrdID, &start, VT_GPU_RANK_ID(vtCtx->ptid),
vt_gpu_commCID, 0, mcpy->bytes);
}else if(kind == VT_GPU_DEV2DEV){
vt_mpi_rma_get(vtThrdID, &start, vtThrdID * 65536 + vt_my_trace,
vt_mpi_rma_get(vtThrdID, &start, VT_GPU_RANK_ID(vtThrdID),
vt_gpu_commCID, 0, mcpy->bytes);
}

Просмотреть файл

@ -1464,13 +1464,13 @@ static void vt_cupticb_handle_cudart_memcpy(
/*time = vt_pform_wtime();*/
if(kind == cudaMemcpyHostToDevice){
vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace,
vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID),
vt_gpu_commCID, 0, bytes);
}else if(kind == cudaMemcpyDeviceToHost){
vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace,
vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID),
vt_gpu_commCID, 0, bytes);
}else if(kind == cudaMemcpyDeviceToDevice){
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace,
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID),
vt_gpu_commCID, 0, bytes);
}
}
@ -1528,13 +1528,13 @@ static void vt_cupticb_handle_cudart_mcpyAsync(const CUpti_CallbackData *cbInfo,
time = vt_pform_wtime();
if(kind == cudaMemcpyHostToDevice){
vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace,
vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID),
vt_gpu_commCID, 0, bytes);
}else if(kind == cudaMemcpyDeviceToHost){
vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace,
vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID),
vt_gpu_commCID, 0, bytes);
}else if(kind == cudaMemcpyDeviceToDevice){
vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace,
vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID),
vt_gpu_commCID, 0, bytes);
}
}
@ -1692,12 +1692,14 @@ void vt_cupti_callback_init()
#endif
/* set callback for CUDA runtime API functions */
#if (defined(CUPTI_API_VERSION) && (CUPTI_API_VERSION >= 2))
#if defined(VT_CUPTI_ACTIVITY)
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy ||
vt_cupti_trace_gpu_mem > 0){
vt_cupti_set_callback(vt_cupticb_all_ptr,
CUPTI_CB_DOMAIN_RESOURCE,
CUPTI_RUNTIME_TRACE_CBID_INVALID);
vt_cupti_activity_init();
}
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy){
@ -1725,26 +1727,18 @@ void vt_cupti_callback_init()
CUPTI_CB_DOMAIN_DRIVER_API,
CUPTI_DRIVER_TRACE_CBID_cuCtxCreate);
*/
#else
#else
if(vt_cupticb_trace_runtimeAPI){
vt_cupti_set_callback(vt_cupticb_cudart_ptr,
CUPTI_CB_DOMAIN_RUNTIME_API,
CUPTI_RUNTIME_TRACE_CBID_INVALID);
}
#endif
#endif
/* reset the hash table for CUDA API functions */
memset(vt_cupticb_cudaApiFuncTab, VT_NO_ID,
VT_CUPTICB_CUDA_API_FUNC_MAX * sizeof(uint32_t));
/* use CUPTI activity for kernel and memcpy tracing, if it is available */
#if defined(VT_CUPTI_ACTIVITY)
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy ||
vt_cupti_trace_gpu_mem > 0){
vt_cupti_activity_init();
}
#endif
/* register the finalize function of VampirTrace CUPTI to be called before
* the program exits */
atexit(vt_cupti_callback_finalize);
@ -1769,7 +1763,10 @@ void vt_cupti_callback_finalize()
vt_cntl_msg(2, "[CUPTI Callbacks] Finalizing ... ");
#if defined(VT_CUPTI_ACTIVITY)
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy ||
vt_cupti_trace_gpu_mem > 0){
vt_cupti_activity_finalize();
}
#endif
VT_CUPTI_CALL(cuptiUnsubscribe(vt_cupticb_subscriber),

Просмотреть файл

@ -19,6 +19,7 @@
# define EXTERN extern
#endif
#include "vt_defs.h"
#include "vt_inttypes.h" /* VampirTrace integer types */
#include "vt_thrd.h" /* thread creation for GPU kernels */
#include "vt_trc.h" /* VampirTrace events */
@ -39,6 +40,12 @@
/* performance counter available? */
#define VTGPU_NO_PC 0x04 /* no performance counter for this thread available */
/*
 * Get the rank ID for a given VampirTrace thread ID.
 *
 * The MPI RMA functions (vt_mpi_rma_put / vt_mpi_rma_get) take the rank ID
 * of the communication partner instead of the VampirTrace process ID.
 * This macro expands to VT_PROCESS_ID(vt_my_trace, thread_id) minus one.
 *
 * @param thread_id VampirTrace thread ID of the communication partner
 *
 * NOTE(review): relies on `vt_my_trace` and `VT_PROCESS_ID` being visible
 * at the point of use -- presumably provided by vt_defs.h / vt_trc.h; confirm.
 */
#define VT_GPU_RANK_ID(thread_id) \
(VT_PROCESS_ID(vt_my_trace, thread_id)-1)
#if (defined(VT_CUDARTWRAP) || defined(VT_CUPTI))
/*

Просмотреть файл

@ -239,7 +239,7 @@ static metricmap_t* vt_metricmap_init(metmap_t match)
return mapv;
}
static void metricv_add(char* name, int code)
static void metricv_add(char* name, int code, uint32_t props)
{
if (nmetrics >= VT_METRIC_MAXNUM) {
vt_error_msg("Number of counters exceeds VampirTrace allowed maximum "
@ -248,7 +248,7 @@ static void metricv_add(char* name, int code)
metricv[nmetrics] = (struct metric*)malloc(sizeof(struct metric));
metricv[nmetrics]->name = strdup(name);
metricv[nmetrics]->descr[0] = '\0';
metricv[nmetrics]->props = VT_CNTR_ACC;
metricv[nmetrics]->props = props;
metricv[nmetrics]->papi_code = code;
nmetrics++;
}
@ -399,7 +399,7 @@ int vt_metric_open()
char* env_sep;
char* var;
char* token;
int forceprop;
char* saveptr;
PAPI_event_info_t info;
metricmap_t* mapv = NULL;
metricmap_t* map;
@ -433,15 +433,19 @@ int vt_metric_open()
vt_cntl_msg(2, "VT_METRICS=%s", var);
/* read metrics from specification string */
token = strtok(var, env_sep);
token = strtok_r(var, env_sep, &saveptr);
while ( token && (nmetrics < VT_METRIC_MAXNUM) ) {
/* set counter properties */
uint32_t props;
if (token[0]=='!')
{
forceprop=1;
props = VT_CNTR_ABS | VT_CNTR_NEXT;
token++;
}
else
forceprop=0;
{
props = VT_CNTR_ACC;
}
/* search metricmap for a suitable definition */
map = mapv;
/*printf("Token%d: <%s>\n", nmetrics, token);*/
@ -471,9 +475,9 @@ int vt_metric_open()
/*printf("Event %s *N/A*\n", component);*/
got_valid_match = 0;
} else if ((k==0) && (len==0)) { /* use provided event name */
metricv_add(token, code);
metricv_add(token, code, props);
} else { /* use alias component name */
metricv_add(component, code);
metricv_add(component, code, props);
}
k++;
} while (got_valid_match && (len > 0));
@ -499,12 +503,10 @@ int vt_metric_open()
if (retval != PAPI_OK)
vt_error_msg("Metric <%s> not available\n", component);
metricv_add(component, code);
metricv_add(component, code, props);
}
if (forceprop)
metricv[nmetrics-1]->props = VT_CNTR_ABS | VT_CNTR_NEXT;
token = strtok(NULL, env_sep);
token = strtok_r(NULL, env_sep, &saveptr);
}
/*printf("nmetrics=%d\n", nmetrics);*/

Просмотреть файл

@ -488,6 +488,9 @@ static void write_def_header(void)
/* VT_METRICS */
vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_METRICS: %s",
vt_env_metrics() ? vt_env_metrics() : "<not set>");
/* VT_METRICS_SEP */
vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_METRICS_SEP: %s",
vt_env_metrics_sep());
#endif /* VT_METR */
#if defined(VT_RUSAGE)