diff --git a/ompi/contrib/vt/vt/INSTALL b/ompi/contrib/vt/vt/INSTALL index 00b1a98f79..c8be645738 100644 --- a/ompi/contrib/vt/vt/INSTALL +++ b/ompi/contrib/vt/vt/INSTALL @@ -343,14 +343,17 @@ How to install and configure VampirTrace * Maybe you also need to set additional commands and flags for the back-end (e.g. RANLIB, AR, MPICC, CXXFLAGS). - For example, this configure command line works for an NEC SX6 system with an X86_64 - based front-end: + Examples: + BlueGene/P: + % ./configure --host=powerpc64-ibm-linux-gnu - ./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 MPICC=sxmpicc - AR=sxar RANLIB="sxar st" CC_FOR_BUILD=cc CXX_FOR_BUILD=c++ - --host=sx6-nec-superux14.1 - --with-cross-prefix=sx - --with-otf-lib=-lotf + Cray XK6: + % ./configure --host=x86_64-cray-linux-gnu + CC_FOR_BUILD=craycc + CXX_FOR_BUILD=crayc++ + + NEC SX6: + % ./configure --host=sx6-nec-superux14.1 Set up the Environment diff --git a/ompi/contrib/vt/vt/config/defaults/crayxe b/ompi/contrib/vt/vt/config/defaults/crayxe index 25a13ec509..d6c01632cd 100644 --- a/ompi/contrib/vt/vt/config/defaults/crayxe +++ b/ompi/contrib/vt/vt/config/defaults/crayxe @@ -12,5 +12,9 @@ if test x"$XTPE_COMPILE_TARGET" != x; then FFLAGS="$FFLAGS -target=$XTPE_COMPILE_TARGET" FCFLAGS="$FCFLAGS -target=$XTPE_COMPILE_TARGET" fi +CC_FOR_BUILD="gcc" +CXX_FOR_BUILD="g++" +CFLAGS_FOR_BUILD="-O3" +CXXFLAGS_FOR_BUILD="-O3" enable_shared="no" with_mpich2="yes" diff --git a/ompi/contrib/vt/vt/doc/UserManual.html b/ompi/contrib/vt/vt/doc/UserManual.html index cb51db65a5..d804a1bee4 100644 --- a/ompi/contrib/vt/vt/doc/UserManual.html +++ b/ompi/contrib/vt/vt/doc/UserManual.html @@ -314,20 +314,20 @@ OpenMP events, and performance counters.

After a successful tracing run, VampirTrace writes all collected data to a trace file in the Open Trace Format (OTF)[*][*]. As a result, the information is available for post-mortem analysis and visualization by various tools. Most notably, VampirTrace provides the input data for the Vampir analysis and visualization tool[*][*].

VampirTrace is included in OpenMPI 1.3 and later versions. If not disabled explicitly, VampirTrace is built automatically when installing OpenMPI[*][*].

@@ -1143,7 +1143,7 @@ in a single file, that The names in between may contain wildcards such as ``?'', ``*'', and ``#''; each entry goes on its own line. The lists end with END[_FILE]_<INCLUDE|EXCLUDE>_LIST. For further information on selective profiling, have a look at the TAU documentation[*][*]. To announce the file through the compiler wrapper, use the option -vt:tau:

@@ -1160,7 +1160,7 @@ Binary Instrumentation Using Dyninst
 The option -vt:inst dyninst is used with the compiler wrapper to 
 instrument the application during runtime (binary instrumentation), by using 
 Dyninst[*][*].
 Recompiling is not necessary for this kind of instrumentation,
 but relinking is:
@@ -1311,7 +1311,7 @@ Tracing Calls to 3rd-Party Libraries
  VampirTrace is also capable of tracing calls to third-party libraries that come with
  at least one C header file, even without the library's source code. If VampirTrace was
   built with support for library tracing (the CTool library[*][*]  is required), the tool vtlibwrapgen can be used to
   generate a wrapper library to intercept each call to the actual library functions.
   This wrapper library can be linked to the application or used in combination with the
@@ -1903,7 +1903,7 @@ for the enhanced timer synchronization:
 
 
-For example, this configure command line works for an NEC SX6 system with an X86_64 based front-end:
+Examples:
+
+

+BlueGene/P:

-% ./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 MPICC=sxmpicc
-              AR=sxar RANLIB="sxar st" CC_FOR_BUILD=cc CXX_FOR_BUILD=c++
-              --host=sx6-nec-superux14.1
-              --with-cross-prefix=sx
-              --with-otf-lib=-lotf
-
+% ./configure --host=powerpc64-ibm-linux-gnu +
+ +

+Cray XK6: + +

+

+% ./configure --host=x86_64-cray-linux-gnu
+              CC_FOR_BUILD=craycc
+              CXX_FOR_BUILD=crayc++
+
+ +

+NEC SX6: + +

+

+% ./configure --host=sx6-nec-superux14.1
+

@@ -5227,69 +5244,69 @@ If you provide us with your additions afterwards we will consider merging them into the official VampirTrace package.


Footnotes

-
... (OTF)... (OTF)[*]
http://www.tu-dresden.de/zih/otf
-
... tool ... tool [*]
http://www.vampir.eu
-
... +
... Open MPI [*]
http://www.open-mpi.org/faq/?category=vampirtrace
-
... documentation ... documentation [*]
-
http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling +
http://www.cs.uoregon.edu/Research/tau/docs/newguide/bk05ch02.html#d0e3770
-
... +
... Dyninst [*]
http://www.dyninst.org
-
... library ... library [*]
http://sourceforge.net/projects/ctool
-
... CLAPACK... CLAPACK[*]
www.netlib.org/clapack
-
... Dyninst ... Dyninst [*]
http://www.dyninst.org
-
... PDToolkit ... PDToolkit [*]
http://www.cs.uoregon.edu/research/pdt/home.php
-
... TAU ... TAU [*]
http://tau.uoregon.edu
-
... CTool ... CTool [*]
http://sourceforge.net/projects/ctool diff --git a/ompi/contrib/vt/vt/doc/UserManual.pdf b/ompi/contrib/vt/vt/doc/UserManual.pdf index 160c683abc..4614d486ae 100644 Binary files a/ompi/contrib/vt/vt/doc/UserManual.pdf and b/ompi/contrib/vt/vt/doc/UserManual.pdf differ diff --git a/ompi/contrib/vt/vt/extlib/otf/ChangeLog b/ompi/contrib/vt/vt/extlib/otf/ChangeLog index fcf1555661..91b4cccac7 100644 --- a/ompi/contrib/vt/vt/extlib/otf/ChangeLog +++ b/ompi/contrib/vt/vt/extlib/otf/ChangeLog @@ -1,3 +1,13 @@ +1.10.2openmpi + - otfdump: + - don't abort when reading events fails - the input tracefile + might only have statistics + - otfprofile: + - removed leading '=' from CSV lines to make it loadable into + spreadsheets (e.g. Open Office) + - fixed process naming in CSV output of collective operation + statistics + 1.10.1openmpi - fixed build issues in otfprofile[-mpi]: - added missing header includes for WEXITSTATUS diff --git a/ompi/contrib/vt/vt/extlib/otf/VERSION b/ompi/contrib/vt/vt/extlib/otf/VERSION index 690398d879..d3c76339e6 100644 --- a/ompi/contrib/vt/vt/extlib/otf/VERSION +++ b/ompi/contrib/vt/vt/extlib/otf/VERSION @@ -7,7 +7,7 @@ major=1 minor=10 -sub=1 +sub=2 # string is used for alpha, beta, or release tags. If it is non-empty, it will # be appended to the version number. @@ -48,5 +48,5 @@ string=openmpi # release, age must be incremented. Otherwise, reset age # to '0'. 
-library=4:1:3 +library=4:2:3 diff --git a/ompi/contrib/vt/vt/extlib/otf/otf_vc08.sln b/ompi/contrib/vt/vt/extlib/otf/otf_vc08.sln index 81363dba96..fc5e113dd3 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otf_vc08.sln +++ b/ompi/contrib/vt/vt/extlib/otf/otf_vc08.sln @@ -9,8 +9,6 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfcompress", "tools\otfcom EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfdump", "tools\otfdump\otfdump_vs08.vcproj", "{508A823C-8EAB-4C56-9DFE-4D85B0D24491}" EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "otfmerge", "tools\otfmerge\otfmerge_vs08.vcproj", "{1B9223C8-DE20-4538-8214-776C6017D22B}" -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Win32 = Debug|Win32 diff --git a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h index 36b688986a..6047832b48 100644 --- a/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h +++ b/ompi/contrib/vt/vt/extlib/otf/otflib/OTF_Version.h @@ -18,7 +18,7 @@ #define OTF_VERSION_MAJOR 1 #define OTF_VERSION_MINOR 10 -#define OTF_VERSION_SUB 1 +#define OTF_VERSION_SUB 2 #define OTF_VERSION_STRING "openmpi" /** diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/otfdump.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/otfdump.cpp index 995666791a..71f0f08d16 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/otfdump.cpp +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfdump/otfdump.cpp @@ -611,7 +611,7 @@ int main ( int argc, const char** argv ) { read = OTF_Reader_readDefinitions( reader, handlers ); if( read == OTF_READ_ERROR ) { - fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n"); + fprintf(stderr,"An error occurred while reading definitions. The tracefile seems to be damaged. 
Abort.\n"); return 1; } } @@ -624,8 +624,7 @@ int main ( int argc, const char** argv ) { read = OTF_Reader_readEvents( reader, handlers ); if( read == OTF_READ_ERROR ) { - fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n"); - return 1; + fprintf(stderr,"An error occurred while reading events. Maybe the tracefile has no events or it is damaged. Continue.\n"); } } @@ -637,7 +636,7 @@ int main ( int argc, const char** argv ) { read = OTF_Reader_readStatistics( reader, handlers ); if( read == OTF_READ_ERROR ) { - fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n"); + fprintf(stderr,"An error occurred while reading statistics. The tracefile seems to be damaged. Abort.\n"); return 1; } } @@ -649,7 +648,7 @@ int main ( int argc, const char** argv ) { } read = OTF_Reader_readSnapshots( reader, handlers ); if( read == OTF_READ_ERROR ) { - fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n"); + fprintf(stderr,"An error occurred while reading snapshots. The tracefile seems to be damaged. Abort.\n"); return 1; } } @@ -661,7 +660,7 @@ int main ( int argc, const char** argv ) { } read = OTF_Reader_readMarkers( reader, handlers ); if( read == OTF_READ_ERROR ) { - fprintf(stderr,"An error occurred while reading the tracefile. It seems to be damaged. Abort.\n"); + fprintf(stderr,"An error occurred while reading markers. The tracefile seems to be damaged. 
Abort.\n"); return 1; } } diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_csv.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_csv.cpp index 20ea546e31..25663aa02f 100644 --- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_csv.cpp +++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile/create_csv.cpp @@ -94,7 +94,7 @@ static void write_func_data( AllData& alldata, ofstream& csvFile, " appending function data to file: %s\n", csvFileName.c_str() ); - static const string LINE_PREFIX= "==FUNCTION=="; + static const string LINE_PREFIX= "FUNCTION"; if ( 0 == alldata.myRank ) { @@ -145,7 +145,7 @@ static void write_counter_data( AllData& alldata, ofstream& csvFile, " appending counter data to file: %s\n", csvFileName.c_str() ); - static const string LINE_PREFIX= "==COUNTER=="; + static const string LINE_PREFIX= "COUNTER"; if ( 0 == alldata.myRank ) { @@ -218,7 +218,7 @@ static void write_p2p_data( AllData& alldata, ofstream& csvFile, " appending P2P message data to file: %s\n", csvFileName.c_str() ); - static const string LINE_PREFIX= "==P2P=="; + static const string LINE_PREFIX= "P2P"; if ( 0 == alldata.myRank ) { @@ -270,7 +270,7 @@ static void write_collop_data( AllData& alldata, ofstream& csvFile, " appending collective op. 
data to file: %s\n", csvFileName.c_str() ); - static const string LINE_PREFIX= "==COLLOP=="; + static const string LINE_PREFIX= "COLLOP"; static map< uint64_t, string > op_class_names; if ( op_class_names.empty() ) { @@ -306,14 +306,14 @@ static void write_collop_data( AllData& alldata, ofstream& csvFile, const uint64_t& bytes_recv= it->second.bytes_recv.sum; const double duration= it->second.duration_send.sum / alldata.timerResolution; - const string& proc_name= alldata.processIdNameMap[ proc_id ]; + const string& proc_name= alldata.processIdNameMap[ proc_id ]; assert( 0 != proc_name.length() ); const string& op_class_name= op_class_names[ op_class ]; assert( 0 != op_class_name.length() ); csvFile << LINE_PREFIX << ';' - << proc_id << ';' + << proc_name << ';' << op_class_name << ';' << count_send << ';' << count_recv << ';' diff --git a/ompi/contrib/vt/vt/vtlib/vt_cudart.c b/ompi/contrib/vt/vt/vtlib/vt_cudart.c index a9c9c45909..74d469c56f 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_cudart.c +++ b/ompi/contrib/vt/vt/vtlib/vt_cudart.c @@ -92,13 +92,13 @@ VTThrdMutex* VTThrdMutexCudart = NULL; } \ if(do_traceE){ \ if(_kind == cudaMemcpyHostToDevice){ \ - vt_mpi_rma_put(_ptid, &time, strmID * 65536 + vt_my_trace, \ + vt_mpi_rma_put(_ptid, &time, VT_GPU_RANK_ID(strmID), \ vt_gpu_commCID, 0, (uint64_t)_bytes); \ }else if(_kind == cudaMemcpyDeviceToHost){ \ - vt_mpi_rma_get(_ptid, &time, strmID * 65536 + vt_my_trace, \ + vt_mpi_rma_get(_ptid, &time, VT_GPU_RANK_ID(strmID), \ vt_gpu_commCID, 0, (uint64_t)_bytes); \ }else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \ - vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, \ + vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID), \ vt_gpu_commCID, 0, (uint64_t)_bytes); \ } \ } \ @@ -142,7 +142,7 @@ VTThrdMutex* VTThrdMutexCudart = NULL; \ if(do_traceE){\ vt_mpi_rma_get(vtSrcDev->strmList->tid, &time, \ - vtDstDev->strmList->tid * 65536 + vt_my_trace,\ + VT_GPU_RANK_ID(vtDstDev->strmList->tid),\ 
vt_gpu_commCID, 0, _bytes);\ }\ \ @@ -274,13 +274,13 @@ VTThrdMutex* VTThrdMutexCudart = NULL; VT_CUDART_CALL(cudaThreadSynchronize_ptr(),"vtcudaSync() failed!"); \ if(syncLevel > 1){time = vt_pform_wtime(); vt_exit(ptid, &time);} \ if(_kind == cudaMemcpyHostToDevice){ \ - vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace, \ + vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID), \ vt_gpu_commCID, 0, _bytes); \ }else if(_kind == cudaMemcpyDeviceToHost){ \ - vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace, \ + vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID), \ vt_gpu_commCID, 0, _bytes); \ }else if(_kind == cudaMemcpyDeviceToDevice && syncLevel > 2){ \ - vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, \ + vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID), \ vt_gpu_commCID, 0, _bytes); \ CUDARTWRAP_LOCK(); \ vt_gpu_prop[strmID] |= VTGPU_GPU_COMM; \ @@ -1192,13 +1192,13 @@ static void VTCUDAflush(VTCUDADevice *vtDev, uint32_t ptid) }*/ if(mcpy->kind == cudaMemcpyHostToDevice){ - vt_mpi_rma_get(tid, &strttime, mcpy->pid * 65536 + vt_my_trace, + vt_mpi_rma_get(tid, &strttime, VT_GPU_RANK_ID(mcpy->pid), vt_gpu_commCID, 0, mcpy->byteCount); }else if(mcpy->kind == cudaMemcpyDeviceToHost){ - vt_mpi_rma_put(tid, &strttime, mcpy->pid * 65536 + vt_my_trace, + vt_mpi_rma_put(tid, &strttime, VT_GPU_RANK_ID(mcpy->pid), vt_gpu_commCID, 0, mcpy->byteCount); }else if(mcpy->kind == cudaMemcpyDeviceToDevice){ - vt_mpi_rma_get(tid, &strttime, tid * 65536 + vt_my_trace, + vt_mpi_rma_get(tid, &strttime, VT_GPU_RANK_ID(tid), vt_gpu_commCID, 0, mcpy->byteCount); } diff --git a/ompi/contrib/vt/vt/vtlib/vt_cupti_activity.c b/ompi/contrib/vt/vt/vtlib/vt_cupti_activity.c index f2365cfd89..0ba5234b11 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_cupti_activity.c +++ b/ompi/contrib/vt/vt/vtlib/vt_cupti_activity.c @@ -113,7 +113,7 @@ static uint8_t vt_cuptiact_finalized = 0; static uint8_t *vt_cuptiact_global_buffer = NULL;*/ /* size of the activity buffer */ -static 
size_t vt_cuptiact_bufSize = VTGPU_DEFAULT_BSIZE; +static size_t vt_cuptiact_bufSize = VT_CUPTI_ACT_DEFAULT_BSIZE; /* cupti activity specific kernel counter IDs */ static uint32_t vt_cuptiact_cid_knStaticSharedMem = VT_NO_ID; @@ -224,9 +224,9 @@ void vt_cupti_activity_init() void vt_cupti_activity_finalize() { - if(!vt_cuptiact_finalized){ + if(!vt_cuptiact_finalized && vt_cuptiact_initialized){ VT_CUPTI_ACT_LOCK(); - if(!vt_cuptiact_finalized){ + if(!vt_cuptiact_finalized && vt_cuptiact_initialized){ vt_cntl_msg(2, "[CUPTI Activity] Finalizing ... "); vt_cuptiact_finalized = 1; @@ -476,7 +476,8 @@ static vt_cuptiact_ctx_t* vt_cuptiact_getCtx(CUcontext cuCtx) } /* - * Destroy a VampirTrace CUPTI Activity context. + * Check for a VampirTrace activity stream by stream ID. If it does not exist, + * create it. * * @param vtCtx VampirTrace CUPTI Activity context * @param strmID the CUDA stream ID provided by CUPTI callback API @@ -682,19 +683,19 @@ static void vt_cuptiact_writeKernelRecord(CUpti_ActivityKernel *kernel, /* if current activity's start time is before last written timestamp */ if(start < vtStrm->vtLastTime){ - vt_warning("[CUPTI Activity] Kernel start time < last written timestamp!"); + vt_warning("[CUPTI Activity] Kernel: start time < last written timestamp!"); return; } /* check if time between start and stop is increasing */ if(stop < start){ - vt_warning("[CUPTI Activity] Kernel start time > kernel stop time!"); + vt_warning("[CUPTI Activity] Kernel: start time > stop time!"); return; } /* check if synchronization stop time is before kernel stop time */ if(vtCtx->sync.hostStop < stop){ - vt_warning("[CUPTI Activity] Sync stop time < kernel stop time!"); + vt_warning("[CUPTI Activity] Kernel: sync stop time < stop time!"); return; } @@ -805,19 +806,19 @@ static void vt_cuptiact_writeMemcpyRecord(CUpti_ActivityMemcpy *mcpy, /* if current activity's start time is before last written timestamp */ if(start < vtStrm->vtLastTime){ - vt_cntl_msg(1, "[CUPTI 
Activity] Memcpy start time < last written timestamp!"); + vt_cntl_msg(1, "[CUPTI Activity] Memcpy: start time < last written timestamp!"); return; } /* check if time between start and stop is increasing */ if(stop < start){ - vt_warning("[CUPTI Activity] Memcpy start time > kernel stop time!"); + vt_warning("[CUPTI Activity] Memcpy: start time > stop time!"); return; } /* check if synchronization stop time is before kernel stop time */ if(vtCtx->sync.hostStop < stop){ - vt_warning("[CUPTI Activity] Synchronization stop time < kernel stop time!"); + vt_warning("[CUPTI Activity] Memcpy: sync stop time < stop time!"); return; } @@ -848,13 +849,13 @@ static void vt_cuptiact_writeMemcpyRecord(CUpti_ActivityMemcpy *mcpy, vt_warning("MCPYexit: %llu (%d)", stop, vtThrdID); */ if(kind == VT_GPU_HOST2DEV){ - vt_mpi_rma_get(vtThrdID, &start, vtCtx->ptid * 65536 + vt_my_trace, + vt_mpi_rma_get(vtThrdID, &start, VT_GPU_RANK_ID(vtCtx->ptid), vt_gpu_commCID, 0, mcpy->bytes); }else if(kind == VT_GPU_DEV2HOST){ - vt_mpi_rma_put(vtThrdID, &start, vtCtx->ptid * 65536 + vt_my_trace, + vt_mpi_rma_put(vtThrdID, &start, VT_GPU_RANK_ID(vtCtx->ptid), vt_gpu_commCID, 0, mcpy->bytes); }else if(kind == VT_GPU_DEV2DEV){ - vt_mpi_rma_get(vtThrdID, &start, vtThrdID * 65536 + vt_my_trace, + vt_mpi_rma_get(vtThrdID, &start, VT_GPU_RANK_ID(vtThrdID), vt_gpu_commCID, 0, mcpy->bytes); } diff --git a/ompi/contrib/vt/vt/vtlib/vt_cupti_callback.c b/ompi/contrib/vt/vt/vtlib/vt_cupti_callback.c index 334c03934e..52db96b7d1 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_cupti_callback.c +++ b/ompi/contrib/vt/vt/vtlib/vt_cupti_callback.c @@ -1464,13 +1464,13 @@ static void vt_cupticb_handle_cudart_memcpy( /*time = vt_pform_wtime();*/ if(kind == cudaMemcpyHostToDevice){ - vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace, + vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID), vt_gpu_commCID, 0, bytes); }else if(kind == cudaMemcpyDeviceToHost){ - vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace, + 
vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID), vt_gpu_commCID, 0, bytes); }else if(kind == cudaMemcpyDeviceToDevice){ - vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, + vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID), vt_gpu_commCID, 0, bytes); } } @@ -1528,13 +1528,13 @@ static void vt_cupticb_handle_cudart_mcpyAsync(const CUpti_CallbackData *cbInfo, time = vt_pform_wtime(); if(kind == cudaMemcpyHostToDevice){ - vt_mpi_rma_put(ptid, &time, strmID * 65536 + vt_my_trace, + vt_mpi_rma_put(ptid, &time, VT_GPU_RANK_ID(strmID), vt_gpu_commCID, 0, bytes); }else if(kind == cudaMemcpyDeviceToHost){ - vt_mpi_rma_get(ptid, &time, strmID * 65536 + vt_my_trace, + vt_mpi_rma_get(ptid, &time, VT_GPU_RANK_ID(strmID), vt_gpu_commCID, 0, bytes); }else if(kind == cudaMemcpyDeviceToDevice){ - vt_mpi_rma_get(strmID, &time, strmID * 65536 + vt_my_trace, + vt_mpi_rma_get(strmID, &time, VT_GPU_RANK_ID(strmID), vt_gpu_commCID, 0, bytes); } } @@ -1692,12 +1692,14 @@ void vt_cupti_callback_init() #endif /* set callback for CUDA runtime API functions */ - #if (defined(CUPTI_API_VERSION) && (CUPTI_API_VERSION >= 2)) +#if defined(VT_CUPTI_ACTIVITY) if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy || vt_cupti_trace_gpu_mem > 0){ vt_cupti_set_callback(vt_cupticb_all_ptr, CUPTI_CB_DOMAIN_RESOURCE, CUPTI_RUNTIME_TRACE_CBID_INVALID); + + vt_cupti_activity_init(); } if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy){ @@ -1725,26 +1727,18 @@ void vt_cupti_callback_init() CUPTI_CB_DOMAIN_DRIVER_API, CUPTI_DRIVER_TRACE_CBID_cuCtxCreate); */ - #else +#else if(vt_cupticb_trace_runtimeAPI){ vt_cupti_set_callback(vt_cupticb_cudart_ptr, CUPTI_CB_DOMAIN_RUNTIME_API, CUPTI_RUNTIME_TRACE_CBID_INVALID); } - #endif +#endif /* reset the hash table for CUDA API functions */ memset(vt_cupticb_cudaApiFuncTab, VT_NO_ID, VT_CUPTICB_CUDA_API_FUNC_MAX * sizeof(uint32_t)); - /* use CUPTI activity for kernel and memcpy tracing, if it is available */ -#if defined(VT_CUPTI_ACTIVITY) - 
if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy || - vt_cupti_trace_gpu_mem > 0){ - vt_cupti_activity_init(); - } -#endif - /* register the finalize function of VampirTrace CUPTI to be called before * the program exits */ atexit(vt_cupti_callback_finalize); @@ -1769,7 +1763,10 @@ void vt_cupti_callback_finalize() vt_cntl_msg(2, "[CUPTI Callbacks] Finalizing ... "); #if defined(VT_CUPTI_ACTIVITY) - vt_cupti_activity_finalize(); + if(vt_cupti_trace_kernels > 0 || vt_cupti_trace_mcpy || + vt_cupti_trace_gpu_mem > 0){ + vt_cupti_activity_finalize(); + } #endif VT_CUPTI_CALL(cuptiUnsubscribe(vt_cupticb_subscriber), diff --git a/ompi/contrib/vt/vt/vtlib/vt_gpu.h b/ompi/contrib/vt/vt/vtlib/vt_gpu.h index 9c9856d4c0..2cdfcecb65 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_gpu.h +++ b/ompi/contrib/vt/vt/vtlib/vt_gpu.h @@ -19,6 +19,7 @@ # define EXTERN extern #endif +#include "vt_defs.h" #include "vt_inttypes.h" /* VampirTrace integer types */ #include "vt_thrd.h" /* thread creation for GPU kernels */ #include "vt_trc.h" /* VampirTrace events */ @@ -39,6 +40,12 @@ /* performance counter available? */ #define VTGPU_NO_PC 0x04 /* no performance counter for this thread available */ +/* + * Get the rank ID for a given VampirTrace thread ID. + * The MPI RMA functions take the rank ID instead of the VampirTrace process ID! 
+ */ +#define VT_GPU_RANK_ID(thread_id) \ + (VT_PROCESS_ID(vt_my_trace, thread_id)-1) #if (defined(VT_CUDARTWRAP) || defined(VT_CUPTI)) /* diff --git a/ompi/contrib/vt/vt/vtlib/vt_metric_papi.c b/ompi/contrib/vt/vt/vtlib/vt_metric_papi.c index e20812d135..7e7d6e3620 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_metric_papi.c +++ b/ompi/contrib/vt/vt/vtlib/vt_metric_papi.c @@ -88,8 +88,8 @@ static struct metric* metricv[VT_METRIC_MAXNUM]; static int nmetrics = 0; static metricmap_t* metricmap_append(metricmap_t* map, - metmap_t type, - char* event, char* alias) + metmap_t type, + char* event, char* alias) { /*printf("Def 0x%X %s = <%s>\n", type, event, alias);*/ @@ -115,7 +115,7 @@ static metricmap_t* metricmap_append(metricmap_t* map, map->event_name = strdup(event); map->alias_name = strdup(alias); map->next = NULL; - + return map; } @@ -170,10 +170,10 @@ static metricmap_t* vt_metricmap_init(metmap_t match) if (!specfile) return NULL; - fp = fopen(specfile, "r"); + fp = fopen(specfile, "r"); if (fp == NULL) { vt_cntl_msg(2, "Failed to open metric specification %s: %s", - specfile, strerror(errno)); + specfile, strerror(errno)); return NULL; } @@ -186,7 +186,7 @@ static metricmap_t* vt_metricmap_init(metmap_t match) while (len && ((line[len-1] == ' ') || (line[len-1] == '\t'))) len--; line[len] = '\0'; /* chop comment and return */ lineno++; - if (len <= 1) continue; + if (len <= 1) continue; defs++; if (!strncmp("measure", line, 7)) type=METMAP_MEASURE; else if (!strncmp("compose", line, 7)) type=METMAP_COMPOSE; @@ -208,16 +208,16 @@ static metricmap_t* vt_metricmap_init(metmap_t match) /*printf("Def %2d:<%s> %s <%s>\n", defs, def_name, line, def_args);*/ len = strlen(def_args); /* length of definition arguments */ if (((type == METMAP_MEASURE) && (match & METMAP_MEASURE)) || - ((type == METMAP_AGGROUP) && (match & METMAP_AGGROUP))) { + ((type == METMAP_AGGROUP) && (match & METMAP_AGGROUP))) { if (((int)strcspn(def_args, "=+") != len) || - (((int)strcspn(def_args, 
"=+-*/ \t") != len) - && (type == METMAP_MEASURE))) { - type = METMAP_INVALID; - invalid_defs++; - vt_cntl_msg(2, "XXXX Def %d:%s <%s> invalid!", lineno, line, def_name); + (((int)strcspn(def_args, "=+-*/ \t") != len) + && (type == METMAP_MEASURE))) { + type = METMAP_INVALID; + invalid_defs++; + vt_cntl_msg(2, "XXXX Def %d:%s <%s> invalid!", lineno, line, def_name); } else { - map = metricmap_append(map, type, def_name, def_args); - measure_defs++; + map = metricmap_append(map, type, def_name, def_args); + measure_defs++; } } else if ((type == METMAP_COMPOSE) && (match & METMAP_COMPOSE)) { map = metricmap_append(map, type, def_name, def_args); @@ -229,26 +229,26 @@ static metricmap_t* vt_metricmap_init(metmap_t match) if (mapv == NULL) mapv = map; /* initialise head of vector */ } vt_cntl_msg(2, "Mapped %d/%d defs from \"%s\"", - measure_defs+aggroup_defs+compose_defs+compute_defs, defs, specfile); + measure_defs+aggroup_defs+compose_defs+compute_defs, defs, specfile); #if 0 - printf("measure %d aggroup %d compose %d compute %d unknown %d invalid %d\n", - measure_defs, aggroup_defs, compose_defs, compute_defs, - unknown_defs, invalid_defs); + printf("measure %d aggroup %d compose %d compute %d unknown %d invalid %d\n", + measure_defs, aggroup_defs, compose_defs, compute_defs, + unknown_defs, invalid_defs); #endif fclose(fp); return mapv; } -static void metricv_add(char* name, int code) +static void metricv_add(char* name, int code, uint32_t props) { if (nmetrics >= VT_METRIC_MAXNUM) { vt_error_msg("Number of counters exceeds VampirTrace allowed maximum " - "of %d", VT_METRIC_MAXNUM); + "of %d", VT_METRIC_MAXNUM); } else { metricv[nmetrics] = (struct metric*)malloc(sizeof(struct metric)); metricv[nmetrics]->name = strdup(name); metricv[nmetrics]->descr[0] = '\0'; - metricv[nmetrics]->props = VT_CNTR_ACC; + metricv[nmetrics]->props = props; metricv[nmetrics]->papi_code = code; nmetrics++; } @@ -296,14 +296,14 @@ static void metric_descriptions(void) if 
(strcmp(info.long_descr, metricv[i]->name) != 0) { strncpy(metricv[i]->descr, info.long_descr, sizeof(metricv[i]->descr)); - + /* tidy description if necessary */ j=strlen(metricv[i]->descr)-1; if (metricv[i]->descr[j] == '\n') metricv[i]->descr[j]='\0'; j=strlen(metricv[i]->descr)-1; if (metricv[i]->descr[j] != '.') - strncat(metricv[i]->descr, ".", - sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); + strncat(metricv[i]->descr, ".", + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); } if (metricv[i]->papi_code & PAPI_PRESET_MASK) { /* PAPI preset */ @@ -314,21 +314,21 @@ static void metric_descriptions(void) strncat(metricv[i]->descr, info.name[0], sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); for (k=1; k<(int)info.count; k++) { - char op[4]; - postfix_chp = postfix_chp?strpbrk(++postfix_chp, "+-*/"):NULL; - sprintf(op, " %c ", (postfix_chp?*postfix_chp:derive_ch)); - strncat(metricv[i]->descr, op, - sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); - strncat(metricv[i]->descr, info.name[k], - sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); + char op[4]; + postfix_chp = postfix_chp?strpbrk(++postfix_chp, "+-*/"):NULL; + sprintf(op, " %c ", (postfix_chp?*postfix_chp:derive_ch)); + strncat(metricv[i]->descr, op, + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); + strncat(metricv[i]->descr, info.name[k], + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); } strncat(metricv[i]->descr, " ]", sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); if (strcmp(info.symbol, metricv[i]->name) != 0) { /* add preset name */ - strncat(metricv[i]->descr, " = ", - sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); - strncat(metricv[i]->descr, info.symbol, - sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); + strncat(metricv[i]->descr, " = ", + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); + strncat(metricv[i]->descr, info.symbol, + sizeof(metricv[i]->descr)-strlen(metricv[i]->descr)); } } @@ -341,7 +341,7 @@ static void 
metric_test(void) { int i, j; int retval; - + int component; struct eventmap_t * EventSet[VT_METRIC_MAXNUM]; for (i=0; iEventId); if ( retval != PAPI_OK ) metric_error(retval, "PAPI_cleanup_eventset"); - + retval = PAPI_destroy_eventset(&(EventSet[i]->EventId)); if ( retval != PAPI_OK ) metric_error(retval, "PAPI_destroy_eventset"); @@ -399,12 +399,12 @@ int vt_metric_open() char* env_sep; char* var; char* token; - int forceprop; + char* saveptr; PAPI_event_info_t info; metricmap_t* mapv = NULL; metricmap_t* map; - /* read environment variable "VT_METRICS". Return if + /* read environment variable "VT_METRICS". Return if uset and no PAPI timer used. */ env = vt_env_metrics(); if( env == NULL ) @@ -431,56 +431,60 @@ int vt_metric_open() var = strdup(env); vt_cntl_msg(2, "VT_METRICS=%s", var); - + /* read metrics from specification string */ - token = strtok(var, env_sep); + token = strtok_r(var, env_sep, &saveptr); while ( token && (nmetrics < VT_METRIC_MAXNUM) ) { + /* set counter properties */ + uint32_t props; if (token[0]=='!') { - forceprop=1; + props = VT_CNTR_ABS | VT_CNTR_NEXT; token++; } else - forceprop=0; + { + props = VT_CNTR_ACC; + } /* search metricmap for a suitable definition */ map = mapv; /*printf("Token%d: <%s>\n", nmetrics, token);*/ while (map != NULL) { if ( strcmp(map->event_name, token) == 0 ) { - /*printf("Definition %s = <%s>\n", token, map->alias_name);*/ - /* expand definition and set components */ - char* c_token = map->alias_name; - int len = strcspn(c_token, " \t"); /* first token */ - int got_valid_match = 1; /* to be verified */ - int k = 0; - do { /* verify each component of definition is available */ - char component[64]; - int code = -1; - strncpy(component, c_token, len); - component[len] = '\0'; - /*printf("Comp[%d] <%s>\n", k, component);*/ - c_token += len + strspn(c_token+len, " \t"); - len = strcspn(c_token, " \t"); /* next token */ + /*printf("Definition %s = <%s>\n", token, map->alias_name);*/ + /* expand definition and 
set components */ + char* c_token = map->alias_name; + int len = strcspn(c_token, " \t"); /* first token */ + int got_valid_match = 1; /* to be verified */ + int k = 0; + do { /* verify each component of definition is available */ + char component[64]; + int code = -1; + strncpy(component, c_token, len); + component[len] = '\0'; + /*printf("Comp[%d] <%s>\n", k, component);*/ + c_token += len + strspn(c_token+len, " \t"); + len = strcspn(c_token, " \t"); /* next token */ - PAPI_event_name_to_code(component, &code); - memset(&info, 0, sizeof(PAPI_event_info_t)); - retval = PAPI_get_event_info(code, &info); - /*printf("v[%d] %s [0x%X] %d\n", k, component, code, info.count);*/ - - if (info.count == 0) { - /*printf("Event %s *N/A*\n", component);*/ - got_valid_match = 0; - } else if ((k==0) && (len==0)) { /* use provided event name */ - metricv_add(token, code); - } else { /* use alias component name */ - metricv_add(component, code); - } - k++; - } while (got_valid_match && (len > 0)); - if (got_valid_match) { - /*printf("Definition %s = <%s> OK\n", map->event_name, map->alias_name);*/ - break; /* accept this event definition */ - } + PAPI_event_name_to_code(component, &code); + memset(&info, 0, sizeof(PAPI_event_info_t)); + retval = PAPI_get_event_info(code, &info); + /*printf("v[%d] %s [0x%X] %d\n", k, component, code, info.count);*/ + + if (info.count == 0) { + /*printf("Event %s *N/A*\n", component);*/ + got_valid_match = 0; + } else if ((k==0) && (len==0)) { /* use provided event name */ + metricv_add(token, code, props); + } else { /* use alias component name */ + metricv_add(component, code, props); + } + k++; + } while (got_valid_match && (len > 0)); + if (got_valid_match) { + /*printf("Definition %s = <%s> OK\n", map->event_name, map->alias_name);*/ + break; /* accept this event definition */ + } } map = map->next; } @@ -491,20 +495,18 @@ int vt_metric_open() /*printf("Comp[X] <%s>\n", component);*/ retval = PAPI_event_name_to_code(component, &code); if 
(retval != PAPI_OK || code == -1) - vt_error_msg("Metric <%s> not supported\n", component); + vt_error_msg("Metric <%s> not supported\n", component); memset(&info, 0, sizeof(PAPI_event_info_t)); retval = PAPI_get_event_info(code, &info); /*printf("v[%d] %s [0x%X] %d\n", nmetrics, component, code, info.count);*/ if (retval != PAPI_OK) - vt_error_msg("Metric <%s> not available\n", component); + vt_error_msg("Metric <%s> not available\n", component); - metricv_add(component, code); + metricv_add(component, code, props); } - if (forceprop) - metricv[nmetrics-1]->props = VT_CNTR_ABS | VT_CNTR_NEXT; - token = strtok(NULL, env_sep); + token = strtok_r(NULL, env_sep, &saveptr); } /*printf("nmetrics=%d\n", nmetrics);*/ @@ -526,7 +528,7 @@ int vt_metric_open() void vt_metric_close() { int i; - + for ( i = 0; i < nmetrics; i++ ) { free (metricv[i]->name); free(metricv[i]); @@ -628,7 +630,7 @@ void vt_metric_free(struct vt_metv* metv, uint32_t tid) } VT_RESUME_IO_TRACING(tid); - + free(metv); } @@ -639,7 +641,7 @@ void vt_metric_thread_init(long (*id_fn)(void)) if ( nmetrics == 0 ) return; - retval = PAPI_thread_init((unsigned long (*)(void))(id_fn)); + retval = PAPI_thread_init((unsigned long (*)(void))(id_fn)); if ( retval != PAPI_OK) metric_error(retval, "PAPI_thread_init"); vt_cntl_msg(2, "PAPI thread support initialized"); @@ -718,12 +720,12 @@ uint64_t vt_metric_clckrt(void) if (!PAPI_is_initialized()) { /* initialize PAPI, since it hasn't already been initialized */ - int retval = PAPI_library_init(PAPI_VER_CURRENT); + int retval = PAPI_library_init(PAPI_VER_CURRENT); if ( retval != PAPI_VER_CURRENT ) metric_error(retval, "PAPI_library_init"); } - hwinfo = PAPI_get_hardware_info(); + hwinfo = PAPI_get_hardware_info(); if ( hwinfo == NULL) vt_error_msg("Failed to access PAPI hardware info\n"); vt_cntl_msg(2, "Clock rate: %f MHz", hwinfo->mhz); diff --git a/ompi/contrib/vt/vt/vtlib/vt_trc.c b/ompi/contrib/vt/vt/vtlib/vt_trc.c index 2598244c3a..a64e6fd71b 100644 --- 
a/ompi/contrib/vt/vt/vtlib/vt_trc.c +++ b/ompi/contrib/vt/vt/vtlib/vt_trc.c @@ -488,6 +488,9 @@ static void write_def_header(void) /* VT_METRICS */ vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_METRICS: %s", vt_env_metrics() ? vt_env_metrics() : ""); + /* VT_METRICS_SEP */ + vt_def_comment(VT_MASTER_THREAD, VT_UNIFY_STRID_VT_COMMENT" VT_METRICS_SEP: %s", + vt_env_metrics_sep()); #endif /* VT_METR */ #if defined(VT_RUSAGE)