Changes to OTF:
- improved zlib compression - otfprofile-mpi: - fixed progress Changes to VT: - fixed C++ linker issue for manual instrumentation of multiple files - fixed CUDA kernel launch configuration - process and thread buffer size can be explicitly specified by the user via the environment variables VT_BUFFER_SIZE and VT_THREAD_BUFFER_SIZE - fixed CUDA buffer management - vtfilter: - fixed progress - vtwrapper: - link CUPTI library, if available - vtsetup: - removed fixed path to *.dtd file in vtsetup-data.xml[.in] (fixes 'java.net.MalformedURLException') This commit was SVN r24950.
Этот коммит содержится в:
родитель
c1ab24c687
Коммит
4ca70e5c91
@ -1,7 +1,10 @@
|
||||
5.11.1openmpi
|
||||
- updated version of internal OTF to 1.9.1openmpi
|
||||
- updated version of internal OTF to 1.9.1sawfish
|
||||
(see extlib/otf/ChangeLog)
|
||||
- process and thread buffer size can be explicitly specified by the user
|
||||
via the environment variables VT_BUFFER_SIZE and VT_THREAD_BUFFER_SIZE
|
||||
- improved filtering of CUDA kernels
|
||||
- fixed CUDA kernel launch configuration
|
||||
- fixed unification of local process group definitions
|
||||
- fixed wrapper generation for MPI implementations which don't support
|
||||
the MPI-2 standard
|
||||
@ -9,6 +12,8 @@
|
||||
VT is configured without trace compression support
|
||||
- fixed detection of OpenMP flag '-qsmp=*:omp:*' in the compiler
|
||||
wrappers
|
||||
- fixed C++ linker issue for manual instrumentation of multiple files
|
||||
- bugfix to OPARI (see tools/opari/ChangeLog:19)
|
||||
|
||||
5.11
|
||||
- updated version of internal OTF to 1.9sawfish
|
||||
|
@ -93,6 +93,8 @@ AC_PROG_F77
|
||||
AC_PROG_FC
|
||||
AM_PROG_AS
|
||||
|
||||
AC_LANG([C])
|
||||
|
||||
AS_IF([test x"$F77" != x],
|
||||
[AC_DEFINE([HAVE_F77], [1], [Define to 1 if VT is configured with Fortran 77 support.])])
|
||||
AM_CONDITIONAL(AMHAVEF77, test x"$F77" != x)
|
||||
@ -125,9 +127,6 @@ AC_CHECK_PROG(JAVA, java, java)
|
||||
|
||||
ACVT_CONF_TITLE([Header files, types, and functions])
|
||||
|
||||
# Added by Open MPI
|
||||
AC_LANG([C])
|
||||
|
||||
# Check for header files
|
||||
AC_HEADER_STDC
|
||||
AC_CHECK_HEADERS([stdint.h inttypes.h fnmatch.h sys/param.h])
|
||||
|
@ -252,20 +252,20 @@ OpenMP events, and performance counters.
|
||||
<P>
|
||||
After a successful tracing run, VampirTrace writes all collected data to a
|
||||
trace file in the Open Trace Format (OTF)<A NAME="tex2html3"
|
||||
HREF="#foot1146"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1153"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
As a result, the information is available for post-mortem analysis and
|
||||
visualization by various tools.
|
||||
Most notably, VampirTrace provides the input data for the Vampir analysis
|
||||
and visualization tool<A NAME="tex2html5"
|
||||
HREF="#foot1147"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1154"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
|
||||
<P>
|
||||
VampirTrace is included in OpenMPI 1.3 and later versions.
|
||||
If not disabled explicitly, VampirTrace is built automatically when installing
|
||||
OpenMPI<A NAME="tex2html7"
|
||||
HREF="#foot1148"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1155"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
|
||||
<P>
|
||||
@ -1083,7 +1083,7 @@ in a single file, that
|
||||
The names in between may contain wildcards such as ``?'', ``*'', and ``#''; each entry gets a new line.
|
||||
The lists end with <TT>END[_FILE]_<INCLUDE|EXCLUDE>_LIST</TT>. For further information on selective
|
||||
profiling have a look at the TAU documentation<A NAME="tex2html11"
|
||||
HREF="#foot1172"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1179"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
|
||||
<PRE>
|
||||
@ -1100,7 +1100,7 @@ Binary Instrumentation Using Dyninst
|
||||
The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to
|
||||
instrument the application during runtime (binary instrumentation), by using
|
||||
Dyninst<A NAME="tex2html13"
|
||||
HREF="#foot1173"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1180"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||
Recompiling is not necessary for this kind of instrumentation,
|
||||
but relinking:
|
||||
@ -1407,6 +1407,13 @@ of a VampirTrace instrumented executable:
|
||||
⇒ Section <A HREF="#sec:trace_file_size">3.3</A></TD>
|
||||
<TD ALIGN="LEFT">32M</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_THREAD_BUFFER_SIZE"></A><TT>VT_THREAD_BUFFER_SIZE</TT></TD>
|
||||
<TD ALIGN="LEFT">Size of internal event trace buffer for threads. If not defined, the
|
||||
size is set to 10% of <TT>VT_BUFFER_SIZE</TT>.
|
||||
<BR>
|
||||
⇒ Section <A HREF="#sec:trace_file_size">3.3</A></TD>
|
||||
<TD ALIGN="LEFT">0</TD>
|
||||
</TR>
|
||||
<TR><TD ALIGN="LEFT"><A NAME="VT_SETUP_VT_CLEAN"></A><TT>VT_CLEAN</TT></TD>
|
||||
<TD ALIGN="LEFT">Remove temporary trace files?</TD>
|
||||
<TD ALIGN="LEFT">yes</TD>
|
||||
@ -1701,6 +1708,17 @@ enough memory is available to hold the VampirTrace buffer and the application
|
||||
data, parts of the application may be swapped to disk, leading
|
||||
to a significant change in the behavior of the application.
|
||||
|
||||
<P>
|
||||
In multi-threaded applications a single buffer cannot be shared across a process
|
||||
and the associated threads for performance reasons. Thus independent
|
||||
buffers are created for every process and thread, where the process buffer
|
||||
size is 70% and the thread buffer size is 10% of the value set in
|
||||
<TT>VT_BUFFER_SIZE</TT>. The buffer size of processes and threads can be
|
||||
explicitly specified by setting the environment variable
|
||||
<TT>VT_THREAD_BUFFER_SIZE</TT>, which defines the buffer size of a thread,
|
||||
whereas the buffer size of a process is then defined by the value of
|
||||
<TT>VT_BUFFER_SIZE</TT>.
|
||||
|
||||
<P>
|
||||
Note that you can decrease the size of trace files significantly by
|
||||
using the runtime function filtering as explained in Section <A HREF="#sec:function_filter">5.1</A>.
|
||||
@ -1820,7 +1838,7 @@ for the enhanced timer synchronization:
|
||||
|
||||
<UL>
|
||||
<LI>CLAPACK<A NAME="tex2html15"
|
||||
HREF="#foot1183"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
HREF="#foot1190"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
|
||||
</LI>
|
||||
<LI>AMD ACML
|
||||
@ -2211,9 +2229,9 @@ Example: <TT>VT_CUPTI_METRICS=local_store:local_load</TT>
|
||||
</DL>
|
||||
|
||||
<P>
|
||||
Until CUDA Runtime Version 4.0 and CUDA Driver for Linux 270.27 the usage of
|
||||
CUDA events between asynchronous tasks serializes their on-device execution.
|
||||
This seems to be a bug, which has already been reported to NVIDIA.
|
||||
Until CUDA Runtime Version 4.0 and CUDA Driver for Linux 270.41.19
|
||||
the usage of CUDA events between asynchronous tasks serializes their on-device
|
||||
execution. This seems to be a bug, which has already been reported to NVIDIA.
|
||||
As VampirTrace uses CUDA events for time measurement and asynchronous tasks
|
||||
may overlap (depending on the CUDA device capability), there might be a noticeable
|
||||
impact on the program flow.
|
||||
@ -2223,7 +2241,7 @@ Until CUDA Runtime Version 4.0 and CUDA Driver for Linux 270.27 the usage of
|
||||
<P>
|
||||
|
||||
<H3><A NAME="SECTION00550100000000000000">
|
||||
Available CUDA Counter</A>
|
||||
CUDA runtime API Counter</A>
|
||||
</H3>
|
||||
If <TT>VT_CUDATRACE_GPUMEMUSAGE</TT> is enabled,
|
||||
<SPAN CLASS="textit">cudaMalloc</SPAN> and <SPAN CLASS="textit">cudaFree</SPAN> functions will be tracked to write
|
||||
@ -2237,6 +2255,21 @@ There are three counters, which provide some information about the kernel
|
||||
<P>
|
||||
|
||||
<H3><A NAME="SECTION00550200000000000000">
|
||||
CUDA Performance Counters - CUPTI Events</A>
|
||||
</H3>
|
||||
To capture performance counters in CUDA applications, CUPTI metrics can be
|
||||
specified with the environment variable <TT>VT_CUPTI_METRICS</TT>. Metrics
|
||||
are separated by default with ``:'' or with the separator specified by the user via
|
||||
<TT>VT_METRICS_SEP</TT>. The <SPAN CLASS="textit">CUPTI User's Guide</SPAN> provides
|
||||
information about the available counters.
|
||||
Due to the use of asynchronous CUDA events in a GPU stream, VampirTrace
|
||||
currently writes CUPTI counters directly on the process stream, which created
|
||||
the GPU stream. Future implementations will write this counter information
|
||||
to the GPU stream on which the corresponding CUDA kernel runs.
|
||||
|
||||
<P>
|
||||
|
||||
<H3><A NAME="SECTION00550300000000000000">
|
||||
Compile and Link CUDA applications</A>
|
||||
</H3>
|
||||
Use the VampirTrace compiler wrapper <TT>vtnvcc</TT> instead of <TT>nvcc</TT>
|
||||
@ -2293,7 +2326,7 @@ With the library tracing mechanism described in section <A HREF="#sec:3rd-party"
|
||||
|
||||
<P>
|
||||
|
||||
<H3><A NAME="SECTION00550300000000000000">
|
||||
<H3><A NAME="SECTION00550400000000000000">
|
||||
Tracing the NVIDIA CUDA SDK 3.x and 4.0</A>
|
||||
</H3>
|
||||
To get some example traces, replace the compiler commands in the common
|
||||
@ -2319,7 +2352,7 @@ Tracing the NVIDIA CUDA SDK 3.x and 4.0</A>
|
||||
<BR>
|
||||
<P>
|
||||
|
||||
<H3><A NAME="SECTION00550400000000000000">
|
||||
<H3><A NAME="SECTION00550500000000000000">
|
||||
Multithreaded CUDA applications</A>
|
||||
</H3>
|
||||
If threads are used to invoke asynchronous CUDA tasks, make sure to call a
|
||||
@ -2331,7 +2364,7 @@ Multithreaded CUDA applications</A>
|
||||
<BR>
|
||||
<P>
|
||||
|
||||
<H3><A NAME="SECTION00550500000000000000">
|
||||
<H3><A NAME="SECTION00550600000000000000">
|
||||
Mixed Use of CUDA runtime and driver API</A>
|
||||
</H3>
|
||||
As CUDA runtime API may implicitly create and destroy CUDA contexts, there
|
||||
@ -2341,21 +2374,6 @@ Mixed Use of CUDA runtime and driver API</A>
|
||||
which used the asynchronous tasks, before the other API closes its thread or
|
||||
context - cudaThreadExit() for runtime API and cuCtxDestroy() for driver API.
|
||||
Otherwise not yet flushed, asynchronous tasks will be missing in the final trace.
|
||||
|
||||
<P>
|
||||
|
||||
<H3><A NAME="SECTION00550600000000000000">
|
||||
CUDA Performance Counters - CUPTI Events</A>
|
||||
</H3>
|
||||
To capture performance counters in CUDA applications, CUPTI metrics can be
|
||||
specified with the environment variable <TT>VT_CUPTI_METRICS</TT>. Metrics
|
||||
are separated by default with '':`` or user specified by
|
||||
<TT>VT_METRICS_SEP</TT>. The <SPAN CLASS="textit">CUPTI User's Guide</SPAN> provides
|
||||
information about the available counters.
|
||||
Due to the use of asynchronous CUDA events in a GPU stream, VampirTrace
|
||||
currently writes CUPTI counters directly on the process stream, which created
|
||||
the GPU stream. Future implementations will write these counter information
|
||||
on the GPU stream, where the corresponding CUDA kernel runs on.
|
||||
|
||||
<P><P>
|
||||
<BR>
|
||||
@ -2364,11 +2382,11 @@ CUDA Performance Counters - CUPTI Events</A>
|
||||
<P>
|
||||
<SPAN CLASS="textbf">Note:</SPAN>
|
||||
<BR>
|
||||
For 64-bit systems VampirTrace has to be configured with the 64-bit
|
||||
For 32-bit systems VampirTrace has to be configured with the 32-bit
|
||||
version of cuda runtime library. If the link test fails, use the
|
||||
following configure option (⇒<A HREF="#sec:configure">A.2</A>):
|
||||
<PRE>
|
||||
--with-cudart-lib-dir=$CUDA_INSTALL_PATH/lib64
|
||||
--with-cudart-lib-dir=$CUDA_INSTALL_PATH/lib
|
||||
</PRE>
|
||||
|
||||
<P>
|
||||
@ -3381,21 +3399,21 @@ by the Linux 2.6 kernel are shown in the table.
|
||||
<P>
|
||||
<BR><HR><H4>Footnotes</H4>
|
||||
<DL>
|
||||
<DT><A NAME="foot1146">... (OTF)</A><A
|
||||
<DT><A NAME="foot1153">... (OTF)</A><A
|
||||
HREF="UserManual.html#tex2html3"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD><TT><A NAME="tex2html4"
|
||||
HREF="http://www.tu-dresden.de/zih/otf">http://www.tu-dresden.de/zih/otf</A></TT>
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1147">... tool </A><A
|
||||
<DT><A NAME="foot1154">... tool </A><A
|
||||
HREF="UserManual.html#tex2html5"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD><TT><A NAME="tex2html6"
|
||||
HREF="http://www.vampir.eu">http://www.vampir.eu</A></TT>
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1148">...
|
||||
<DT><A NAME="foot1155">...
|
||||
Open MPI </A><A
|
||||
HREF="UserManual.html#tex2html7"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
@ -3403,14 +3421,14 @@ Open MPI </A><A
|
||||
HREF="http://www.open-mpi.org/faq/?category=vampirtrace">http://www.open-mpi.org/faq/?category=vampirtrace</A></TT>
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1172">... documentation </A><A
|
||||
<DT><A NAME="foot1179">... documentation </A><A
|
||||
HREF="UserManual.html#tex2html11"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD><TT><A NAME="tex2html12"
|
||||
HREF="http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling">http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling</A></TT>
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1173">...
|
||||
<DT><A NAME="foot1180">...
|
||||
Dyninst </A><A
|
||||
HREF="UserManual.html#tex2html13"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
@ -3418,7 +3436,7 @@ Dyninst </A><A
|
||||
HREF="http://www.dyninst.org">http://www.dyninst.org</A></TT>
|
||||
|
||||
</DD>
|
||||
<DT><A NAME="foot1183">... CLAPACK</A><A
|
||||
<DT><A NAME="foot1190">... CLAPACK</A><A
|
||||
HREF="UserManual.html#tex2html15"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||
<DD><TT><A NAME="tex2html16"
|
||||
|
Двоичные данные
ompi/contrib/vt/vt/doc/UserManual.pdf
Двоичные данные
ompi/contrib/vt/vt/doc/UserManual.pdf
Двоичный файл не отображается.
@ -1,4 +1,4 @@
|
||||
<vtconfig vtconfigversion="1.0" vtversion="5.10">
|
||||
<vtconfig vtconfigversion="1.0" vtversion="@PACKAGE_VERSION@">
|
||||
|
||||
<config attr="nm">@VT_SETUP_NM@</config>
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
1.9.1openmpi
|
||||
1.9.1sawfish
|
||||
- improved zlib compression
|
||||
- added progress display to otfprofile-mpi
|
||||
- use pdf[la]tex instead of latex/dvipdf to convert otfprofile-mpi's
|
||||
TeX output to PDF
|
||||
|
@ -22,9 +22,7 @@ AC_SUBST([OTF_VERSION_LIBRARY])
|
||||
AC_PROG_CXX
|
||||
AC_PROG_CC
|
||||
|
||||
# Added by Open MPI
|
||||
AC_LANG([C])
|
||||
|
||||
AC_PROG_LIBTOOL
|
||||
|
||||
AC_C_BIGENDIAN
|
||||
|
@ -91,8 +91,6 @@ struct struct_OTF_File {
|
||||
|
||||
/** zlib entry buffer ... what a nice wordplay */
|
||||
unsigned char* zbuffer;
|
||||
|
||||
unsigned char* ybuffer;
|
||||
|
||||
uint32_t zbuffersize;
|
||||
|
||||
@ -127,7 +125,6 @@ void OTF_File_init( OTF_File* file ) {
|
||||
#ifdef HAVE_ZLIB
|
||||
file->z= NULL;
|
||||
file->zbuffer= NULL;
|
||||
file->ybuffer= NULL;
|
||||
file->zbuffersize= 1024*10;
|
||||
#endif /* HAVE_ZLIB */
|
||||
file->pos= 0;
|
||||
@ -148,7 +145,6 @@ void OTF_File_finalize( OTF_File* file ) {
|
||||
#ifdef HAVE_ZLIB
|
||||
file->z= NULL;
|
||||
file->zbuffer= NULL;
|
||||
file->ybuffer= NULL;
|
||||
file->zbuffersize= 0;
|
||||
#endif /* HAVE_ZLIB */
|
||||
file->pos= 0;
|
||||
@ -218,16 +214,13 @@ OTF_File* OTF_File_open_with_external_buffer( uint32_t len, const char* buffer,
|
||||
inflateInit( ret->z );
|
||||
|
||||
ret->zbuffer= malloc( ret->zbuffersize );
|
||||
ret->ybuffer= malloc( ret->zbuffersize );
|
||||
if( NULL == ret->zbuffer || NULL == ret->ybuffer) {
|
||||
if( NULL == ret->zbuffer ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"no memory left.\n", __FUNCTION__, __FILE__, __LINE__ );
|
||||
|
||||
free( ret->zbuffer );
|
||||
free( ret->ybuffer );
|
||||
ret->zbuffer= NULL;
|
||||
ret->ybuffer= NULL;
|
||||
free( ret->z );
|
||||
ret->z= NULL;
|
||||
free( ret );
|
||||
@ -263,11 +256,9 @@ OTF_File* OTF_File_open_with_external_buffer( uint32_t len, const char* buffer,
|
||||
size_t OTF_File_write( OTF_File* file, const void* ptr, size_t size ) {
|
||||
|
||||
|
||||
size_t byteswritten;
|
||||
size_t byteswritten = 0;
|
||||
|
||||
#ifdef HAVE_ZLIB
|
||||
int len = 0;
|
||||
int rest = (int) size;
|
||||
int status;
|
||||
#endif/* HAVE_ZLIB */
|
||||
|
||||
@ -309,122 +300,46 @@ size_t OTF_File_write( OTF_File* file, const void* ptr, size_t size ) {
|
||||
#ifdef HAVE_ZLIB
|
||||
|
||||
if ( NULL != file->z ) {
|
||||
|
||||
/* step 1 */
|
||||
/* is any data in the y-buffer */
|
||||
if ( 0 < file->z->avail_in ) {
|
||||
|
||||
/* len of the piece to fill the y buffer (to 10Kbyte) */
|
||||
len = file->zbuffersize - file->z->avail_in;
|
||||
|
||||
/* is enough data in the "*ptr" to fill the ybuffer fully */
|
||||
if ( len <= rest ) {
|
||||
|
||||
memcpy( file->ybuffer + file->z->avail_in, ptr, len );
|
||||
file->z->avail_in = file->zbuffersize;
|
||||
file->z->next_in = file->ybuffer;
|
||||
file->z->avail_out = file->zbuffersize;
|
||||
file->z->next_out = file->zbuffer;
|
||||
|
||||
status = deflate( file->z, Z_FULL_FLUSH );
|
||||
if ( status != Z_OK ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"error in compressing, status %u.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__, status );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
byteswritten= fwrite( file->zbuffer, 1, file->zbuffersize - file->z->avail_out, file->file );
|
||||
if( byteswritten < (file->zbuffersize - file->z->avail_out) ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"less bytes written than expected %u < %u.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__, (uint32_t) byteswritten,
|
||||
(uint32_t) (file->zbuffersize - file->z->avail_out) );
|
||||
|
||||
}
|
||||
|
||||
/* test if avail_in really ran empty */
|
||||
if ( 0 < file->z->avail_in ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"error in compressing.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__ );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
rest -= len;
|
||||
|
||||
} else {
|
||||
|
||||
/* no, it is not */
|
||||
|
||||
/* only copy the new data into the ybuffer */
|
||||
memcpy( file->ybuffer + file->z->avail_in, ptr, rest );
|
||||
file->z->avail_in += rest;
|
||||
rest = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* step 2 */
|
||||
/* if theres more than 10k in the "*ptr" */
|
||||
while( (uint32_t) rest >= file->zbuffersize ) {
|
||||
|
||||
file->z->avail_in = file->zbuffersize;
|
||||
file->z->next_in = ( ( ( unsigned char* ) ptr ) + len );
|
||||
file->z->avail_out = file->zbuffersize;
|
||||
file->z->next_out = file->zbuffer;
|
||||
|
||||
rest -= file->zbuffersize;
|
||||
len += file->zbuffersize;
|
||||
|
||||
status = deflate( file->z, Z_FULL_FLUSH );
|
||||
if ( status != Z_OK ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"error in compressing, status %u.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__, status );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
byteswritten= fwrite( file->zbuffer, 1, file->zbuffersize - file->z->avail_out,
|
||||
file->file );
|
||||
if( byteswritten < (file->zbuffersize - file->z->avail_out) ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"less bytes written than expected %u < %u.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__, (uint32_t) byteswritten,
|
||||
(uint32_t) (file->zbuffersize - file->z->avail_out) );
|
||||
|
||||
}
|
||||
|
||||
/* test if avail_in really ran empty */
|
||||
if ( 0 < file->z->avail_in ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"error in compressing.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__ );
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* step 3 */
|
||||
/* is there less than 10k data left ... throw it into the ybuffer */
|
||||
if ( rest > 0 ) {
|
||||
|
||||
memcpy( file->ybuffer, ( ( unsigned char* ) ptr ) + len, rest );
|
||||
file->z->avail_in = rest;
|
||||
}
|
||||
|
||||
return size;
|
||||
|
||||
/* compress the data without using the ybuffer */
|
||||
file->z->avail_in = size;
|
||||
file->z->next_in = (void*)ptr;
|
||||
|
||||
while (file->z->avail_in > 0)
|
||||
{
|
||||
status = deflate(file->z, Z_FULL_FLUSH);
|
||||
if (status == Z_STREAM_ERROR)
|
||||
{
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"error in compressing, status %i.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__, status );
|
||||
return byteswritten;
|
||||
}
|
||||
|
||||
while (file->z->avail_out == 0)
|
||||
{
|
||||
size_t towrite = file->zbuffersize - file->z->avail_out;
|
||||
if (towrite != fwrite(file->zbuffer, 1, towrite, file->file))
|
||||
{
|
||||
OTF_fprintf(stderr, "ERROR in function %s, file: %s, line %i:\n",
|
||||
"Failed to write %u bytes to file!\n",
|
||||
__FUNCTION__, __FILE__, __LINE__, towrite);
|
||||
return byteswritten;
|
||||
}
|
||||
file->z->avail_out = file->zbuffersize;
|
||||
file->z->next_out = file->zbuffer;
|
||||
status = deflate(file->z, Z_FULL_FLUSH);
|
||||
if (status == Z_STREAM_ERROR)
|
||||
{
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"error in compressing, status %i.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__, status );
|
||||
assert(status != Z_STREAM_ERROR);
|
||||
return byteswritten;
|
||||
}
|
||||
}
|
||||
byteswritten = size - file->z->avail_in;
|
||||
}
|
||||
} else {
|
||||
|
||||
#endif /* HAVE_ZLIB */
|
||||
@ -444,11 +359,11 @@ size_t OTF_File_write( OTF_File* file, const void* ptr, size_t size ) {
|
||||
|
||||
}
|
||||
|
||||
return byteswritten;
|
||||
|
||||
#ifdef HAVE_ZLIB
|
||||
}
|
||||
#endif /* HAVE_ZLIB */
|
||||
return byteswritten;
|
||||
|
||||
}
|
||||
|
||||
@ -605,8 +520,7 @@ int OTF_File_seek( OTF_File* file, uint64_t pos ) {
|
||||
file->z->total_in= 0;
|
||||
|
||||
/* re-initialize z object */
|
||||
inflateEnd( file->z );
|
||||
inflateInit( file->z );
|
||||
inflateReset(file->z);
|
||||
|
||||
/* do not sync at very beginning of compressed stream because it
|
||||
would skip the first block */
|
||||
@ -627,12 +541,15 @@ int OTF_File_seek( OTF_File* file, uint64_t pos ) {
|
||||
}
|
||||
|
||||
if ( Z_DATA_ERROR == sync ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
|
||||
/* do not break here, this might happen with larger zlib chunks */
|
||||
/*OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"Z_DATA_ERROR.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__ );
|
||||
|
||||
return -1;
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( Z_STREAM_ERROR == sync ) {
|
||||
@ -728,57 +645,51 @@ int OTF_File_close( OTF_File* file ) {
|
||||
|
||||
if ( NULL != file->z ) {
|
||||
|
||||
if ( OTF_FILEMODE_WRITE != file->mode ) {
|
||||
if ( OTF_FILEMODE_WRITE != file->mode ) {
|
||||
|
||||
inflateEnd( file->z );
|
||||
|
||||
} else {
|
||||
|
||||
if ( file->z->avail_in > 0 ) {
|
||||
|
||||
file->z->next_in = file->ybuffer;
|
||||
file->z->next_out = file->zbuffer;
|
||||
file->z->avail_out = file->zbuffersize;
|
||||
|
||||
status = deflate( file->z, Z_FULL_FLUSH );
|
||||
if ( status != Z_OK ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"error in uncompressing, status %u.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__, status );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if( 0 == OTF_File_revive( file, OTF_FILEMODE_WRITE ) ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
size_t towrite;
|
||||
/* flush buffer */
|
||||
if( 0 == OTF_File_revive( file, OTF_FILEMODE_WRITE ) ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"OTF_File_revive() failed.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__ );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
byteswritten= fwrite( file->zbuffer, 1, file->zbuffersize -
|
||||
file->z->avail_out, file->file );
|
||||
if( byteswritten < (file->zbuffersize - file->z->avail_out) ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"less bytes written than expected %u < %u.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__, (uint32_t) byteswritten,
|
||||
(uint32_t) (file->zbuffersize - file->z->avail_out) );
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
deflateEnd( file->z );
|
||||
return 0;
|
||||
}
|
||||
status = deflate(file->z, Z_FULL_FLUSH);
|
||||
assert(status != Z_STREAM_ERROR);
|
||||
towrite = file->zbuffersize - file->z->avail_out;
|
||||
byteswritten = 0;
|
||||
if (towrite > 0)
|
||||
byteswritten = fwrite(file->zbuffer, 1, towrite, file->file);
|
||||
if (towrite != byteswritten)
|
||||
{
|
||||
OTF_fprintf(stderr, "ERROR in function %s, file: %s, line: %i:\n"
|
||||
"Failed to write compressed buffer of size %lu\n",
|
||||
__FUNCTION__, __FILE__, __LINE__, towrite);
|
||||
}
|
||||
while (file->z->avail_out != file->zbuffersize)
|
||||
{
|
||||
file->z->avail_out = file->zbuffersize;
|
||||
file->z->next_out = file->zbuffer;
|
||||
deflate(file->z, Z_FULL_FLUSH);
|
||||
assert(status != Z_STREAM_ERROR);
|
||||
towrite = file->zbuffersize - file->z->avail_out;
|
||||
if (towrite > 0)
|
||||
fwrite(file->zbuffer, 1, towrite, file->file);
|
||||
}
|
||||
deflateEnd( file->z );
|
||||
}
|
||||
|
||||
free( file->z );
|
||||
|
||||
free( file->ybuffer );
|
||||
file->z = NULL;
|
||||
|
||||
free( file->zbuffer );
|
||||
file->zbuffer = NULL;
|
||||
}
|
||||
|
||||
#endif /* HAVE_ZLIB */
|
||||
@ -1153,7 +1064,7 @@ void OTF_File_setZBufferSize( OTF_File* file, uint32_t size ) {
|
||||
#ifdef HAVE_ZLIB
|
||||
|
||||
if( NULL != file->z ) {
|
||||
|
||||
void *tmp;
|
||||
if ( 32 > size ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
@ -1175,21 +1086,24 @@ void OTF_File_setZBufferSize( OTF_File* file, uint32_t size ) {
|
||||
__FUNCTION__, __FILE__, __LINE__, size );
|
||||
|
||||
}
|
||||
|
||||
file->zbuffersize= size;
|
||||
|
||||
if( NULL != file->zbuffer ) {
|
||||
|
||||
/* use realloc instead of free()/malloc() */
|
||||
/*if( NULL != file->zbuffer ) {
|
||||
free( file->zbuffer );
|
||||
}
|
||||
file->zbuffer= malloc( size );
|
||||
assert( file->zbuffer );
|
||||
}*/
|
||||
tmp = realloc( file->zbuffer, size );
|
||||
if (tmp == NULL)
|
||||
{
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"No memory left to reallocate zlib buffer.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__ );
|
||||
return;
|
||||
}
|
||||
file->zbuffer = tmp;
|
||||
file->zbuffersize= size;
|
||||
file->z->avail_out = size;
|
||||
file->z->next_out = file->z->next_in = file->zbuffer;
|
||||
|
||||
if( NULL != file->ybuffer ) {
|
||||
free( file->ybuffer );
|
||||
}
|
||||
file->ybuffer= malloc( size );
|
||||
assert( file->ybuffer );
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -1303,17 +1217,15 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager,
|
||||
inflateInit( ret->z );
|
||||
|
||||
ret->zbuffer= malloc( ret->zbuffersize );
|
||||
ret->ybuffer= malloc( ret->zbuffersize );
|
||||
if( NULL == ret->zbuffer || NULL == ret->ybuffer) {
|
||||
|
||||
if( NULL == ret->zbuffer ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"no memory left.\n",
|
||||
__FUNCTION__, __FILE__, __LINE__ );
|
||||
|
||||
free( ret->zbuffer );
|
||||
free( ret->ybuffer );
|
||||
ret->zbuffer= NULL;
|
||||
ret->ybuffer= NULL;
|
||||
|
||||
free( ret->z );
|
||||
ret->z= NULL;
|
||||
@ -1384,8 +1296,7 @@ OTF_File* OTF_File_open_zlevel( const char* filename, OTF_FileManager* manager,
|
||||
deflateInit( ret->z, zlevel );
|
||||
|
||||
ret->zbuffer= malloc( ret->zbuffersize );
|
||||
ret->ybuffer= malloc( ret->zbuffersize );
|
||||
if( NULL == ret->zbuffer || NULL == ret->ybuffer ) {
|
||||
if( NULL == ret->zbuffer ) {
|
||||
|
||||
OTF_fprintf( stderr, "ERROR in function %s, file: %s, line: %i:\n "
|
||||
"no memory left.\n",
|
||||
|
@ -46,27 +46,28 @@ static void prepare_progress( AllData& alldata, uint64_t max_bytes ) {
|
||||
assert( progress.recv_statuses );
|
||||
progress.recv_indices= new int[alldata.numRanks-1];
|
||||
assert( progress.recv_indices );
|
||||
progress.rank_cur_bytes= new uint64_t[alldata.numRanks-1];
|
||||
assert( progress.rank_cur_bytes );
|
||||
|
||||
/* initialize array of current bytes read and start
|
||||
persistent communication */
|
||||
|
||||
for ( uint32_t i= 0; i < alldata.numRanks; i++ ) {
|
||||
for ( uint32_t i= 0; i < alldata.numRanks -1; i++ ) {
|
||||
|
||||
if ( 0 < i ) {
|
||||
progress.rank_cur_bytes[i]= 0;
|
||||
|
||||
/* create persistent request handle */
|
||||
MPI_Recv_init( &(progress.recv_buffers[i-1]), 1,
|
||||
MPI_LONG_LONG_INT, i, Progress::MSG_TAG,
|
||||
MPI_COMM_WORLD,
|
||||
&(progress.recv_requests[i-1]) );
|
||||
/* create persistent request handle */
|
||||
MPI_Recv_init( &(progress.recv_buffers[i]), 1,
|
||||
MPI_LONG_LONG_INT, i+1, Progress::MSG_TAG,
|
||||
MPI_COMM_WORLD,
|
||||
&(progress.recv_requests[i]) );
|
||||
|
||||
/* start persistent communication */
|
||||
MPI_Start( &(progress.recv_requests[i-1]) );
|
||||
/* start persistent communication */
|
||||
MPI_Start( &(progress.recv_requests[i]) );
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
} else { /* 0 != my_rank */
|
||||
} else { /* 0 != alldata.myRank */
|
||||
|
||||
/* initialize request handle for sending progress to rank 0 */
|
||||
progress.send_request = MPI_REQUEST_NULL;
|
||||
@ -76,6 +77,7 @@ static void prepare_progress( AllData& alldata, uint64_t max_bytes ) {
|
||||
/* block until all worker ranks have reached this point to avoid that the
|
||||
progress does a big jump at beginning */
|
||||
MPI_Barrier( MPI_COMM_WORLD );
|
||||
|
||||
}
|
||||
|
||||
if ( 0 == alldata.myRank ) {
|
||||
@ -93,14 +95,9 @@ static void update_progress( AllData& alldata, uint64_t delta_bytes,
|
||||
|
||||
Progress& progress= alldata.progress;
|
||||
|
||||
if ( 0 == alldata.myRank ) {
|
||||
progress.cur_bytes += delta_bytes;
|
||||
|
||||
progress.cur_bytes += delta_bytes;
|
||||
|
||||
} else {
|
||||
|
||||
progress.cur_bytes= delta_bytes;
|
||||
}
|
||||
uint64_t sum_cur_bytes= progress.cur_bytes;
|
||||
|
||||
if ( 1 < alldata.numRanks ) {
|
||||
|
||||
@ -109,6 +106,7 @@ static void update_progress( AllData& alldata, uint64_t delta_bytes,
|
||||
/* get current bytes read from all worker ranks */
|
||||
|
||||
int out_count;
|
||||
uint32_t i;
|
||||
|
||||
/* either wait or test for one or more updates from worker ranks */
|
||||
|
||||
@ -129,32 +127,37 @@ static void update_progress( AllData& alldata, uint64_t delta_bytes,
|
||||
|
||||
if ( MPI_UNDEFINED != out_count ) {
|
||||
|
||||
int index;
|
||||
uint32_t i;
|
||||
|
||||
for ( i= 0; i < (uint32_t) out_count; i++ ) {
|
||||
|
||||
index= progress.recv_indices[i];
|
||||
int index= progress.recv_indices[i];
|
||||
|
||||
/* worker rank (index+1) is finished? */
|
||||
if ( (uint64_t)-1 != progress.recv_buffers[index] ) {
|
||||
if ( (uint64_t)-1 == progress.recv_buffers[index] ) {
|
||||
|
||||
/* this rank is finished */
|
||||
progress.ranks_left--;
|
||||
|
||||
} else {
|
||||
|
||||
/* update rank's current bytes read and restart
|
||||
persistent communication */
|
||||
|
||||
progress.cur_bytes += progress.recv_buffers[index];
|
||||
progress.rank_cur_bytes[index]= progress.recv_buffers[index];
|
||||
|
||||
MPI_Start( &(progress.recv_requests[progress.recv_indices[i]]) );
|
||||
|
||||
} else {
|
||||
|
||||
/* this rank is finished */
|
||||
progress.ranks_left -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
} else { /* 0 != my_rank */
|
||||
/* recompute sum of current bytes read */
|
||||
for( i= 0; i < alldata.numRanks -1; i++ ) {
|
||||
|
||||
sum_cur_bytes += progress.rank_cur_bytes[i];
|
||||
}
|
||||
|
||||
} else { /* 0 != alldata.myRank */
|
||||
|
||||
int do_send = 1;
|
||||
MPI_Status status;
|
||||
@ -184,7 +187,7 @@ static void update_progress( AllData& alldata, uint64_t delta_bytes,
|
||||
/* show progress */
|
||||
|
||||
double percent =
|
||||
100.0 * (double) progress.cur_bytes / (double) progress.max_bytes;
|
||||
100.0 * (double) sum_cur_bytes / (double) progress.max_bytes;
|
||||
|
||||
static const char signs[2]= { '.',' ' };
|
||||
static int signi= 0;
|
||||
@ -214,7 +217,7 @@ static void finish_progress( AllData& alldata ) {
|
||||
update_progress( alldata, 0, true );
|
||||
}
|
||||
|
||||
} else { /* 0 != my_rank */
|
||||
} else { /* 0 != alldata.myRank */
|
||||
|
||||
MPI_Status status;
|
||||
MPI_Wait( &(progress.send_request), &status );
|
||||
@ -252,6 +255,7 @@ static void finish_progress( AllData& alldata ) {
|
||||
delete [] progress.recv_requests;
|
||||
delete [] progress.recv_statuses;
|
||||
delete [] progress.recv_indices;
|
||||
delete [] progress.rank_cur_bytes;
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -71,6 +71,7 @@ struct Progress {
|
||||
MPI_Status* recv_statuses; /* receive statuses */
|
||||
int* recv_indices; /* indices of completed recv. operations */
|
||||
|
||||
uint64_t* rank_cur_bytes; /* current bytes read per rank (except rank 0) */
|
||||
uint32_t ranks_left; /* root keeps track of ranks left to query */
|
||||
};
|
||||
|
||||
|
@ -592,7 +592,7 @@ bool ReduceData( AllData& alldata ) {
|
||||
|
||||
// DEBUG
|
||||
//cout << " round " << round << " recv " << peer << "--> " <<
|
||||
//my_rank << " with " <<
|
||||
//alldata.myRank << " with " <<
|
||||
//sizes[0] << " bytes, " <<
|
||||
//sizes[1] << ", " <<
|
||||
//sizes[2] << ", " <<
|
||||
@ -615,7 +615,7 @@ bool ReduceData( AllData& alldata ) {
|
||||
buffer= pack_worker_data( alldata, sizes );
|
||||
|
||||
// DEBUG
|
||||
//cout << " round " << round << " send " << my_rank <<
|
||||
//cout << " round " << round << " send " << alldata.myRank <<
|
||||
//" --> " << peer << " with " <<
|
||||
//sizes[0] << " bytes, " <<
|
||||
//sizes[1] << ", " <<
|
||||
|
@ -34,18 +34,18 @@ __VT_EXTERN_DECL void VT_User_end_id__(unsigned int rid);
|
||||
~VT_Tracer() __VT_NOINST_ATTR;
|
||||
const char* n;
|
||||
};
|
||||
template<> VT_Tracer<sizeof(int)>::VT_Tracer(const char* r, const char* f,
|
||||
template<> inline VT_Tracer<sizeof(int)>::VT_Tracer(const char* r, const char* f,
|
||||
int l) : n(r) { VT_User_start__(n, f, l); }
|
||||
template<> VT_Tracer<sizeof(int)>::~VT_Tracer() { VT_User_end__(n); }
|
||||
template<> inline VT_Tracer<sizeof(int)>::~VT_Tracer() { VT_User_end__(n); }
|
||||
|
||||
template<> struct VT_Tracer<1> {
|
||||
VT_Tracer(unsigned int r, const char* f = 0, int l = 0) __VT_NOINST_ATTR;
|
||||
~VT_Tracer() __VT_NOINST_ATTR;
|
||||
unsigned int i;
|
||||
};
|
||||
VT_Tracer<1>::VT_Tracer(unsigned int r, const char* f, int l)
|
||||
inline VT_Tracer<1>::VT_Tracer(unsigned int r, const char* f, int l)
|
||||
: i(r) { VT_User_start_id__(i); }
|
||||
VT_Tracer<1>::~VT_Tracer() { VT_User_end_id__(i); }
|
||||
inline VT_Tracer<1>::~VT_Tracer() { VT_User_end_id__(i); }
|
||||
#endif /* __cplusplus */
|
||||
|
||||
#if (defined(VTRACE)) && !(defined(VTRACE_NO_REGION))
|
||||
|
@ -1,3 +1,6 @@
|
||||
19. Revised 18.
|
||||
to get back support for comma-separated OpenMP clauses.
|
||||
|
||||
18. Fixed processing of Fortran line-continuation where
|
||||
commas at end of line (e.g. within OpenMP clauses) was removed
|
||||
|
||||
|
@ -1,3 +1,6 @@
|
||||
19. Revised 18.
|
||||
to get back support for comma-separated OpenMP clauses.
|
||||
|
||||
18. Fixed processing of Fortran line-continuation where
|
||||
commas at end of line (e.g. within OpenMP clauses) was removed
|
||||
|
||||
|
@ -130,6 +130,10 @@ void OMPragmaF::remove_empties() {
|
||||
if ( c != string::npos ) --c;
|
||||
string::size_type amp = lines[lastline].find_last_not_of(" \t", c);
|
||||
if ( lines[lastline][amp] == '&' ) lines[lastline][amp] = ' ';
|
||||
|
||||
// remove trailing comma
|
||||
amp = lines[lastline].find_last_not_of(" \t", c);
|
||||
if ( lines[lastline][amp] == ',' ) lines[lastline][amp] = ' ';
|
||||
}
|
||||
|
||||
OMPragma* OMPragmaF::split_combined() {
|
||||
|
@ -43,12 +43,11 @@ FilterCommonC::~FilterCommonC()
|
||||
// protected methods
|
||||
//
|
||||
|
||||
bool
|
||||
void
|
||||
FilterCommonC::prepareProgress( const uint64_t& maxBytes )
|
||||
{
|
||||
bool error = false;
|
||||
|
||||
uint64_t max_bytes = maxBytes;
|
||||
m_progress.curBytes = 0;
|
||||
m_progress.maxBytes = maxBytes;
|
||||
|
||||
#ifdef VT_MPI
|
||||
if( m_numWorkerRanks > 1 )
|
||||
@ -56,29 +55,16 @@ FilterCommonC::prepareProgress( const uint64_t& maxBytes )
|
||||
// reduce max. bytes to rank 0
|
||||
//
|
||||
uint64_t sum_max_bytes;
|
||||
MPI_Reduce( &max_bytes, &sum_max_bytes, 1, MPI_LONG_LONG_INT, MPI_SUM, 0,
|
||||
m_workerComm );
|
||||
MASTER max_bytes = sum_max_bytes;
|
||||
}
|
||||
#endif // VT_MPI
|
||||
MPI_Reduce( &(m_progress.maxBytes), &sum_max_bytes, 1, MPI_LONG_LONG_INT,
|
||||
MPI_SUM, 0, m_workerComm );
|
||||
|
||||
// initalize variables of data structure ProgressS
|
||||
//
|
||||
|
||||
m_progress.curBytes = 0;
|
||||
m_progress.maxBytes = max_bytes;
|
||||
|
||||
#ifdef VT_MPI
|
||||
// initialize MPI related variables of data structure ProgressS
|
||||
//
|
||||
if( m_numWorkerRanks > 1 )
|
||||
{
|
||||
MASTER
|
||||
{
|
||||
m_progress.maxBytes = sum_max_bytes;
|
||||
m_progress.ranksLeft = m_numWorkerRanks - 1;
|
||||
|
||||
// allocate memory for some arrays
|
||||
//
|
||||
m_progress.rankCurBytes = new uint64_t[m_numWorkerRanks];
|
||||
assert( m_progress.rankCurBytes );
|
||||
m_progress.recvBuffers = new uint64_t[m_numWorkerRanks-1];
|
||||
assert( m_progress.recvBuffers );
|
||||
m_progress.recvRequests = new MPI_Request[m_numWorkerRanks-1];
|
||||
@ -87,24 +73,22 @@ FilterCommonC::prepareProgress( const uint64_t& maxBytes )
|
||||
assert( m_progress.recvStatuses );
|
||||
m_progress.recvIndices = new VT_MPI_INT[m_numWorkerRanks-1];
|
||||
assert( m_progress.recvIndices );
|
||||
m_progress.rankCurBytes = new uint64_t[m_numWorkerRanks-1];
|
||||
assert( m_progress.rankCurBytes );
|
||||
|
||||
// initialize arrays
|
||||
//
|
||||
for( int i = 0; i < m_numWorkerRanks; i++ )
|
||||
for( VT_MPI_INT i = 0; i < m_numWorkerRanks -1; i++ )
|
||||
{
|
||||
// initialize array of current bytes read
|
||||
m_progress.rankCurBytes[i] = 0;
|
||||
|
||||
if( i > 0 )
|
||||
{
|
||||
// create persistent request handle
|
||||
MPI_Recv_init( &(m_progress.recvBuffers[i-1]), 1, MPI_LONG_LONG_INT,
|
||||
i, m_progress.msgTag, m_workerComm,
|
||||
&(m_progress.recvRequests[i-1]) );
|
||||
// create persistent request handle
|
||||
MPI_Recv_init( &(m_progress.recvBuffers[i]), 1, MPI_LONG_LONG_INT,
|
||||
i+1, m_progress.msgTag, m_workerComm,
|
||||
&(m_progress.recvRequests[i]) );
|
||||
|
||||
// start persistent communication
|
||||
MPI_Start( &(m_progress.recvRequests[i-1]) );
|
||||
}
|
||||
// start persistent communication
|
||||
MPI_Start( &(m_progress.recvRequests[i]) );
|
||||
}
|
||||
}
|
||||
else // SLAVE
|
||||
@ -119,42 +103,46 @@ FilterCommonC::prepareProgress( const uint64_t& maxBytes )
|
||||
}
|
||||
#endif // VT_MPI
|
||||
|
||||
return !error;
|
||||
MASTER
|
||||
{
|
||||
// show initial progress
|
||||
printf( " %7.2f %%\r", 0.0 );
|
||||
fflush( stdout );
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
FilterCommonC::updateProgress( const uint64_t& bytes )
|
||||
void
|
||||
FilterCommonC::updateProgress( const uint64_t& deltaBytes, bool wait )
|
||||
{
|
||||
bool error = false;
|
||||
|
||||
#if defined(HAVE_OMP) && HAVE_OMP
|
||||
# pragma omp critical (progress)
|
||||
{
|
||||
#endif // HAVE_OMP
|
||||
|
||||
// add bytes to current bytes read
|
||||
m_progress.curBytes += bytes;
|
||||
m_progress.curBytes += deltaBytes;
|
||||
|
||||
uint64_t sum_cur_bytes = m_progress.curBytes;
|
||||
|
||||
#ifdef VT_MPI
|
||||
if( m_numWorkerRanks > 1 )
|
||||
{
|
||||
MASTER
|
||||
{
|
||||
// add bytes to current bytes read of rank 0
|
||||
m_progress.rankCurBytes[0] += bytes;
|
||||
// get current bytes read from all worker ranks
|
||||
//
|
||||
|
||||
VT_MPI_INT i;
|
||||
|
||||
#if defined(HAVE_OMP) && HAVE_OMP
|
||||
# pragma omp master
|
||||
{
|
||||
#endif // HAVE_OMP
|
||||
|
||||
// get current bytes read from all worker ranks
|
||||
//
|
||||
|
||||
VT_MPI_INT out_count;
|
||||
|
||||
// rank 0 is finished? (called from finishProgress())
|
||||
if( bytes == 0 )
|
||||
if( wait )
|
||||
{
|
||||
// yes, wait for one or more updates from worker ranks
|
||||
MPI_Waitsome( m_numWorkerRanks - 1, m_progress.recvRequests, &out_count,
|
||||
@ -169,34 +157,35 @@ FilterCommonC::updateProgress( const uint64_t& bytes )
|
||||
|
||||
if( out_count != MPI_UNDEFINED )
|
||||
{
|
||||
int index;
|
||||
int i;
|
||||
|
||||
for( i = 0; i < out_count; i++ )
|
||||
{
|
||||
index = m_progress.recvIndices[i];
|
||||
VT_MPI_INT index = m_progress.recvIndices[i];
|
||||
|
||||
// worker rank (index+1) is finished?
|
||||
if( m_progress.recvBuffers[index] != (uint64_t)-1 )
|
||||
if( m_progress.recvBuffers[index] == (uint64_t)-1 )
|
||||
{
|
||||
// yes, decrement counter of ranks left
|
||||
m_progress.ranksLeft--;
|
||||
}
|
||||
else
|
||||
{
|
||||
// no, update rank's current bytes read and restart persistent
|
||||
// communication
|
||||
//
|
||||
m_progress.rankCurBytes[index+1] = m_progress.recvBuffers[index];
|
||||
m_progress.rankCurBytes[index] = m_progress.recvBuffers[index];
|
||||
MPI_Start( &(m_progress.recvRequests[m_progress.recvIndices[i]]) );
|
||||
}
|
||||
}
|
||||
|
||||
// recompute sum of current bytes read
|
||||
//
|
||||
m_progress.curBytes = 0;
|
||||
for( i = 0; i < m_numWorkerRanks; i++ )
|
||||
m_progress.curBytes += m_progress.rankCurBytes[i];
|
||||
}
|
||||
|
||||
#if defined(HAVE_OMP) && HAVE_OMP
|
||||
} // omp master
|
||||
#endif // HAVE_OMP
|
||||
|
||||
// recompute sum of current bytes read
|
||||
//
|
||||
for( i = 0; i < m_numWorkerRanks-1; i++ )
|
||||
sum_cur_bytes += m_progress.rankCurBytes[i];
|
||||
}
|
||||
else // SLAVE
|
||||
{
|
||||
@ -236,24 +225,20 @@ FilterCommonC::updateProgress( const uint64_t& bytes )
|
||||
//
|
||||
|
||||
double progress =
|
||||
100.0 * (double)m_progress.curBytes / (double)m_progress.maxBytes;
|
||||
100.0 * (double)sum_cur_bytes / (double)m_progress.maxBytes;
|
||||
|
||||
printf( " %7.2f %%\r", progress );
|
||||
printf( " %7.2f %%\n", progress );
|
||||
fflush( stdout );
|
||||
}
|
||||
|
||||
#if defined(HAVE_OMP) && HAVE_OMP
|
||||
} // omp critical
|
||||
#endif // HAVE_OMP
|
||||
|
||||
return !error;
|
||||
}
|
||||
|
||||
bool
|
||||
void
|
||||
FilterCommonC::finishProgress()
|
||||
{
|
||||
bool error = false;
|
||||
|
||||
#ifdef VT_MPI
|
||||
if( m_numWorkerRanks > 1 )
|
||||
{
|
||||
@ -262,10 +247,8 @@ FilterCommonC::finishProgress()
|
||||
// update progress until all worker ranks are
|
||||
// finished / all bytes are read
|
||||
//
|
||||
while( !error && m_progress.curBytes < m_progress.maxBytes )
|
||||
{
|
||||
error = !updateProgress( 0 );
|
||||
}
|
||||
while( m_progress.ranksLeft > 0 )
|
||||
updateProgress( 0, true );
|
||||
}
|
||||
else // SLAVE
|
||||
{
|
||||
@ -306,16 +289,14 @@ FilterCommonC::finishProgress()
|
||||
|
||||
// free memory
|
||||
//
|
||||
delete [] m_progress.rankCurBytes;
|
||||
delete [] m_progress.recvBuffers;
|
||||
delete [] m_progress.recvRequests;
|
||||
delete [] m_progress.recvStatuses;
|
||||
delete [] m_progress.recvIndices;
|
||||
delete [] m_progress.rankCurBytes;
|
||||
}
|
||||
}
|
||||
#endif // VT_MPI
|
||||
|
||||
return !error;
|
||||
}
|
||||
|
||||
#ifdef VT_MPI
|
||||
|
@ -36,13 +36,13 @@ public:
|
||||
protected:
|
||||
|
||||
// prepare progress
|
||||
bool prepareProgress( const uint64_t& maxBytes );
|
||||
void prepareProgress( const uint64_t& maxBytes );
|
||||
|
||||
// update progress
|
||||
bool updateProgress( const uint64_t& bytes );
|
||||
void updateProgress( const uint64_t& deltaBytes, bool wait = false );
|
||||
|
||||
// finish progress
|
||||
bool finishProgress( void );
|
||||
void finishProgress( void );
|
||||
|
||||
#ifdef VT_MPI
|
||||
// get number and communicator of worker ranks
|
||||
@ -71,14 +71,15 @@ private:
|
||||
|
||||
static const VT_MPI_INT msgTag = 500; // message tag
|
||||
|
||||
uint64_t* rankCurBytes; // current bytes read per rank
|
||||
|
||||
MPI_Request sendRequest; // sender request handle
|
||||
|
||||
uint64_t* recvBuffers; // receive buffers
|
||||
MPI_Request* recvRequests; // persistent receive request handles
|
||||
MPI_Status* recvStatuses; // receive statuses
|
||||
VT_MPI_INT* recvIndices; // indices of completed receive operations
|
||||
|
||||
uint64_t* rankCurBytes; // current bytes read per rank (except rank 0)
|
||||
uint32_t ranksLeft; // root keeps track of ranks left to quary
|
||||
#endif // VT_MPI
|
||||
|
||||
};
|
||||
|
@ -952,8 +952,7 @@ FilterTraceC::processEventsAndStatistics()
|
||||
if( !getMaxBytesToRead( max_bytes ) )
|
||||
return false;
|
||||
|
||||
if( !prepareProgress( max_bytes ) )
|
||||
return false;
|
||||
prepareProgress( max_bytes );
|
||||
}
|
||||
|
||||
// put output stream ids into a vector, so we can iterate over it by an index
|
||||
@ -1204,7 +1203,7 @@ FilterTraceC::processEventsAndStatistics()
|
||||
// set record limit, if progress is enabled
|
||||
//
|
||||
if( Params.show_progress )
|
||||
OTF_Reader_setRecordLimit( reader, 100000 );
|
||||
OTF_Reader_setRecordLimit( reader, 1000000 );
|
||||
|
||||
// read/write events and statistics
|
||||
//
|
||||
@ -1299,8 +1298,8 @@ FilterTraceC::processEventsAndStatistics()
|
||||
|
||||
// finish progress, if enabled
|
||||
//
|
||||
if( !error && Params.show_progress )
|
||||
error = !finishProgress();
|
||||
if( Params.show_progress )
|
||||
finishProgress();
|
||||
|
||||
return !error;
|
||||
}
|
||||
|
@ -1,6 +1,4 @@
|
||||
<!DOCTYPE vtenv SYSTEM "/home/mk/workspace2/vtLaunch/src/vt-environmentals.dtd">
|
||||
|
||||
<vtenv vtenvversion="1.0" vtversion="5.10">
|
||||
<vtenv vtenvversion="1.0" vtversion="@PACKAGE_VERSION@">
|
||||
<group identifier="gen" descname="General Trace Settings" priority="100">
|
||||
<desc>General options for tracing and profiling.</desc>
|
||||
</group>
|
||||
@ -119,6 +117,23 @@
|
||||
<available>1</available>
|
||||
<anchor>VT_SETUP_VT_BUFFER_SIZE</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="250" descname="Buffer size per thread" isadvanced="1" group="genTset"
|
||||
envname="VT_THREAD_BUFFER_SIZE">
|
||||
<desc>Set the size of the internal event buffer for threads.</desc>
|
||||
<valuedesc>
|
||||
<value type="integer">
|
||||
<default>0</default>
|
||||
<range max="10000000000000000" min="1" ></range>
|
||||
</value>
|
||||
<value type="enum">
|
||||
<default>K</default>
|
||||
<enumid>sizeunitenum</enumid>
|
||||
</value>
|
||||
</valuedesc>
|
||||
<since major="5" minor="0" patch="0" ></since>
|
||||
<available>1</available>
|
||||
<anchor>VT_SETUP_VT_THREAD_BUFFER_SIZE</anchor>
|
||||
</env>
|
||||
<env isdeprecated="0" priority="300" descname="Application path" isadvanced="1" group="genFset"
|
||||
envname="VT_APPPATH">
|
||||
<desc>Path to the application executable.</desc>
|
||||
|
@ -5,7 +5,7 @@ compiler_flags_env=VT_NVCFLAGS
|
||||
compiler=@VT_WRAPPER_NVCC_COMPILER@
|
||||
compiler_flags=-Xcompiler="@PTHREAD_CFLAGS@" @VT_WRAPPER_NVCC_EXTRA_COMPILER_FLAGS@
|
||||
linker_flags=@VT_WRAPPER_NVCC_EXTRA_LINKER_FLAGS@
|
||||
libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @VT_WRAPPER_NVCC_EXTRA_LIBS@
|
||||
libs=@OTFLIBDIR@ @OTFLIB@ @PAPILIBDIR@ @PAPILIB@ @CPCLIBDIR@ @CPCLIB@ @DLLIBDIR@ @DLLIB@ @MATHLIB@ @CUPTILIBDIR@ @CUPTILIB@ @VT_WRAPPER_NVCC_EXTRA_LIBS@
|
||||
includedir=${includedir}
|
||||
libdir=${libdir}
|
||||
vtlib=@VT_WRAPPER_VTLIB@
|
||||
|
@ -35,7 +35,7 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
|
||||
|
||||
/*
|
||||
* Register the finalize function of the CUDA wrapper to be called before
|
||||
* the program exits and CUDA has done its implizit clean-up.
|
||||
* the program exits and CUDA has done its implicit clean-up.
|
||||
* A CUDA function (context creating???) has to be called before, as
|
||||
* VampirTrace CUDA wrapper has to finalize before CUDA does its clean-up!!!
|
||||
*/
|
||||
@ -219,9 +219,6 @@ VTThrdMutex* VTThrdMutexCudart = NULL;
|
||||
|
||||
#define checkCUDACall(ecode, msg) __checkCUDACall(ecode, msg, __FILE__,__LINE__)
|
||||
|
||||
/* minimum size of an asynchronous task (in bytes) */
|
||||
#define MIN_ASYNC_ENTRY sizeof(VTCUDAMemcpy)
|
||||
|
||||
/* library wrapper object */
|
||||
VTLibwrap* vt_cudart_lw = VT_LIBWRAP_NULL;
|
||||
|
||||
@ -346,6 +343,14 @@ typedef struct
|
||||
size_t byteCount; /**< number of bytes */
|
||||
}VTCUDAMemcpy;
|
||||
|
||||
/* kernel configure stack element */
|
||||
typedef struct
|
||||
{
|
||||
VTCUDAStrm *strm; /**< corresponding stream/thread */
|
||||
uint32_t blocksPerGrid; /**< number of blocks per grid */
|
||||
uint32_t threadsPerBlock; /**< number of threads per block */
|
||||
}VTCUDAknconf;
|
||||
|
||||
/* structure of a VampirTrace CUDA malloc (initiated with cudaMalloc*() */
|
||||
typedef struct vtcMallocStruct
|
||||
{
|
||||
@ -370,7 +375,7 @@ typedef struct vtcudaDev_st
|
||||
buffer_t asyncbuf; /**< points to the first byte in buffer */
|
||||
buffer_t buf_pos; /**< current buffer position */
|
||||
buffer_t buf_size; /**< buffer size (in bytes) */
|
||||
uint8_t kn_conf; /**< flag: kernel configured? */
|
||||
buffer_t conf_stack; /**< top of the kernel configure stack */
|
||||
VTCUDABufEvt *evtbuf; /**< the preallocated cuda event list */
|
||||
VTCUDABufEvt *evtbuf_pos; /**< current unused event space */
|
||||
struct vtcudaDev_st *next; /**< pointer to next element in list */
|
||||
@ -464,6 +469,7 @@ void vt_cudartwrap_init(void)
|
||||
|
||||
if(vt_cudart_trace_enabled){
|
||||
size_t minTaskSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAMemcpy);
|
||||
size_t minBufSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf);
|
||||
|
||||
syncLevel = (uint8_t)vt_env_cudatrace_sync();
|
||||
trace_kernels = (uint8_t)vt_env_cudatrace_kernel();
|
||||
@ -481,13 +487,17 @@ void vt_cudartwrap_init(void)
|
||||
#endif
|
||||
|
||||
trace_events = 0;
|
||||
if(trace_kernels){
|
||||
minTaskSize = sizeof(VTCUDAKernel);
|
||||
|
||||
if(trace_memcpyAsync){
|
||||
minTaskSize = sizeof(VTCUDAMemcpy);
|
||||
minBufSize = sizeof(VTCUDAMemcpy);
|
||||
trace_events = 1;
|
||||
}
|
||||
|
||||
if(trace_memcpyAsync){
|
||||
if(sizeof(VTCUDAMemcpy) < minTaskSize) minTaskSize = sizeof(VTCUDAMemcpy);
|
||||
|
||||
if(trace_kernels){
|
||||
if(sizeof(VTCUDAKernel) < minTaskSize) minTaskSize = sizeof(VTCUDAKernel);
|
||||
if(sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf) > minBufSize)
|
||||
minBufSize = sizeof(VTCUDAKernel) + sizeof(VTCUDAknconf);
|
||||
trace_events = 1;
|
||||
}
|
||||
|
||||
@ -495,17 +505,17 @@ void vt_cudartwrap_init(void)
|
||||
if(trace_events){
|
||||
/* get user-defined task buffer size and check it */
|
||||
asyncBufSize = vt_env_cudatrace_bsize();
|
||||
if(asyncBufSize < MIN_ASYNC_ENTRY){
|
||||
if(asyncBufSize < minBufSize){
|
||||
if(asyncBufSize > 0){
|
||||
vt_warning("[CUDART] Minimal buffer size is %d bytes", MIN_ASYNC_ENTRY);
|
||||
vt_warning("[CUDART] Minimal buffer size is %d bytes", minBufSize);
|
||||
}
|
||||
asyncBufSize = VTGPU_DEFAULT_BSIZE;
|
||||
}else if(VTGPU_MAX_BSIZE < asyncBufSize){
|
||||
vt_warning("[CUDART] Current CUDA buffer size requires %d CUDA events.\n"
|
||||
"The recommended max. CUDA buffer size is %d. "
|
||||
"(export VT_CUDA_BUFFER_SIZE=2097152)",
|
||||
2*asyncBufSize/sizeof(VTCUDAKernel), VTGPU_MAX_BSIZE);
|
||||
/* TODO: dynamic event creation for more than 2097152 bytes cuda buffer size */
|
||||
2*asyncBufSize/minTaskSize, VTGPU_MAX_BSIZE);
|
||||
/* TODO: dynamic event creation for more than 2097152 bytes CUDA buffer size */
|
||||
}
|
||||
|
||||
/* determine maximum necessary VT-events (=2 CUDA events) */
|
||||
@ -1143,7 +1153,7 @@ static VTCUDADevice* VTCUDAcreateDevice(uint32_t ptid, int device)
|
||||
vtDev->asyncbuf = NULL;
|
||||
vtDev->buf_pos = NULL;
|
||||
vtDev->buf_size = NULL;
|
||||
vtDev->kn_conf = 0;
|
||||
vtDev->conf_stack = NULL;
|
||||
vtDev->evtbuf = NULL;
|
||||
vtDev->evtbuf_pos = NULL;
|
||||
vtDev->strmList = NULL;
|
||||
@ -1183,6 +1193,7 @@ static VTCUDADevice* VTCUDAcreateDevice(uint32_t ptid, int device)
|
||||
}
|
||||
vtDev->buf_pos = vtDev->asyncbuf;
|
||||
vtDev->buf_size = vtDev->asyncbuf + asyncBufSize;
|
||||
vtDev->conf_stack = vtDev->buf_size;
|
||||
|
||||
vtDev->evtbuf = (VTCUDABufEvt*)malloc(maxEvtNum*sizeof(VTCUDABufEvt));
|
||||
if(vtDev->evtbuf == NULL)
|
||||
@ -1205,6 +1216,7 @@ static VTCUDADevice* VTCUDAcreateDevice(uint32_t ptid, int device)
|
||||
vtDev->asyncbuf = malloc(asyncBufSize);
|
||||
vtDev->buf_pos = vtDev->asyncbuf;
|
||||
vtDev->buf_size = vtDev->asyncbuf + asyncBufSize;
|
||||
vtDev->conf_stack = vtDev->buf_size;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1382,11 +1394,12 @@ static VTCUDAMemcpy* addMemcpy2Buf(enum cudaMemcpyKind kind, int count,
|
||||
ptid = VT_MY_THREAD;
|
||||
vtDev = VTCUDAcheckThread(stream, ptid, &ptrStrm);
|
||||
|
||||
if(vtDev->kn_conf) return NULL;
|
||||
|
||||
/* check if there is enough buffer space */
|
||||
if(vtDev->buf_pos + sizeof(VTCUDAMemcpy) > vtDev->buf_size){
|
||||
if(vtDev->buf_pos + sizeof(VTCUDAMemcpy) > vtDev->conf_stack){
|
||||
VTCUDAflush(vtDev, ptid);
|
||||
if(vtDev->buf_pos + sizeof(VTCUDAMemcpy) > vtDev->conf_stack){
|
||||
vt_error_msg("[CUDART] Not enough buffer space for asynchronous memory copy!");
|
||||
}
|
||||
}
|
||||
|
||||
/* get and increase entry buffer position */
|
||||
@ -2267,22 +2280,24 @@ cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cu
|
||||
if(vt_is_trace_on(ptid)){
|
||||
vtDev = VTCUDAcheckThread(stream, ptid, &ptrStrm);
|
||||
|
||||
/* avoid configure calls one after another without cudaLaunch */
|
||||
if(vtDev->kn_conf) return ret;
|
||||
vtDev->kn_conf = 1;
|
||||
|
||||
/* get kernel configure position */
|
||||
vtDev->conf_stack = vtDev->conf_stack - sizeof(VTCUDAknconf);
|
||||
|
||||
/* check if there is enough buffer space */
|
||||
if(vtDev->buf_pos + sizeof(VTCUDAKernel) > vtDev->buf_size){
|
||||
if(vtDev->buf_pos + sizeof(VTCUDAKernel) > vtDev->conf_stack){
|
||||
VTCUDAflush(vtDev, ptid);
|
||||
if(vtDev->buf_pos + sizeof(VTCUDAKernel) > vtDev->conf_stack){
|
||||
vt_error_msg("[CUDART] Not enough buffer space for this kernel!");
|
||||
}
|
||||
}
|
||||
|
||||
/* set already available values of kernel */
|
||||
/* add kernel configure to stack */
|
||||
{
|
||||
VTCUDAKernel* vtKernel = (VTCUDAKernel*) vtDev->buf_pos;
|
||||
VTCUDAknconf *vtKnconf = (VTCUDAknconf*) vtDev->conf_stack;
|
||||
|
||||
vtKernel->strm = ptrStrm;
|
||||
vtKernel->blocksPerGrid = gridDim.x * gridDim.y * gridDim.z;
|
||||
vtKernel->threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;
|
||||
vtKnconf->strm = ptrStrm;
|
||||
vtKnconf->blocksPerGrid = gridDim.x * gridDim.y * gridDim.z;
|
||||
vtKnconf->threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2327,17 +2342,27 @@ cudaError_t cudaLaunch(const char *entry)
|
||||
/* check if the kernel will be traced on the correct thread */
|
||||
vtDev = VTCUDAgetDevice(ptid);
|
||||
|
||||
/* check if this kernel has been configured */
|
||||
if(vtDev->kn_conf == 0){
|
||||
/* check the kernel configure stack for last configured kernel */
|
||||
if(vtDev->conf_stack == vtDev->buf_size){
|
||||
ret = VT_LIBWRAP_FUNC_CALL(vt_cudart_lw, (entry));
|
||||
vt_warning("[CUDART] No stacked configure call before launch of kernel "
|
||||
vt_warning("[CUDART] [CUDART] No kernel configure call found for "
|
||||
"'%s' (device %d, ptid %d)", e->name, vtDev->device, ptid);
|
||||
return ret;
|
||||
}
|
||||
vtDev->kn_conf = 0;
|
||||
|
||||
/* get the kernel, which has been partly filled in configure call */
|
||||
kernel = (VTCUDAKernel*)vtDev->buf_pos;
|
||||
|
||||
/* set configure information */
|
||||
{
|
||||
VTCUDAknconf *vtKnconf = (VTCUDAknconf*) vtDev->conf_stack;
|
||||
|
||||
kernel->blocksPerGrid = vtKnconf->blocksPerGrid;
|
||||
kernel->threadsPerBlock = vtKnconf->threadsPerBlock;
|
||||
kernel->strm = vtKnconf->strm;
|
||||
|
||||
vtDev->conf_stack = vtDev->conf_stack + sizeof(VTCUDAknconf);
|
||||
}
|
||||
|
||||
vt_cntl_msg(3, "[CUDART] Launch '%s' (device %d, tid %d, rid %d, strm %d)",
|
||||
e->name, vtDev->device, vtDev->ptid,
|
||||
|
@ -133,22 +133,22 @@ static size_t parse_size(char *str) {
|
||||
case '7':
|
||||
case '8':
|
||||
case '9':
|
||||
multiply = 1;
|
||||
break;
|
||||
multiply = 1;
|
||||
break;
|
||||
case 'K':
|
||||
case 'k':
|
||||
multiply = 1024;
|
||||
break;
|
||||
multiply = 1024;
|
||||
break;
|
||||
case 'M':
|
||||
case 'm':
|
||||
multiply = 1024*1024;
|
||||
break;
|
||||
multiply = 1024*1024;
|
||||
break;
|
||||
case 'G':
|
||||
case 'g':
|
||||
multiply = 1024*1024*1024;
|
||||
break;
|
||||
multiply = 1024*1024*1024;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
break;
|
||||
}
|
||||
|
||||
size = atoll(str) * multiply;
|
||||
@ -169,12 +169,12 @@ char* vt_env_apppath()
|
||||
tmp = getenv("VT_APPPATH");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
apppath = replace_vars(tmp);
|
||||
}
|
||||
apppath = replace_vars(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
apppath = vt_pform_exec();
|
||||
}
|
||||
apppath = vt_pform_exec();
|
||||
}
|
||||
}
|
||||
return apppath;
|
||||
}
|
||||
@ -274,7 +274,7 @@ char* vt_env_gdir()
|
||||
else
|
||||
{
|
||||
gdir = replace_vars(vt_pform_gdir());
|
||||
}
|
||||
}
|
||||
}
|
||||
return gdir;
|
||||
}
|
||||
@ -294,7 +294,7 @@ char* vt_env_ldir()
|
||||
else
|
||||
{
|
||||
ldir = replace_vars(vt_pform_ldir());
|
||||
}
|
||||
}
|
||||
}
|
||||
return ldir;
|
||||
}
|
||||
@ -353,21 +353,21 @@ char* vt_env_fprefix()
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp = vt_env_apppath();
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
fprefix = strip_dir(tmp);
|
||||
if (strlen(fprefix) >= 4 &&
|
||||
(strcmp(fprefix+(strlen(fprefix)-4), ".out") == 0 ||
|
||||
strcmp(fprefix+(strlen(fprefix)-4), ".exe") == 0))
|
||||
{
|
||||
fprefix[strlen(fprefix)-4] = '\0';
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprefix = "a";
|
||||
}
|
||||
tmp = vt_env_apppath();
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
fprefix = strip_dir(tmp);
|
||||
if (strlen(fprefix) >= 4 &&
|
||||
(strcmp(fprefix+(strlen(fprefix)-4), ".out") == 0 ||
|
||||
strcmp(fprefix+(strlen(fprefix)-4), ".exe") == 0))
|
||||
{
|
||||
fprefix[strlen(fprefix)-4] = '\0';
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprefix = "a";
|
||||
}
|
||||
}
|
||||
}
|
||||
return fprefix;
|
||||
@ -383,58 +383,93 @@ int vt_env_funique()
|
||||
tmp = getenv("VT_FILE_UNIQUE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
char tmpbuf[128];
|
||||
char* p = tmpbuf;
|
||||
strncpy(tmpbuf, tmp, 128);
|
||||
char tmpbuf[128];
|
||||
char* p = tmpbuf;
|
||||
strncpy(tmpbuf, tmp, 128);
|
||||
tmpbuf[127] = '\0';
|
||||
while( *p ) { *p = tolower(*p); p++; }
|
||||
|
||||
if (strcmp(tmpbuf, "yes") == 0 ||
|
||||
strcmp(tmpbuf, "true") == 0 ||
|
||||
strcmp(tmpbuf, "auto") == 0)
|
||||
{
|
||||
funique = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
funique = atoi(tmp);
|
||||
if (funique == 0) funique = -1;
|
||||
else if (funique < 0)
|
||||
vt_error_msg("VT_FILE_UNIQUE not properly set");
|
||||
}
|
||||
}
|
||||
while( *p ) { *p = tolower(*p); p++; }
|
||||
|
||||
if (strcmp(tmpbuf, "yes") == 0 ||
|
||||
strcmp(tmpbuf, "true") == 0 ||
|
||||
strcmp(tmpbuf, "auto") == 0)
|
||||
{
|
||||
funique = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
funique = atoi(tmp);
|
||||
if (funique == 0) funique = -1;
|
||||
else if (funique < 0)
|
||||
vt_error_msg("VT_FILE_UNIQUE not properly set");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
funique = -1;
|
||||
}
|
||||
funique = -1;
|
||||
}
|
||||
}
|
||||
return funique;
|
||||
}
|
||||
|
||||
size_t vt_env_bsize()
|
||||
{
|
||||
static size_t buffer_size = 0;
|
||||
char* tmp;
|
||||
static size_t buffer_size = 0;
|
||||
char* tmp;
|
||||
|
||||
if (buffer_size == 0)
|
||||
{
|
||||
tmp = getenv("VT_BUFFER_SIZE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
buffer_size = parse_size(tmp);
|
||||
if (buffer_size <= 0)
|
||||
{
|
||||
vt_error_msg("VT_BUFFER_SIZE not properly set");
|
||||
}
|
||||
else if (buffer_size < VT_MIN_BUFSIZE)
|
||||
{
|
||||
vt_warning("VT_BUFFER_SIZE=%d resized to %d bytes",
|
||||
buffer_size, VT_MIN_BUFSIZE);
|
||||
buffer_size = VT_MIN_BUFSIZE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer_size = VT_DEFAULT_BUFSIZE;
|
||||
}
|
||||
}
|
||||
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
size_t vt_env_thread_bsize()
|
||||
{
|
||||
static size_t buffer_size = 0;
|
||||
char* tmp;
|
||||
|
||||
if (buffer_size == 0)
|
||||
{
|
||||
tmp = getenv("VT_THREAD_BUFFER_SIZE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
buffer_size = parse_size(tmp);
|
||||
if (buffer_size <= 0)
|
||||
{
|
||||
vt_error_msg("VT_BUFFER_SIZE not properly set");
|
||||
}
|
||||
else if (buffer_size < VT_MIN_BUFSIZE)
|
||||
{
|
||||
vt_warning("VT_BUFFER_SIZE=%d resized to %d bytes",
|
||||
buffer_size, VT_MIN_BUFSIZE);
|
||||
buffer_size = VT_MIN_BUFSIZE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer_size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (buffer_size == 0)
|
||||
{
|
||||
tmp = getenv("VT_BUFFER_SIZE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
buffer_size = parse_size(tmp);
|
||||
if (buffer_size <= 0)
|
||||
vt_error_msg("VT_BUFFER_SIZE not properly set");
|
||||
else if (buffer_size < VT_MIN_BUFSIZE) {
|
||||
vt_warning("VT_BUFFER_SIZE=%d resized to %d bytes",
|
||||
buffer_size, VT_MIN_BUFSIZE);
|
||||
buffer_size = VT_MIN_BUFSIZE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer_size = VT_DEFAULT_BUFSIZE;
|
||||
}
|
||||
}
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
@ -448,14 +483,14 @@ size_t vt_env_copy_bsize()
|
||||
tmp = getenv("VT_COPY_BUFFER_SIZE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
buffer_size = parse_size(tmp);
|
||||
if (buffer_size <= 0)
|
||||
vt_error_msg("VT_COPY_BUFFER_SIZE not properly set");
|
||||
}
|
||||
buffer_size = parse_size(tmp);
|
||||
if (buffer_size <= 0)
|
||||
vt_error_msg("VT_COPY_BUFFER_SIZE not properly set");
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer_size = VT_DEFAULT_COPY_BUFFER_SIZE;
|
||||
}
|
||||
buffer_size = VT_DEFAULT_COPY_BUFFER_SIZE;
|
||||
}
|
||||
}
|
||||
return buffer_size;
|
||||
}
|
||||
@ -490,34 +525,34 @@ int vt_env_mode()
|
||||
tmp = getenv("VT_MODE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
char tmpbuf[128];
|
||||
char* p = tmpbuf;
|
||||
char* tk;
|
||||
int dc;
|
||||
char tmpbuf[128];
|
||||
char* p = tmpbuf;
|
||||
char* tk;
|
||||
int dc;
|
||||
|
||||
strncpy(tmpbuf, tmp, 127);
|
||||
tmpbuf[127] = '\0';
|
||||
while( *p ) { *p = tolower(*p); p++; }
|
||||
strncpy(tmpbuf, tmp, 127);
|
||||
tmpbuf[127] = '\0';
|
||||
while( *p ) { *p = tolower(*p); p++; }
|
||||
|
||||
tk = strtok(tmpbuf, ":");
|
||||
dc = 0;
|
||||
modeflags = 0;
|
||||
do {
|
||||
if (dc <= 1 &&
|
||||
(strcmp( tk, "trace" ) == 0))
|
||||
modeflags |= VT_MODE_TRACE;
|
||||
else if(dc <= 1 &&
|
||||
(strcmp( tk, "stat" ) == 0))
|
||||
modeflags |= VT_MODE_STAT;
|
||||
else
|
||||
vt_error_msg("VT_MODE not properly set");
|
||||
dc++;
|
||||
} while((tk = strtok(0, ":")));
|
||||
}
|
||||
tk = strtok(tmpbuf, ":");
|
||||
dc = 0;
|
||||
modeflags = 0;
|
||||
do {
|
||||
if (dc <= 1 &&
|
||||
(strcmp( tk, "trace" ) == 0))
|
||||
modeflags |= VT_MODE_TRACE;
|
||||
else if(dc <= 1 &&
|
||||
(strcmp( tk, "stat" ) == 0))
|
||||
modeflags |= VT_MODE_STAT;
|
||||
else
|
||||
vt_error_msg("VT_MODE not properly set");
|
||||
dc++;
|
||||
} while((tk = strtok(0, ":")));
|
||||
}
|
||||
else
|
||||
{
|
||||
modeflags = VT_MODE_TRACE;
|
||||
}
|
||||
modeflags = VT_MODE_TRACE;
|
||||
}
|
||||
}
|
||||
return modeflags;
|
||||
}
|
||||
@ -532,14 +567,14 @@ int vt_env_stat_intv()
|
||||
tmp = getenv("VT_STAT_INTV");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
stat_intv = atoi(tmp);
|
||||
if (stat_intv < 0)
|
||||
vt_error_msg("VT_STAT_INTV not properly set");
|
||||
}
|
||||
stat_intv = atoi(tmp);
|
||||
if (stat_intv < 0)
|
||||
vt_error_msg("VT_STAT_INTV not properly set");
|
||||
}
|
||||
else
|
||||
{
|
||||
stat_intv = 0;
|
||||
}
|
||||
stat_intv = 0;
|
||||
}
|
||||
}
|
||||
return stat_intv;
|
||||
}
|
||||
@ -554,49 +589,49 @@ int vt_env_stat_props()
|
||||
tmp = getenv("VT_STAT_PROPS");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
char tmpbuf[128];
|
||||
char* p = tmpbuf;
|
||||
char* tk;
|
||||
int dc;
|
||||
char tmpbuf[128];
|
||||
char* p = tmpbuf;
|
||||
char* tk;
|
||||
int dc;
|
||||
|
||||
strncpy(tmpbuf, tmp, 127);
|
||||
tmpbuf[127] = '\0';
|
||||
while( *p ) { *p = tolower(*p); p++; }
|
||||
strncpy(tmpbuf, tmp, 127);
|
||||
tmpbuf[127] = '\0';
|
||||
while( *p ) { *p = tolower(*p); p++; }
|
||||
|
||||
if (strcmp( tmpbuf, "all" ) == 0)
|
||||
{
|
||||
propflags = (VT_SUM_PROP_FUNC | VT_SUM_PROP_MSG | VT_SUM_PROP_COLLOP);
|
||||
}
|
||||
else
|
||||
{
|
||||
tk = strtok(tmpbuf, ":");
|
||||
dc = 0;
|
||||
propflags = 0;
|
||||
do {
|
||||
if (dc <= 2 &&
|
||||
(strcmp( tk, "func" ) == 0))
|
||||
propflags |= VT_SUM_PROP_FUNC;
|
||||
else if(dc <= 2 &&
|
||||
(strcmp( tk, "msg" ) == 0))
|
||||
propflags |= VT_SUM_PROP_MSG;
|
||||
else if(dc <= 2 &&
|
||||
(strcmp( tk, "collop" ) == 0))
|
||||
propflags |= VT_SUM_PROP_COLLOP;
|
||||
/* else if(dc <= 3 &&
|
||||
(strcmp( tk, "fileop" ) == 0))
|
||||
propflags |= VT_SUM_PROP_FILEOP; */
|
||||
else
|
||||
vt_error_msg("VT_STAT_PROPS not properly set");
|
||||
dc++;
|
||||
} while((tk = strtok(0, ":")));
|
||||
}
|
||||
}
|
||||
if (strcmp( tmpbuf, "all" ) == 0)
|
||||
{
|
||||
propflags = (VT_SUM_PROP_FUNC | VT_SUM_PROP_MSG | VT_SUM_PROP_COLLOP);
|
||||
}
|
||||
else
|
||||
{
|
||||
tk = strtok(tmpbuf, ":");
|
||||
dc = 0;
|
||||
propflags = 0;
|
||||
do {
|
||||
if (dc <= 2 &&
|
||||
(strcmp( tk, "func" ) == 0))
|
||||
propflags |= VT_SUM_PROP_FUNC;
|
||||
else if(dc <= 2 &&
|
||||
(strcmp( tk, "msg" ) == 0))
|
||||
propflags |= VT_SUM_PROP_MSG;
|
||||
else if(dc <= 2 &&
|
||||
(strcmp( tk, "collop" ) == 0))
|
||||
propflags |= VT_SUM_PROP_COLLOP;
|
||||
/* else if(dc <= 3 &&
|
||||
(strcmp( tk, "fileop" ) == 0))
|
||||
propflags |= VT_SUM_PROP_FILEOP; */
|
||||
else
|
||||
vt_error_msg("VT_STAT_PROPS not properly set");
|
||||
dc++;
|
||||
} while((tk = strtok(0, ":")));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* propflags =
|
||||
(VT_SUM_PROP_FUNC | VT_SUM_PROP_MSG | VT_SUM_PROP_COLLOP | VT_SUM_PROP_FILEOP); */
|
||||
propflags = (VT_SUM_PROP_FUNC | VT_SUM_PROP_MSG | VT_SUM_PROP_COLLOP);
|
||||
}
|
||||
/* propflags =
|
||||
(VT_SUM_PROP_FUNC | VT_SUM_PROP_MSG | VT_SUM_PROP_COLLOP | VT_SUM_PROP_FILEOP); */
|
||||
propflags = (VT_SUM_PROP_FUNC | VT_SUM_PROP_MSG | VT_SUM_PROP_COLLOP);
|
||||
}
|
||||
}
|
||||
return propflags;
|
||||
}
|
||||
@ -611,37 +646,37 @@ int vt_env_stat_msg_dtls()
|
||||
tmp = getenv("VT_STAT_MSG_DTLS");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
char tmpbuf[128];
|
||||
char* p = tmpbuf;
|
||||
char* tk;
|
||||
int dc;
|
||||
char tmpbuf[128];
|
||||
char* p = tmpbuf;
|
||||
char* tk;
|
||||
int dc;
|
||||
|
||||
strncpy(tmpbuf, tmp, 127);
|
||||
tmpbuf[127] = '\0';
|
||||
while( *p ) { *p = tolower(*p); p++; }
|
||||
strncpy(tmpbuf, tmp, 127);
|
||||
tmpbuf[127] = '\0';
|
||||
while( *p ) { *p = tolower(*p); p++; }
|
||||
|
||||
tk = strtok(tmpbuf, ":");
|
||||
dc = 0;
|
||||
dtlsflags = 0;
|
||||
do {
|
||||
if (dc <= 2 &&
|
||||
(strcmp( tk, "peer" ) == 0))
|
||||
dtlsflags |= VT_SUM_MSG_DTL_PEER;
|
||||
else if(dc <= 2 &&
|
||||
(strcmp( tk, "comm" ) == 0))
|
||||
dtlsflags |= VT_SUM_MSG_DTL_COMM;
|
||||
else if(dc <= 2 &&
|
||||
(strcmp( tk, "tag" ) == 0))
|
||||
dtlsflags |= VT_SUM_MSG_DTL_TAG;
|
||||
else
|
||||
vt_error_msg("VT_STAT_MSG_DTLS not properly set");
|
||||
dc++;
|
||||
} while((tk = strtok(0, ":")));
|
||||
}
|
||||
tk = strtok(tmpbuf, ":");
|
||||
dc = 0;
|
||||
dtlsflags = 0;
|
||||
do {
|
||||
if (dc <= 2 &&
|
||||
(strcmp( tk, "peer" ) == 0))
|
||||
dtlsflags |= VT_SUM_MSG_DTL_PEER;
|
||||
else if(dc <= 2 &&
|
||||
(strcmp( tk, "comm" ) == 0))
|
||||
dtlsflags |= VT_SUM_MSG_DTL_COMM;
|
||||
else if(dc <= 2 &&
|
||||
(strcmp( tk, "tag" ) == 0))
|
||||
dtlsflags |= VT_SUM_MSG_DTL_TAG;
|
||||
else
|
||||
vt_error_msg("VT_STAT_MSG_DTLS not properly set");
|
||||
dc++;
|
||||
} while((tk = strtok(0, ":")));
|
||||
}
|
||||
else
|
||||
{
|
||||
dtlsflags = VT_SUM_MSG_DTL_PEER;
|
||||
}
|
||||
dtlsflags = VT_SUM_MSG_DTL_PEER;
|
||||
}
|
||||
}
|
||||
return dtlsflags;
|
||||
}
|
||||
@ -656,34 +691,34 @@ int vt_env_stat_collop_dtls()
|
||||
tmp = getenv("VT_STAT_COLLOP_DTLS");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
char tmpbuf[128];
|
||||
char* p = tmpbuf;
|
||||
char* tk;
|
||||
int dc;
|
||||
char tmpbuf[128];
|
||||
char* p = tmpbuf;
|
||||
char* tk;
|
||||
int dc;
|
||||
|
||||
strncpy(tmpbuf, tmp, 127);
|
||||
tmpbuf[127] = '\0';
|
||||
while( *p ) { *p = tolower(*p); p++; }
|
||||
strncpy(tmpbuf, tmp, 127);
|
||||
tmpbuf[127] = '\0';
|
||||
while( *p ) { *p = tolower(*p); p++; }
|
||||
|
||||
tk = strtok(tmpbuf, ":");
|
||||
dc = 0;
|
||||
dtlsflags = 0;
|
||||
do {
|
||||
if (dc <= 1 &&
|
||||
(strcmp( tk, "comm" ) == 0))
|
||||
dtlsflags |= VT_SUM_COLLOP_DTL_COMM;
|
||||
else if(dc <= 1 &&
|
||||
(strcmp( tk, "op" ) == 0))
|
||||
dtlsflags |= VT_SUM_COLLOP_DTL_OP;
|
||||
else
|
||||
vt_error_msg("VT_STAT_COLLOP_DTLS not properly set");
|
||||
dc++;
|
||||
} while((tk = strtok(0, ":")));
|
||||
}
|
||||
tk = strtok(tmpbuf, ":");
|
||||
dc = 0;
|
||||
dtlsflags = 0;
|
||||
do {
|
||||
if (dc <= 1 &&
|
||||
(strcmp( tk, "comm" ) == 0))
|
||||
dtlsflags |= VT_SUM_COLLOP_DTL_COMM;
|
||||
else if(dc <= 1 &&
|
||||
(strcmp( tk, "op" ) == 0))
|
||||
dtlsflags |= VT_SUM_COLLOP_DTL_OP;
|
||||
else
|
||||
vt_error_msg("VT_STAT_COLLOP_DTLS not properly set");
|
||||
dc++;
|
||||
} while((tk = strtok(0, ":")));
|
||||
}
|
||||
else
|
||||
{
|
||||
dtlsflags = VT_SUM_COLLOP_DTL_OP;
|
||||
}
|
||||
dtlsflags = VT_SUM_COLLOP_DTL_OP;
|
||||
}
|
||||
}
|
||||
return dtlsflags;
|
||||
}
|
||||
@ -698,13 +733,13 @@ int vt_env_verbose()
|
||||
tmp = getenv("VT_VERBOSE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
verbose = atoi(tmp);
|
||||
if (verbose < 0) verbose = 0;
|
||||
}
|
||||
verbose = atoi(tmp);
|
||||
if (verbose < 0) verbose = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
verbose = 1;
|
||||
}
|
||||
verbose = 1;
|
||||
}
|
||||
}
|
||||
return verbose;
|
||||
}
|
||||
@ -719,13 +754,13 @@ int vt_env_debug()
|
||||
tmp = getenv("VT_DEBUG");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
debug = atoi(tmp);
|
||||
if (debug < 0) debug = 0;
|
||||
}
|
||||
debug = atoi(tmp);
|
||||
if (debug < 0) debug = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
debug = 0;
|
||||
}
|
||||
debug = 0;
|
||||
}
|
||||
}
|
||||
return debug;
|
||||
}
|
||||
@ -740,12 +775,12 @@ int vt_env_do_unify()
|
||||
tmp = getenv("VT_UNIFY");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
do_unify = parse_bool(tmp);
|
||||
}
|
||||
do_unify = parse_bool(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
do_unify = 1;
|
||||
}
|
||||
do_unify = 1;
|
||||
}
|
||||
}
|
||||
return do_unify;
|
||||
}
|
||||
@ -760,12 +795,12 @@ int vt_env_do_clean()
|
||||
tmp = getenv("VT_CLEAN");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
do_clean = parse_bool(tmp);
|
||||
}
|
||||
do_clean = parse_bool(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
do_clean = 1;
|
||||
}
|
||||
do_clean = 1;
|
||||
}
|
||||
}
|
||||
return do_clean;
|
||||
}
|
||||
@ -780,12 +815,12 @@ int vt_env_memtrace()
|
||||
tmp = getenv("VT_MEMTRACE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
memtrace = parse_bool(tmp);
|
||||
}
|
||||
memtrace = parse_bool(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
memtrace = 0;
|
||||
}
|
||||
memtrace = 0;
|
||||
}
|
||||
}
|
||||
return memtrace;
|
||||
}
|
||||
@ -840,12 +875,12 @@ int vt_env_iotrace()
|
||||
tmp = getenv("VT_IOTRACE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
iotrace = parse_bool(tmp);
|
||||
}
|
||||
iotrace = parse_bool(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
iotrace = 0;
|
||||
}
|
||||
iotrace = 0;
|
||||
}
|
||||
}
|
||||
return iotrace;
|
||||
}
|
||||
@ -880,12 +915,12 @@ int vt_env_libctrace()
|
||||
tmp = getenv("VT_LIBCTRACE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
libctrace = parse_bool(tmp);
|
||||
}
|
||||
libctrace = parse_bool(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
libctrace = 1;
|
||||
}
|
||||
libctrace = 1;
|
||||
}
|
||||
}
|
||||
return libctrace;
|
||||
}
|
||||
@ -920,12 +955,12 @@ int vt_env_mpitrace()
|
||||
tmp = getenv("VT_MPITRACE");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
mpitrace = parse_bool(tmp);
|
||||
}
|
||||
mpitrace = parse_bool(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
mpitrace = 1;
|
||||
}
|
||||
mpitrace = 1;
|
||||
}
|
||||
}
|
||||
return mpitrace;
|
||||
}
|
||||
@ -1037,14 +1072,14 @@ int vt_env_rusage_intv()
|
||||
tmp = getenv("VT_RUSAGE_INTV");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
rusage_intv = atoi(tmp);
|
||||
if (rusage_intv < 0)
|
||||
vt_error_msg("VT_RUSAGE_INTV not properly set");
|
||||
}
|
||||
rusage_intv = atoi(tmp);
|
||||
if (rusage_intv < 0)
|
||||
vt_error_msg("VT_RUSAGE_INTV not properly set");
|
||||
}
|
||||
else
|
||||
{
|
||||
rusage_intv = 100;
|
||||
}
|
||||
rusage_intv = 100;
|
||||
}
|
||||
}
|
||||
return rusage_intv;
|
||||
}
|
||||
@ -1059,7 +1094,7 @@ char* vt_env_metrics()
|
||||
read = 0;
|
||||
metrics = getenv("VT_METRICS");
|
||||
if ( metrics != NULL && strlen(metrics) == 0 )
|
||||
metrics = NULL;
|
||||
metrics = NULL;
|
||||
}
|
||||
return metrics;
|
||||
}
|
||||
@ -1122,12 +1157,12 @@ int vt_env_sync_flush()
|
||||
tmp = getenv("VT_SYNC_FLUSH");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
sync_flush = parse_bool(tmp);
|
||||
}
|
||||
sync_flush = parse_bool(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
sync_flush = 0;
|
||||
}
|
||||
sync_flush = 0;
|
||||
}
|
||||
}
|
||||
return sync_flush;
|
||||
}
|
||||
@ -1142,14 +1177,14 @@ int vt_env_sync_flush_level()
|
||||
tmp = getenv("VT_SYNC_FLUSH_LEVEL");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
sync_flush_level = atoi(tmp);
|
||||
if (sync_flush_level < 0 || sync_flush_level > 100)
|
||||
vt_error_msg("VT_SYNC_FLUSH_LEVEL not properly set");
|
||||
}
|
||||
sync_flush_level = atoi(tmp);
|
||||
if (sync_flush_level < 0 || sync_flush_level > 100)
|
||||
vt_error_msg("VT_SYNC_FLUSH_LEVEL not properly set");
|
||||
}
|
||||
else
|
||||
{
|
||||
sync_flush_level = 80;
|
||||
}
|
||||
sync_flush_level = 80;
|
||||
}
|
||||
}
|
||||
return sync_flush_level;
|
||||
}
|
||||
@ -1164,14 +1199,14 @@ int vt_env_max_stack_depth()
|
||||
tmp = getenv("VT_MAX_STACK_DEPTH");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
max_stack_depth = atoi(tmp);
|
||||
if (max_stack_depth < 0)
|
||||
vt_error_msg("VT_MAX_STACK_DEPTH not properly set");
|
||||
}
|
||||
max_stack_depth = atoi(tmp);
|
||||
if (max_stack_depth < 0)
|
||||
vt_error_msg("VT_MAX_STACK_DEPTH not properly set");
|
||||
}
|
||||
else
|
||||
{
|
||||
max_stack_depth = 0;
|
||||
}
|
||||
max_stack_depth = 0;
|
||||
}
|
||||
}
|
||||
return max_stack_depth;
|
||||
}
|
||||
@ -1186,14 +1221,14 @@ int vt_env_max_flushes()
|
||||
tmp = getenv("VT_MAX_FLUSHES");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
max_flushes = atoi(tmp);
|
||||
if (max_flushes < 0)
|
||||
vt_error_msg("VT_MAX_FLUSHES not properly set");
|
||||
}
|
||||
max_flushes = atoi(tmp);
|
||||
if (max_flushes < 0)
|
||||
vt_error_msg("VT_MAX_FLUSHES not properly set");
|
||||
}
|
||||
else
|
||||
{
|
||||
max_flushes = 1;
|
||||
}
|
||||
max_flushes = 1;
|
||||
}
|
||||
}
|
||||
return max_flushes;
|
||||
}
|
||||
@ -1231,12 +1266,12 @@ int vt_env_compression()
|
||||
tmp = getenv("VT_COMPRESSION");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
compression = parse_bool(tmp);
|
||||
}
|
||||
compression = parse_bool(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
compression = 1;
|
||||
}
|
||||
compression = 1;
|
||||
}
|
||||
}
|
||||
return compression;
|
||||
#else /* HAVE_ZLIB */
|
||||
@ -1334,8 +1369,8 @@ char* vt_env_filter_spec()
|
||||
tmp = getenv("VT_FILTER_SPEC");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
spec = replace_vars(tmp);
|
||||
}
|
||||
spec = replace_vars(tmp);
|
||||
}
|
||||
}
|
||||
return spec;
|
||||
}
|
||||
@ -1352,8 +1387,8 @@ char* vt_env_groups_spec()
|
||||
tmp = getenv("VT_GROUPS_SPEC");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
spec = replace_vars(tmp);
|
||||
}
|
||||
spec = replace_vars(tmp);
|
||||
}
|
||||
}
|
||||
return spec;
|
||||
}
|
||||
@ -1368,12 +1403,12 @@ int vt_env_etimesync()
|
||||
tmp = getenv("VT_ETIMESYNC");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
etimesync = parse_bool(tmp);
|
||||
}
|
||||
etimesync = parse_bool(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
etimesync = 0;
|
||||
}
|
||||
etimesync = 0;
|
||||
}
|
||||
}
|
||||
return etimesync;
|
||||
}
|
||||
@ -1388,14 +1423,14 @@ int vt_env_etimesync_intv()
|
||||
tmp = getenv("VT_ETIMESYNC_INTV");
|
||||
if (tmp != NULL && strlen(tmp) > 0)
|
||||
{
|
||||
etimesync_intv = atoi(tmp);
|
||||
if (etimesync_intv < 0)
|
||||
vt_error_msg("VT_ETIMESYNC_INTV not properly set");
|
||||
}
|
||||
etimesync_intv = atoi(tmp);
|
||||
if (etimesync_intv < 0)
|
||||
vt_error_msg("VT_ETIMESYNC_INTV not properly set");
|
||||
}
|
||||
else
|
||||
{
|
||||
etimesync_intv = 120;
|
||||
}
|
||||
etimesync_intv = 120;
|
||||
}
|
||||
}
|
||||
return etimesync_intv;
|
||||
}
|
||||
|
@ -33,6 +33,7 @@ EXTERN int vt_env_ldir_check(void);
|
||||
EXTERN char* vt_env_fprefix(void);
|
||||
EXTERN int vt_env_funique(void);
|
||||
EXTERN size_t vt_env_bsize(void);
|
||||
EXTERN size_t vt_env_thread_bsize(void);
|
||||
EXTERN size_t vt_env_copy_bsize(void);
|
||||
EXTERN int vt_env_pthread_reuse(void);
|
||||
EXTERN int vt_env_mode(void);
|
||||
|
@ -233,10 +233,18 @@ void VTThrd_open(uint32_t tid)
|
||||
VTThrd* thrd = VTThrdv[tid];
|
||||
size_t bsize = vt_env_bsize();
|
||||
#if (defined(VT_MT) || defined(VT_HYB) || defined(VT_JAVA))
|
||||
if ( tid == 0 ) { /* master thread gets most buffer space */
|
||||
bsize = (bsize / 10) * 7;
|
||||
} else { /* worker threads get less buffer space */
|
||||
bsize = (bsize / 10);
|
||||
size_t tbsize = vt_env_thread_bsize();
|
||||
if( tbsize != 0 )
|
||||
{
|
||||
if( tid != 0 )
|
||||
bsize = tbsize;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( tid == 0 ) /* master thread gets most buffer space */
|
||||
bsize = (bsize / 10) * 7;
|
||||
else /* worker threads get less buffer space */
|
||||
bsize = (bsize / 10);
|
||||
}
|
||||
#endif /* VT_MT || VT_HYB || VT_JAVA */
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user