otfprofile-mpi:
      - added progress display
      - added verbose messages
      - added functions to synchronize the error indicator to all worker ranks
        (enforces that all ranks will be terminated by calling MPI_Abort if any
        rank fails; a condensed sketch follows this list)
      - wrap definition comments after 80 characters
      - use pdf[la]tex instead of latex/dvipdf to convert TeX output to PDF
      - added configure checks for pdf[la]tex and PGFPLOTS v1.4
      - fixed function invocation statistics generated from summarized information (--stat)
      - fixed memory leak
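
A condensed sketch of the error-indicator synchronization added by this commit
(the full SyncError() helper appears in the otfprofile-mpi.cpp diff further
down): the flag is either broadcast from one root rank or combined across all
ranks with an MPI_MAX reduction, so a failure on any rank becomes visible
everywhere and every rank can call MPI_Abort consistently.

    #include <mpi.h>

    /* synchronize a boolean error flag across ranks; root == -1 means
       "reduce from all ranks", otherwise broadcast from the given root */
    static bool sync_error( bool error, int num_ranks, int root = -1 ) {
        if ( num_ranks > 1 ) {
            int buf = error ? 1 : 0;
            if ( root >= 0 ) {
                MPI_Bcast( &buf, 1, MPI_INT, root, MPI_COMM_WORLD );
            } else {
                int recv_buf;
                MPI_Allreduce( &buf, &recv_buf, 1, MPI_INT, MPI_MAX,
                               MPI_COMM_WORLD );
                buf = recv_buf;
            }
            error = ( 1 == buf );
        }
        return error;
    }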

Changes to VT:
   MPI wrappers:
      - fixed wrapper generation for MPI implementations which don't support the MPI-2 standard (e.g. MVAPICH, MPICH)
      - corrected IN_PLACE denotation for MPI_Alltoall* and MPI_Scatter*
   vtwrapper:
      - corrected detection of IBM XL's OpenMP flag -qsmp=*:omp:*
        (a condensed parsing sketch follows this list)
   vtunify:
      - fixed faulty cleanup of temporary files which occurred if VT is configured without trace compression support
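
The corrected -qsmp handling splits the flag's suboption list at ':' and looks
for the 'omp' suboption. A condensed sketch of the parsing added to the
compiler wrapper (the full version appears in the vt_wrapper diff further
down):

    #include <cstring>
    #include <string>

    /* true if an IBM XL "-qsmp=<suboptions>" flag enables OpenMP,
       e.g. "-qsmp=omp" or "-qsmp=noauto:omp" (example values) */
    static bool qsmp_enables_omp( const std::string& arg ) {
        if ( arg.length() <= 6 || arg.compare( 0, 6, "-qsmp=" ) != 0 )
            return false;
        char buf[128];
        strncpy( buf, arg.substr( 6 ).c_str(), sizeof( buf ) - 1 );
        buf[sizeof( buf ) - 1] = '\0';
        for ( char* tok = strtok( buf, ":" ); tok != 0; tok = strtok( 0, ":" ) ) {
            if ( strcmp( tok, "omp" ) == 0 )
                return true;
        }
        return false;
    }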

This commit was SVN r24851.
This commit is contained in:
Matthias Jurenz 2011-07-01 07:17:15 +00:00
parent a4b2bd903b
commit 5e6919b4e1
31 changed files with 1078 additions and 375 deletions

View file

@ -3,6 +3,12 @@
(see extlib/otf/ChangeLog)
- improved filtering of CUDA kernels
- fixed unification of local process group definitions
- fixed wrapper generation for MPI implementations which don't support
the MPI-2 standard
- fixed faulty cleanup of temporary files in vtunify which occurred if
VT is configured without trace compression support
- fixed detection of OpenMP flag '-qsmp=*:omp:*' in the compiler
wrappers
5.11
- updated version of internal OTF to 1.9sawfish

View file

@ -63,7 +63,10 @@ AC_DEFUN([ACVT_ZLIB],
])
AS_IF([test x"$ZLIBLIB" != x -a x"$zlib_error" = "xno"],
[have_zlib="yes"])
[
have_zlib="yes"
AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the ZLIB.])
])
AS_IF([test x"$force_zlib" = "xyes" -a x"$zlib_error" = "xyes"],
[exit 1])

View file

@ -11,8 +11,8 @@ have_mpi2_1sided=@VT_MPIGEN_HAVE_MPI2_1SIDED@
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
if [ $have_mpi2_thread -o $have_mpi2_1sided -o $have_mpi2_extcoll -o $have_mpi2_proc ] ; then
have_mpi2=0
if [ $have_mpi2_thread = 1 -o $have_mpi2_1sided = 1 -o $have_mpi2_extcoll = 1 -o $have_mpi2_proc = 1 ] ; then
have_mpi2=1
fi
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi

View file

@ -23,8 +23,8 @@ have_mpi2_1sided=@VT_MPIGEN_HAVE_MPI2_1SIDED@
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
if [ $have_mpi2_thread -o $have_mpi2_1sided -o $have_mpi2_extcoll -o $have_mpi2_proc ] ; then
have_mpi2=0
if [ $have_mpi2_thread = 1 -o $have_mpi2_1sided = 1 -o $have_mpi2_extcoll = 1 -o $have_mpi2_proc = 1 ] ; then
have_mpi2=1
fi
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi

View file

@ -11,7 +11,8 @@ have_mpi2_1sided=@VT_MPIGEN_HAVE_MPI2_1SIDED@
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
if [ $have_mpi2_thread -o $have_mpi2_1sided -o $have_mpi2_extcoll -o $have_mpi2_proc ] ; then
have_mpi2=0
if [ $have_mpi2_thread = 1 -o $have_mpi2_1sided = 1 -o $have_mpi2_extcoll = 1 -o $have_mpi2_proc = 1 ] ; then
have_mpi2=1
fi
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi

View file

@ -40,8 +40,8 @@ VT_MPI_INT MPI_Address(void* location, MPI_Aint* address_CLASS_SINGLE_OUT);
VT_MPI_INT MPI_Allgather(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
VT_MPI_INT MPI_Allgatherv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* displs, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
VT_MPI_INT MPI_Allreduce(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
VT_MPI_INT MPI_Alltoall(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
VT_MPI_INT MPI_Alltoallv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT* sendcounts, VT_MPI_INT* sdispls, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* rdispls, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
VT_MPI_INT MPI_Alltoall(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
VT_MPI_INT MPI_Alltoallv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* sdispls, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* rdispls, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
VT_MPI_INT MPI_Attr_delete(MPI_Comm comm, VT_MPI_INT keyval);
VT_MPI_INT MPI_Attr_get(MPI_Comm comm, VT_MPI_INT keyval, void* attribute_val, VT_MPI_INT* flag);
VT_MPI_INT MPI_Attr_put(MPI_Comm comm, VT_MPI_INT keyval, void* attribute_val);
@ -129,8 +129,8 @@ VT_MPI_INT MPI_Request_free(MPI_Request* request_CLASS_SINGLE_IO);
VT_MPI_INT MPI_Rsend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
VT_MPI_INT MPI_Rsend_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
VT_MPI_INT MPI_Scan(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_OTHER*/
VT_MPI_INT MPI_Scatter(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
VT_MPI_INT MPI_Scatterv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT* sendcounts, VT_MPI_INT* displs, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
VT_MPI_INT MPI_Scatter(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
VT_MPI_INT MPI_Scatterv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* displs, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
VT_MPI_INT MPI_Send(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
VT_MPI_INT MPI_Send_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
VT_MPI_INT MPI_Sendrecv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, VT_MPI_INT dest, VT_MPI_INT sendtag, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT source, VT_MPI_INT recvtag, MPI_Comm comm, MPI_Status* status_CLASS_SINGLE_OUT);

View file

@ -66,7 +66,7 @@ BODY { font-family: sans-serif; }
<P>
<P>
<B><BIG CLASS="XHUGE">VampirTrace 5.11&nbsp;User Manual</BIG></B>
<B><BIG CLASS="XHUGE">VampirTrace 5.11.1&nbsp;User Manual</BIG></B>
<BR>
<BR>
<BR>
@ -252,20 +252,20 @@ OpenMP events, and performance counters.
<P>
After a successful tracing run, VampirTrace writes all collected data to a
trace file in the Open Trace Format (OTF)<A NAME="tex2html3"
HREF="#foot1144"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1146"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
As a result, the information is available for post-mortem analysis and
visualization by various tools.
Most notably, VampirTrace provides the input data for the Vampir analysis
and visualization tool<A NAME="tex2html5"
HREF="#foot1145"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1147"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
<P>
VampirTrace is included in OpenMPI&nbsp;1.3 and later versions.
If not disabled explicitly, VampirTrace is built automatically when installing
OpenMPI<A NAME="tex2html7"
HREF="#foot1146"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1148"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
<P>
@ -1083,7 +1083,7 @@ in a single file, that
The names in between may contain wildcards as ``?'', ``*'', and ``#''; each entry gets a new line.
The lists end with <TT>END[_FILE]_&lt;INCLUDE|EXCLUDE&gt;_LIST</TT>. For further information on selective
profiling have a look at the TAU documentation<A NAME="tex2html11"
HREF="#foot1170"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1172"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
<PRE>
@ -1100,7 +1100,7 @@ Binary Instrumentation Using Dyninst
The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to
instrument the application during runtime (binary instrumentation), by using
Dyninst<A NAME="tex2html13"
HREF="#foot1171"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1173"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
Recompiling is not necessary for this kind of instrumentation,
but relinking:
@ -1820,7 +1820,7 @@ for the enhanced timer synchronization:
<UL>
<LI>CLAPACK<A NAME="tex2html15"
HREF="#foot1181"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
HREF="#foot1183"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
</LI>
<LI>AMD ACML
@ -2190,6 +2190,14 @@ Controls how VampirTrace handles synchronizing CUDA API calls, especially
introduces a minimal overhead but increases timer precision and prevents
flushes elsewhere in the trace.
</DD>
<DT></DT>
<DD><TT>VT_CUDATRACE_ERROR</TT> (default: <TT>no</TT>)
<BR>
Print an error message and exit the program if a CUDA wrapper call
does not return 'cudaSuccess'. By default, only a warning message is
printed and the program continues.
</DD>
<DT></DT>
<DD><TT>VT_CUPTI_METRICS</TT> (default: <TT>""</TT>)
@ -3373,21 +3381,21 @@ by the Linux 2.6 kernel are shown in the table.
<P>
<BR><HR><H4>Footnotes</H4>
<DL>
<DT><A NAME="foot1144">... (OTF)</A><A
<DT><A NAME="foot1146">... (OTF)</A><A
HREF="UserManual.html#tex2html3"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD><TT><A NAME="tex2html4"
HREF="http://www.tu-dresden.de/zih/otf">http://www.tu-dresden.de/zih/otf</A></TT>
</DD>
<DT><A NAME="foot1145">... tool </A><A
<DT><A NAME="foot1147">... tool </A><A
HREF="UserManual.html#tex2html5"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD><TT><A NAME="tex2html6"
HREF="http://www.vampir.eu">http://www.vampir.eu</A></TT>
</DD>
<DT><A NAME="foot1146">...
<DT><A NAME="foot1148">...
Open MPI </A><A
HREF="UserManual.html#tex2html7"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
@ -3395,14 +3403,14 @@ Open MPI </A><A
HREF="http://www.open-mpi.org/faq/?category=vampirtrace">http://www.open-mpi.org/faq/?category=vampirtrace</A></TT>
</DD>
<DT><A NAME="foot1170">... documentation </A><A
<DT><A NAME="foot1172">... documentation </A><A
HREF="UserManual.html#tex2html11"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD><TT><A NAME="tex2html12"
HREF="http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling">http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling</A></TT>
</DD>
<DT><A NAME="foot1171">...
<DT><A NAME="foot1173">...
Dyninst </A><A
HREF="UserManual.html#tex2html13"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
@ -3410,7 +3418,7 @@ Dyninst </A><A
HREF="http://www.dyninst.org">http://www.dyninst.org</A></TT>
</DD>
<DT><A NAME="foot1181">... CLAPACK</A><A
<DT><A NAME="foot1183">... CLAPACK</A><A
HREF="UserManual.html#tex2html15"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
<DD><TT><A NAME="tex2html16"

Binary data
ompi/contrib/vt/vt/doc/UserManual.pdf

Binary file not shown.

View file

@ -1,4 +1,9 @@
1.9.1openmpi = 1.9sawfish
1.9.1openmpi
- added progress display to otfprofile-mpi
- use pdf[la]tex instead of latex/dvipdf to convert otfprofile-mpi's
TeX output to PDF
- fixed function invocation statistics in otfprofile-mpi generated
from summarized information (--stat)
1.9sawfish
- added MPI-parallel version of otfprofile (otfprofile-mpi)

View file

@ -2,6 +2,7 @@ m4_include(config/m4/acinclude.debug.m4)
m4_include(config/m4/acinclude.math.m4)
m4_include(config/m4/acinclude.mpi.m4)
m4_include(config/m4/acinclude.omp.m4)
m4_include(config/m4/acinclude.pdflatex_pgfplots.m4)
m4_include(config/m4/acinclude.swig_python.m4)
m4_include(config/m4/acinclude.vtf3.m4)
m4_include(config/m4/acinclude.verbose.m4)

View file

@ -0,0 +1,35 @@
AC_DEFUN([CHECK_PDFLATEX_PGFPLOTS],
[
AC_ARG_VAR([PDFTEX], [pdfTeX typesetter command])
AC_CHECK_PROGS([PDFTEX], [pdflatex pdftex])
if test x"$PDFTEX" != x; then
AC_DEFINE_UNQUOTED([PDFTEX], ["$PDFTEX"], [pdfTeX typesetter command.])
AC_MSG_CHECKING([for PGFPLOTS version >= 1.4])
cat << EOF >conftest.tex
\documentclass[[a4paper,10pt]]{article}
\nonstopmode
\usepackage{pgfplots}
\begin{document}
\pgfplotstableread{
col1 col2
1 2
}\testtable
test
\end{document}
EOF
$PDFTEX conftest.tex >/dev/null 2>&1
if test $? -eq 0; then
AC_MSG_RESULT([yes])
AC_DEFINE([HAVE_PGFPLOTS_1_4], [1], [Define to 1 if you have the TeX package PGFPLOTS version >=1.4.])
else
AC_MSG_RESULT([no])
fi
rm -f conftest.*
fi
])

View file

@ -69,6 +69,9 @@ CHECK_SWIG_PYTHON
if test x"$force_swig_python" = "xyes" -a x"$swig_python_error" = "xyes"; then exit 1; fi
AM_CONDITIONAL(AMHAVESWIGPYTHON, test x"$have_swig_python" = xyes)
# Checks for pdflatex and PGFPLOTS needed for otfprofile-mpi to convert TeX output to PDF
CHECK_PDFLATEX_PGFPLOTS
WITH_DEBUG
WITH_VERBOSE

Binary file not shown.

Binary file not shown.

View file

@ -17,6 +17,7 @@ otfprofile_mpi_SOURCES = \
collect_data.h \
create_latex.h \
datastructs.h \
otfprofile-mpi.h \
reduce_data.h \
summarize_data.h \
collect_data.cpp \

View file

@ -15,26 +15,245 @@ using namespace std;
#include "otf.h"
#include "otfaux.h"
#include "mpi.h"
#include "collect_data.h"
#include "otfprofile-mpi.h"
/* logarithm to base b for unsigned 64-bit integer x */
static uint64_t logi( uint64_t x, uint64_t b= 2 ) {
static void prepare_progress( AllData& alldata, uint64_t max_bytes ) {
assert( b > 1 );
Progress& progress= alldata.progress;
uint64_t c= 1;
uint64_t i= 0;
progress.cur_bytes= 0;
progress.max_bytes= max_bytes;
progress.ranks_left= alldata.numRanks -1;
while( c <= x ) {
if ( 1 < alldata.numRanks ) {
c*= b;
i++;
/* reduce max. bytes to rank 0 */
uint64_t sum_max_bytes;
MPI_Reduce( &max_bytes, &sum_max_bytes, 1, MPI_LONG_LONG_INT, MPI_SUM,
0, MPI_COMM_WORLD );
if ( 0 == alldata.myRank ) {
progress.max_bytes= sum_max_bytes;
progress.recv_buffers= new uint64_t[alldata.numRanks-1];
assert( progress.recv_buffers );
progress.recv_requests= new MPI_Request[alldata.numRanks-1];
assert( progress.recv_requests );
progress.recv_statuses= new MPI_Status[alldata.numRanks-1];
assert( progress.recv_statuses );
progress.recv_indices= new int[alldata.numRanks-1];
assert( progress.recv_indices );
/* initialize array of current bytes read and start
persistent communication */
for ( uint32_t i= 0; i < alldata.numRanks; i++ ) {
if ( 0 < i ) {
/* create persistent request handle */
MPI_Recv_init( &(progress.recv_buffers[i-1]), 1,
MPI_LONG_LONG_INT, i, Progress::MSG_TAG,
MPI_COMM_WORLD,
&(progress.recv_requests[i-1]) );
/* start persistent communication */
MPI_Start( &(progress.recv_requests[i-1]) );
}
}
} else { /* 0 != my_rank */
/* initialize request handle for sending progress to rank 0 */
progress.send_request = MPI_REQUEST_NULL;
}
/* block until all worker ranks have reached this point to prevent the
progress display from making a big jump at the beginning */
MPI_Barrier( MPI_COMM_WORLD );
}
return i;
if ( 0 == alldata.myRank ) {
/* show initial progress */
printf( "%7.2f %%\r", 0.0 );
fflush( stdout );
}
}
static void update_progress( AllData& alldata, uint64_t delta_bytes,
bool wait= false ) {
Progress& progress= alldata.progress;
if ( 0 == alldata.myRank ) {
progress.cur_bytes += delta_bytes;
} else {
progress.cur_bytes= delta_bytes;
}
if ( 1 < alldata.numRanks ) {
if ( 0 == alldata.myRank ) {
/* get current bytes read from all worker ranks */
int out_count;
/* either wait or test for one or more updates from worker ranks */
if ( wait )
{
MPI_Waitsome( alldata.numRanks - 1, progress.recv_requests,
&out_count, progress.recv_indices,
progress.recv_statuses );
} else {
MPI_Testsome( alldata.numRanks - 1, progress.recv_requests,
&out_count, progress.recv_indices,
progress.recv_statuses );
}
if ( MPI_UNDEFINED != out_count ) {
int index;
uint32_t i;
for ( i= 0; i < (uint32_t) out_count; i++ ) {
index= progress.recv_indices[i];
/* worker rank (index+1) is finished? */
if ( (uint64_t)-1 != progress.recv_buffers[index] ) {
/* update rank's current bytes read and restart
persistent communication */
progress.cur_bytes += progress.recv_buffers[index];
MPI_Start( &(progress.recv_requests[progress.recv_indices[i]]) );
} else {
/* this rank is finished */
progress.ranks_left -= 1;
}
}
}
} else { /* 0 != my_rank */
int do_send = 1;
MPI_Status status;
/* send only if it's the first send or the request handle isn't
currently in use */
if ( MPI_REQUEST_NULL != progress.send_request ) {
MPI_Test( &(progress.send_request), &do_send, &status );
}
if ( do_send ) {
MPI_Issend( &(progress.cur_bytes), 1, MPI_LONG_LONG_INT, 0,
Progress::MSG_TAG, MPI_COMM_WORLD,
&progress.send_request );
}
}
}
if ( 0 == alldata.myRank ) {
/* show progress */
double percent =
100.0 * (double) progress.cur_bytes / (double) progress.max_bytes;
static const char signs[2]= { '.',' ' };
static int signi= 0;
printf( "%7.2f %% %c\r", percent, signs[signi] );
fflush( stdout );
signi^= 1;
}
}
static void finish_progress( AllData& alldata ) {
Progress& progress= alldata.progress;
if ( 1 < alldata.numRanks ) {
if ( 0 == alldata.myRank ) {
/* update progress until all worker ranks are
finished / all bytes are read */
while ( 0 < progress.ranks_left ) {
update_progress( alldata, 0, true );
}
} else { /* 0 != my_rank */
MPI_Status status;
MPI_Wait( &(progress.send_request), &status );
/* send last current bytes read to rank 0 */
MPI_Send( &(progress.cur_bytes), 1, MPI_LONG_LONG_INT, 0,
Progress::MSG_TAG, MPI_COMM_WORLD );
/* send marker (-1) to rank 0 which indicates that this worker rank
is finished */
progress.cur_bytes = (uint64_t) -1;
MPI_Send( &(progress.cur_bytes), 1, MPI_LONG_LONG_INT, 0,
Progress::MSG_TAG, MPI_COMM_WORLD );
}
}
if ( 0 == alldata.myRank ) {
/* show final progress */
printf( "%7.2f %% done\n", 100.0 );
}
if( 1 < alldata.numRanks && 0 == alldata.myRank ) {
/* ensure that all requests are inactive before freeing memory */
MPI_Waitall( alldata.numRanks - 1, progress.recv_requests,
progress.recv_statuses );
/* free memory */
delete [] progress.recv_buffers;
delete [] progress.recv_requests;
delete [] progress.recv_statuses;
delete [] progress.recv_indices;
}
}
@ -80,12 +299,39 @@ static int handle_def_comment( void* fha, uint32_t stream, const char* comment,
AllData* alldata= (AllData*) fha;
/* add new-line between each comment record */
if ( 0 < alldata->comments.length() ) {
alldata->comments+= "\n";
}
alldata->comments+= comment;
/* wrap lines after 80 characters */
const string::size_type LINE_WRAP= 80;
string tmp= comment;
do {
if ( tmp.length() <= LINE_WRAP ) {
alldata->comments+= tmp;
break;
} else {
string::size_type next_wrap=
tmp.find_last_of( " .!?:;,", LINE_WRAP -1 );
next_wrap= ( string::npos == next_wrap ) ? LINE_WRAP : next_wrap +1;
alldata->comments+= tmp.substr( 0, next_wrap ) + '\n';
tmp= tmp.substr( next_wrap );
}
} while( 0 != tmp.length() );
return OTF_RETURN_OK;
}
@ -315,8 +561,8 @@ static int handle_send( void* fha, uint64_t time, uint32_t sender,
if ( 0 != alldata->recvTimeKey ) {
uint64_t recv_time;
if ( OTF_KeyValueList_getUint64( kvlist, alldata->recvTimeKey,
&recv_time ) == 0 ) {
if ( 0 == OTF_KeyValueList_getUint64( kvlist, alldata->recvTimeKey,
&recv_time ) ) {
duration= (double) ( recv_time - time );
@ -331,11 +577,11 @@ static int handle_send( void* fha, uint64_t time, uint32_t sender,
if ( length > 0 && duration > 0.0 ) {
uint64_t speed_bin=
logi( (uint64_t)(
Logi( (uint64_t)(
( (double)length * (double)alldata->timerResolution ) /
duration ), MessageSpeedData::BIN_LOG_BASE );
uint64_t length_bin= logi( length, MessageSpeedData::BIN_LOG_BASE );
uint64_t length_bin= Logi( length, MessageSpeedData::BIN_LOG_BASE );
alldata->messageSpeedMapPerLength[ Pair( speed_bin, length_bin ) ]
.add( 1 );
@ -444,7 +690,18 @@ static int handle_function_summary( void* fha, uint64_t time, uint32_t func,
/* add/overwrite function statistics */
FunctionData tmp;
tmp.add( count, exclTime, inclTime );
tmp.count.cnt = tmp.count.sum = count;
tmp.count.min = tmp.count.max = 0;
tmp.excl_time.cnt = count;
tmp.excl_time.sum = exclTime;
tmp.excl_time.min = tmp.excl_time.max = 0;
tmp.incl_time.cnt = count;
tmp.incl_time.sum = inclTime;
tmp.incl_time.min = tmp.incl_time.max = 0;
alldata->functionMapPerRank[ Pair( func, process ) ]= tmp;
return OTF_RETURN_OK;
@ -550,7 +807,9 @@ static int handle_collop_summary( void* fha, uint64_t time, uint32_t process,
}
static void read_definitions( OTF_Reader* reader, AllData& alldata ) {
static bool read_definitions( AllData& alldata, OTF_Reader* reader ) {
bool error= false;
/* open OTF handler array */
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
@ -608,16 +867,22 @@ static void read_definitions( OTF_Reader* reader, AllData& alldata ) {
OTF_DEFKEYVALUE_RECORD );
/* read definitions */
uint64_t defs_read_ret= OTF_Reader_readDefinitions( reader, handlers );
assert( OTF_READ_ERROR != defs_read_ret );
uint64_t read_ret= OTF_Reader_readDefinitions( reader, handlers );
if ( OTF_READ_ERROR == read_ret ) {
cerr << "ERROR: Could not read definitions." << endl;
error= true;
}
/* close OTF handler array */
OTF_HandlerArray_close( handlers );
return !error;
}
static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
AllData& alldata ) {
static void share_definitions( AllData& alldata ) {
MPI_Barrier( MPI_COMM_WORLD );
@ -627,7 +892,7 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
/* get size needed to send definitions to workers */
if ( my_rank == 0 ) {
if ( 0 == alldata.myRank ) {
MPI_Pack_size( 1 + alldata.collectiveOperationsToClasses.size() * 2 +
1 + alldata.countersOfInterest.size() +
@ -646,7 +911,7 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
/* pack definitions to buffer */
if ( my_rank == 0 ) {
if ( 0 == alldata.myRank ) {
/* collectiveOperationsToClasses.size() */
uint64_t collop_classes_map_size=
@ -701,7 +966,7 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
/* unpack definitions from buffer */
if ( my_rank != 0 ) {
if ( 0 != alldata.myRank ) {
/* collectiveOperationsToClasses.size() */
uint64_t collop_classes_map_size;
@ -756,7 +1021,9 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
}
static void read_events( OTF_Reader* reader, AllData& alldata ) {
static bool read_events( AllData& alldata, OTF_Reader* reader ) {
bool error= false;
/* open OTF handler array */
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
@ -810,16 +1077,67 @@ static void read_events( OTF_Reader* reader, AllData& alldata ) {
OTF_Reader_enableProcess( reader, alldata.myProcessesList[ i ] );
}
/* prepare progress */
if ( alldata.params.progress ) {
OTF_Reader_setRecordLimit( reader, 0 );
if ( OTF_READ_ERROR != OTF_Reader_readEvents( reader, handlers ) ) {
uint64_t min, cur, max;
OTF_Reader_eventBytesProgress( reader, &min, &cur, &max );
prepare_progress( alldata, max );
}
OTF_Reader_setRecordLimit( reader, Progress::EVENTS_RECORD_LIMIT );
}
/* read events */
uint64_t events_read_ret= OTF_Reader_readEvents( reader, handlers );
assert( OTF_READ_ERROR != events_read_ret );
uint64_t records_read= 0;
while ( OTF_READ_ERROR !=
( records_read= OTF_Reader_readEvents( reader, handlers ) ) ) {
/* update progress */
if ( alldata.params.progress ) {
uint64_t min, cur, max;
static uint64_t last_cur= 0;
OTF_Reader_eventBytesProgress( reader, &min, &cur, &max );
update_progress( alldata, cur - last_cur );
last_cur = cur;
}
/* stop reading if done */
if ( 0 == records_read )
break;
}
/* show error message if reading failed */
if ( OTF_READ_ERROR == records_read ) {
cerr << "ERROR: Could not read events." << endl;
error= true;
}
/* close OTF handler array */
OTF_HandlerArray_close( handlers );
return !error;
}
static void read_statistics( OTF_Reader* reader, AllData& alldata ) {
static bool read_statistics( AllData& alldata, OTF_Reader* reader ) {
bool error= false;
/* open OTF handler array */
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
@ -853,18 +1171,66 @@ static void read_statistics( OTF_Reader* reader, AllData& alldata ) {
OTF_Reader_enableProcess( reader, alldata.myProcessesList[ i ] );
}
/* read events */
uint64_t stats_read_ret= OTF_Reader_readStatistics( reader, handlers );
assert( OTF_READ_ERROR != stats_read_ret );
/* prepare progress */
if ( alldata.params.progress ) {
OTF_Reader_setRecordLimit( reader, 0 );
if ( OTF_READ_ERROR != OTF_Reader_readStatistics( reader, handlers ) ) {
uint64_t min, cur, max;
OTF_Reader_statisticBytesProgress( reader, &min, &cur, &max );
prepare_progress( alldata, max );
}
OTF_Reader_setRecordLimit( reader, Progress::STATS_RECORD_LIMIT );
}
/* read statistics */
uint64_t records_read= 0;
while ( OTF_READ_ERROR !=
( records_read= OTF_Reader_readStatistics( reader, handlers ) ) ) {
/* update progress */
if ( alldata.params.progress ) {
uint64_t min, cur, max;
static uint64_t last_cur= 0;
OTF_Reader_statisticBytesProgress( reader, &min, &cur, &max );
update_progress( alldata, cur - last_cur );
last_cur = cur;
}
/* stop reading if done */
if ( 0 == records_read )
break;
}
/* show error message if reading failed */
if ( OTF_READ_ERROR == records_read ) {
cerr << "ERROR: Could not read statistics." << endl;
error= true;
}
/* close OTF handler array */
OTF_HandlerArray_close( handlers );
return !error;
}
bool collectData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
bool CollectData( AllData& alldata ) {
bool ret= true;
bool error= false;
/* open OTF file manager and reader */
@ -876,37 +1242,65 @@ bool collectData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
OTF_Reader_open( alldata.params.input_file_prefix.c_str(), manager );
assert( reader );
if ( my_rank == 0 ) {
do {
/* read definitions */
read_definitions( reader, alldata );
if ( 0 == alldata.myRank ) {
}
/* read definitions */
/* share definitions needed for reading events to workers */
VerbosePrint( alldata, 1, true, "reading definitions\n" );
if ( num_ranks > 1 ) {
error= !read_definitions( alldata, reader );
share_definitions( my_rank, num_ranks, alldata );
}
}
/* broadcast error indicator to workers */
if ( SyncError( alldata, error, 0 ) ) {
/* either read data from events or statistics */
break;
if ( alldata.params.read_from_stats ) {
}
read_statistics( reader, alldata );
/* share definitions needed for reading events to workers */
} else {
if ( 1 < alldata.numRanks ) {
read_events( reader, alldata );
share_definitions( alldata );
}
}
/* either read data from events or statistics */
if ( alldata.params.read_from_stats ) {
VerbosePrint( alldata, 1, true, "reading statistics\n" );
error= !read_statistics( alldata, reader );
} else {
VerbosePrint( alldata, 1, true, "reading events\n" );
error= !read_events( alldata, reader );
}
/* finish progress */
if ( alldata.params.progress ) {
finish_progress( alldata );
}
/* synchronize error indicator with workers */
SyncError( alldata, error );
} while( false );
/* close OTF file manager and reader */
OTF_Reader_close( reader );
OTF_FileManager_close( manager );
return ret;
return !error;
}

View file

@ -12,6 +12,7 @@
/* collect the data for the assigned trace processes from the given
trace file name */
bool collectData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata );
bool CollectData( AllData& alldata );
#endif /* COLLECT_DATA_H */

View file

@ -19,6 +19,8 @@
using namespace std;
#include "create_latex.h"
#include "otfprofile-mpi.h"
#include "OTF_inttypes.h"
#include "OTF_Definitions.h"
@ -251,6 +253,7 @@ static void collectiveId2String(uint64_t id, string& name)
static void write_header(fstream& tex)
{
tex << "\\documentclass[a4paper,10pt]{article}" << endl;
tex << "\\nonstopmode" << endl;
tex << "\\usepackage{amssymb}" << endl;
tex << "\\usepackage{longtable}" << endl;
tex << "\\usepackage{ifthen}" << endl;
@ -2015,9 +2018,11 @@ static void write_p2pMsgRateHist(fstream& tex, struct AllData& alldata)
*
* @param alldata data structure containing summarized profiling information
*/
bool createTex( AllData& alldata ) {
bool CreateTex( AllData& alldata ) {
bool ret= true;
bool error= false;
VerbosePrint( alldata, 1, true, "producing LaTeX output\n" );
string tex_file_name= alldata.params.output_file_prefix + ".tex";
fstream tex_file;
@ -2057,59 +2062,39 @@ bool createTex( AllData& alldata ) {
write_footer(tex_file);
tex_file.close();
VerbosePrint( alldata, 2, true, " created file: %s\n",
tex_file_name.c_str() );
#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
/* create PDF file, if desired */
if ( alldata.params.create_pdf ) {
int rc;
VerbosePrint( alldata, 1, true, "producing PDF output\n" );
/* compose pdflatex command */
ostringstream cmd;
cmd << PDFTEX << " " << tex_file_name << " >/dev/null 2>&1";
/* compose latex command */
cmd << alldata.params.latex_command << " " << tex_file_name
<< " >/dev/null 2>&1";
/* execute latex command (two times) on TEX file */
/* execute pdflatex command (two times) on TeX file */
for ( uint8_t i = 0; i < 2; i++ ) {
rc= system( cmd.str().c_str() );
VerbosePrint( alldata, 2, true, " %srunning command: %s\n",
(0 == i) ? "" : "re-", cmd.str().c_str() );
int rc= system( cmd.str().c_str() );
if ( 0 != WEXITSTATUS( rc ) ) {
cerr << "ERROR: Could not create DVI file from '"
cerr << "ERROR: Could not create PDF file from '"
<< tex_file_name << "'." << endl;
ret= false;
error= true;
break;
}
}
if ( 0 == rc ) {
/* compose DVI file name */
string dvi_file_name= tex_file_name;
dvi_file_name.replace( tex_file_name.length() - 4, 4, ".dvi" );
/* compose PDF file name */
string pdf_file_name= tex_file_name;
pdf_file_name.replace( tex_file_name.length() - 4, 4, ".pdf" );
/* compose DVI to PDF convert command */
cmd.str(""); cmd.clear();
cmd << alldata.params.dvipdf_command << " " << dvi_file_name
<< " >/dev/null 2>&1";
/* execute DVI to PDF command */
rc= system( cmd.str().c_str() );
if ( 0 != WEXITSTATUS( rc ) ) {
cerr << "ERROR: Could not convert '" << dvi_file_name
<< "' to '" << pdf_file_name << "'." << endl;
ret= false;
}
}
}
#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
return ret;
return !error;
}

View file

@ -6,11 +6,12 @@
#ifndef CREATE_LATEX_H
#define CREATE_LATEX_H
#include "datastructs.h"
/* generate PGF output */
bool createTex( AllData& alldata );
bool CreateTex( AllData& alldata );
#endif /* CREATE_LATEX_H */

View file

@ -6,6 +6,7 @@
#ifndef DATASTRUCTS_H
#define DATASTRUCTS_H
using namespace std;
#include <stdlib.h>
@ -16,6 +17,8 @@ using namespace std;
#include <list>
#include <set>
#include "mpi.h"
#include "OTF_inttypes.h"
@ -25,26 +28,84 @@ struct Params {
static const uint32_t DEFAULT_MAX_FILE_HANDLES= 50;
static const uint32_t DEFAULT_BUFFER_SIZE= 1024 * 1024;
static const string DEFAULT_OUTPUT_FILE_PREFIX() { return "result"; }
static const string DEFAULT_LATEX_COMMAND() { return "latex"; }
static const string DEFAULT_DVIPDF_COMMAND() { return "dvipdf"; }
static const uint8_t DEFAULT_VERBOSE_LEVEL= 0;
static const string DEFAULT_OUTPUT_FILE_PREFIX() { return "result"; }
uint32_t max_file_handles;
uint32_t buffer_size;
uint8_t verbose_level;
bool progress;
bool read_from_stats;
bool create_pdf;
string input_file_prefix;
string output_file_prefix;
string latex_command;
string dvipdf_command;
Params()
: max_file_handles(DEFAULT_MAX_FILE_HANDLES),
buffer_size(DEFAULT_BUFFER_SIZE),
verbose_level(DEFAULT_VERBOSE_LEVEL), progress(false),
read_from_stats(false), create_pdf(true),
output_file_prefix(DEFAULT_OUTPUT_FILE_PREFIX()),
latex_command(DEFAULT_LATEX_COMMAND()),
dvipdf_command(DEFAULT_DVIPDF_COMMAND()) {}
output_file_prefix(DEFAULT_OUTPUT_FILE_PREFIX()) {}
};
/* *** progress information *** */
struct Progress {
/* maximum number of records to read between progress updates */
static const uint64_t EVENTS_RECORD_LIMIT= 1000000;
static const uint64_t STATS_RECORD_LIMIT= 100;
/* message tag to use for communication */
static const int MSG_TAG= 500;
uint64_t cur_bytes; /* current bytes read */
uint64_t max_bytes; /* max. bytes readable */
MPI_Request send_request; /* sender request handle */
uint64_t* recv_buffers; /* receive buffers */
MPI_Request* recv_requests; /* persistent receive request handles */
MPI_Status* recv_statuses; /* receive statuses */
int* recv_indices; /* indices of completed recv. operations */
uint32_t ranks_left; /* root keeps track of ranks left to query */
};
/* *** runtime measurement *** */
struct MeasureBlock {
/* routine to get a global timestamp */
# define GETTIME() MPI_Wtime()
double start_time; /* start timestamp of measurement block */
double stop_time; /* stop timestamp of measurement block */
MeasureBlock() : start_time(-1.0), stop_time(-1.0) {}
/* start runtime measurement */
void start() {
start_time= GETTIME();
}
/* stop runtime measurement */
void stop() {
assert( -1.0 != start_time );
stop_time= GETTIME();
}
/* get result of runtime measurement */
double duration() const {
assert( -1.0 != start_time && -1.0 != stop_time );
return stop_time - start_time;
}
};
@ -413,6 +474,9 @@ struct PendingCollective {
struct AllData {
const uint32_t myRank;
const uint32_t numRanks;
/* number and list of processes to be handled by every worker */
uint32_t myProcessesNum;
uint32_t* myProcessesList;
@ -424,6 +488,12 @@ struct AllData {
/* program parameters */
Params params;
/* progress information */
Progress progress;
/* store per-measure block runtimes */
map< string, MeasureBlock > measureBlockMap;
/* clustering information for ranks */
Clustering clustering;
@ -537,7 +607,9 @@ struct AllData {
AllData() : myProcessesNum(0), myProcessesList(NULL),
AllData( uint32_t my_rank, uint32_t num_ranks ) :
myRank(my_rank), numRanks(num_ranks),
myProcessesNum(0), myProcessesList(NULL),
packbuffersize(0), packbuffer(NULL), timerResolution(0),
recvTimeKey(0) {}
@ -565,6 +637,16 @@ struct AllData {
}
char* freePackBuffer( ) {
free( packbuffer );
packbuffer= NULL;
packbuffersize= 0;
return NULL;
}
char* getPackBuffer( ) {
return packbuffer;

View file

@ -7,72 +7,42 @@ using namespace std;
#include <cassert>
#include <iostream>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include "mpi.h"
#include "otf.h"
#include "OTF_Platform.h"
#include "datastructs.h"
#include "collect_data.h"
#include "otfprofile-mpi.h"
#include "summarize_data.h"
#include "reduce_data.h"
#include "create_latex.h"
#define FPRINTF_ROOT if(my_rank == 0) fprintf
/* define the following macro to synchronize the error indicator with all
worker ranks.
This enforces that all ranks will be terminated by calling MPI_Abort if
any rank fails. This is necessary to work around a bug that appears at least
with Open MPI where calling MPI_Abort on one task doesn't terminate all
other ranks. */
#define SYNC_ERROR
/* define this macro to print result data to stdout */
/* define the following macro to print result data to stdout */
/*#define SHOW_RESULTS*/
/* define this macro to have runtime measurement of certain profile scopes */
/*#define RUNTIME_MEASUREMENT*/
#ifdef RUNTIME_MEASUREMENT
struct MeasureBlock {
# define GETTIME() MPI_Wtime()
double start_time;
double stop_time;
MeasureBlock() : start_time(-1.0), stop_time(-1.0) {}
void start() {
start_time= GETTIME();
}
void stop() {
assert( -1.0 != start_time );
stop_time= GETTIME();
}
double duration() const {
assert( -1.0 != start_time && -1.0 != stop_time );
return stop_time - start_time;
}
};
/* store per-measure block runtimes */
map < string, MeasureBlock > MeasureBlocksMap;
#endif /* RUNTIME_MEASUREMENT */
/* parse command line options
return 0 if succeeded, 1 if help text or version showed, 2 if failed */
static int parse_command_line( uint32_t my_rank, int argc, char** argv,
AllData& alldata );
return 0 if succeeded, 1 if help text or version was shown, -1 if failed */
static int parse_command_line( int argc, char** argv, AllData& alldata );
/* assign trace processes to analysis processes explicitly in order to allow
sophisticated grouping of MPI ranks/processes/threads/GPU threads/etc.
in the future, return true if succeeded */
static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
AllData& alldata );
static bool assign_procs_to_ranks( AllData& alldata );
#ifdef SHOW_RESULTS
/* show results on stdout */
@ -97,22 +67,33 @@ int main( int argc, char** argv ) {
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank );
MPI_Comm_size(MPI_COMM_WORLD, &num_ranks );
AllData alldata( my_rank, num_ranks );
do {
AllData alldata;
/* step 0: parse command line options */
if ( 0 !=
( ret= parse_command_line( my_rank, argc, argv, alldata ) ) ) {
if ( 0 != ( ret= parse_command_line( argc, argv, alldata ) ) ) {
if ( 1 == ret ) {
ret= 0;
} else { /* -1 == ret */
ret= 1;
}
break;
}
VerbosePrint( alldata, 1, true, "initializing\n" );
MPI_Barrier( MPI_COMM_WORLD );
/* step 1: assign trace processes to analysis processes */
if ( !assign_procs_to_ranks( my_rank, num_ranks, alldata ) ) {
if ( !assign_procs_to_ranks( alldata ) ) {
ret= 1;
break;
@ -121,16 +102,14 @@ int main( int argc, char** argv ) {
MPI_Barrier( MPI_COMM_WORLD );
#ifdef RUNTIME_MEASUREMENT
if ( 0 == my_rank ) {
if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) {
MeasureBlocksMap[ "analyze data" ].start();
alldata.measureBlockMap[ "analyze data" ].start();
}
#endif /* RUNTIME_MEASUREMENT */
/* step 2: collect data by reading input trace file */
if ( !collectData( my_rank, num_ranks, alldata ) ) {
if ( !CollectData( alldata ) ) {
ret= 1;
break;
@ -141,7 +120,7 @@ int main( int argc, char** argv ) {
/* step 3: summarize data; every analysis rank summarizes it's local
data independently */
if ( !summarizeData( my_rank, num_ranks, alldata ) ) {
if ( !SummarizeData( alldata ) ) {
ret= 1;
break;
@ -151,7 +130,7 @@ int main( int argc, char** argv ) {
MPI_Barrier( MPI_COMM_WORLD );
/* step 4: reduce data to master */
if ( !reduceData( my_rank, num_ranks, alldata ) ) {
if ( !ReduceData( alldata ) ) {
ret= 1;
break;
@ -160,13 +139,11 @@ int main( int argc, char** argv ) {
MPI_Barrier( MPI_COMM_WORLD );
#ifdef RUNTIME_MEASUREMENT
if ( 0 == my_rank ) {
if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) {
MeasureBlocksMap[ "analyze data" ].stop();
alldata.measureBlockMap[ "analyze data" ].stop();
}
#endif /* RUNTIME_MEASUREMENT */
/* step 5: produce outputs */
@ -190,50 +167,44 @@ int main( int argc, char** argv ) {
show_results( alldata );
#endif /* SHOW_RESULTS */
#ifdef RUNTIME_MEASUREMENT
MeasureBlocksMap[ "write tex" ].start();
#endif /* RUNTIME_MEASUREMENT */
alldata.measureBlockMap[ "produce output" ].start();
/* step 5.3: generate PGF output */
if ( !createTex( alldata ) ) {
if ( !CreateTex( alldata ) ) {
ret= 1;
break;
}
#ifdef RUNTIME_MEASUREMENT
MeasureBlocksMap[ "write tex" ].stop();
#endif /* RUNTIME_MEASUREMENT */
alldata.measureBlockMap[ "produce output" ].stop();
}
} while( false );
#ifdef RUNTIME_MEASUREMENT
/* show runtime measurement results */
if ( 0 == my_rank && 0 == ret ) {
cout << endl << "runtime measurement results:" << endl;
for ( map < string, MeasureBlock >::const_iterator it=
MeasureBlocksMap.begin(); it != MeasureBlocksMap.end(); it++ ) {
cout << " " << it->first << ": " << it->second.duration()
<< "s" << endl;
}
}
#endif /* RUNTIME_MEASUREMENT */
/* either finalize or abort on error */
if ( 0 == ret || 1 == ret ) {
if ( 0 == ret ) {
/* show runtime measurement results */
if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) {
cout << "runtime measurement results:" << endl;
for ( map < string, MeasureBlock >::const_iterator it=
alldata.measureBlockMap.begin();
it != alldata.measureBlockMap.end(); it++ ) {
cout << " " << it->first << ": " << it->second.duration()
<< "s" << endl;
}
}
MPI_Finalize();
VerbosePrint( alldata, 1, true, "done\n" );
} else {
MPI_Abort( MPI_COMM_WORLD, ret );
@ -244,37 +215,12 @@ int main( int argc, char** argv ) {
}
static int parse_command_line( uint32_t my_rank, int argc, char** argv,
AllData& alldata ) {
static int parse_command_line( int argc, char** argv, AllData& alldata ) {
int ret= 0;
Params& params= alldata.params;
/* show help text if no options are given */
if ( 1 == argc ) {
if ( 0 == my_rank ) {
show_helptext();
}
return 1;
}
/* read environment variables */
char* env;
env= getenv( "OTF_PROFILE_LATEX" );
if ( env && 0 < strlen( env ) )
params.latex_command= env;
env= getenv( "OTF_PROFILE_DVIPDF" );
if ( env && 0 < strlen( env ) )
params.dvipdf_command= env;
/* parse command line options */
enum { ERR_OK, ERR_OPT_UNKNOWN, ERR_ARG_MISSING, ERR_ARG_INVALID };
@ -288,7 +234,7 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
if ( 0 == strcmp( "-h", argv[i] ) ||
0 == strcmp( "--help", argv[i] ) ) {
if ( 0 == my_rank ) {
if ( 0 == alldata.myRank ) {
show_helptext();
@ -300,13 +246,27 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
/* -V */
} else if ( 0 == strcmp( "-V", argv[i] ) ) {
FPRINTF_ROOT( stdout, "%u.%u.%u \"%s\"\n",
OTF_VERSION_MAJOR, OTF_VERSION_MINOR, OTF_VERSION_SUB,
OTF_VERSION_STRING );
if ( 0 == alldata.myRank ) {
printf( "%u.%u.%u \"%s\"\n",
OTF_VERSION_MAJOR, OTF_VERSION_MINOR, OTF_VERSION_SUB,
OTF_VERSION_STRING );
}
ret= 1;
break;
/* -v */
} else if ( 0 == strcmp( "-v", argv[i] ) ) {
params.verbose_level++;
/* -p */
} else if ( 0 == strcmp( "-p", argv[i] ) ) {
params.progress= true;
/* -f */
} else if ( 0 == strcmp( "-f", argv[i] ) ) {
@ -364,11 +324,15 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
params.read_from_stats= true;
#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
/* --nopdf */
} else if ( 0 == strcmp( "--nopdf", argv[i] ) ) {
params.create_pdf= false;
#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
/* input file or unknown option */
} else {
@ -394,74 +358,102 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
}
/* show specific message on error */
if ( ERR_OK != parse_error ) {
switch( parse_error ) {
if ( 0 == alldata.myRank ) {
case ERR_OPT_UNKNOWN:
switch( parse_error ) {
FPRINTF_ROOT( stderr, "ERROR: Unknown option '%s'.\n", argv[i] );
break;
case ERR_OPT_UNKNOWN:
case ERR_ARG_MISSING:
cerr << "ERROR: Unknown option '" << argv[i] << "'."
<< endl;
break;
FPRINTF_ROOT( stderr, "ERROR: Expected argument for option '%s'.\n",
argv[i] );
break;
case ERR_ARG_MISSING:
case ERR_ARG_INVALID:
cerr << "ERROR: Expected argument for option '" << argv[i]
<< "'." << endl;
break;
FPRINTF_ROOT( stderr, "ERROR: Invalid argument for option '%s'.\n",
argv[i] );
break;
case ERR_ARG_INVALID:
default:
cerr << "ERROR: Invalid argument for option '" << argv[i]
<< "'." << endl;
break;
break;
default:
break;
}
}
ret= 2;
ret= -1;
/* show help text if no input trace file is given */
} else if ( 0 == params.input_file_prefix.length() ) {
if ( 0 == alldata.myRank ) {
show_helptext();
}
ret= 1;
}
return ret;
}
static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
AllData& alldata ) {
static bool assign_procs_to_ranks( AllData& alldata ) {
bool ret= true;
bool error= false;
if ( 0 == my_rank ) {
OTF_FileManager* manager= NULL;
OTF_MasterControl* master= NULL;
if ( 0 == alldata.myRank ) {
/* rank 0 reads OTF master control of input trace file */
OTF_FileManager* manager= OTF_FileManager_open( 1 );
manager= OTF_FileManager_open( 1 );
assert( manager );
OTF_MasterControl* master= OTF_MasterControl_new( manager );
master= OTF_MasterControl_new( manager );
assert( master );
int master_read_ret=
OTF_MasterControl_read( master,
alldata.params.input_file_prefix.c_str() );
/* that's the first access to the input trace file; show tidy error
message if failed */
if ( 0 == master_read_ret ) {
cerr << "ERROR: Unable to open file '"
<< alldata.params.input_file_prefix << ".otf' for reading."
<< endl;
error= true;
}
}
/* broadcast error indicator to workers because Open MPI had all
ranks except rank 0 waiting endlessly in the MPI_Recv, when the '.otf' file
was absent. */
if ( SyncError( alldata, error, 0 ) ) {
return false;
}
if ( 0 == alldata.myRank ) {
do {
int master_read_ret=
OTF_MasterControl_read( master,
alldata.params.input_file_prefix.c_str() );
/* that's the first access to the input trace file; show tidy error
message if failed */
if ( 0 == master_read_ret ) {
cerr << "ERROR: Unable to open file '"
<< alldata.params.input_file_prefix << ".otf' for reading."
<< endl;
ret= false;
break;
}
/* fill the global array of processes */
alldata.myProcessesNum= OTF_MasterControl_getrCount( master );
@ -507,19 +499,20 @@ static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
/* remaining ranks and remaining workers */
uint32_t r_ranks= alldata.myProcessesNum;
uint32_t r_workers= num_ranks;
uint32_t r_workers= alldata.numRanks;
uint32_t pos= 0;
bool warn_for_empty= true;
for ( int w= 0; w < (int)num_ranks; w++ ) {
for ( int w= 0; w < (int)alldata.numRanks; w++ ) {
uint32_t n= ( ( r_ranks / r_workers ) * r_workers < r_ranks) ?
( r_ranks / r_workers +1 ) : ( r_ranks / r_workers );
if ( ( 0 == n ) && warn_for_empty ) {
cerr << "Warning: more analysis ranks than trace processes, " <<
"ranks " << w << " to " << num_ranks -1 << " are unemployed" << endl;
cerr << "Warning: more analysis ranks than trace processes, "
<< "ranks " << w << " to " << alldata.numRanks -1
<< " are unemployed" << endl;
warn_for_empty= false;
}
@ -578,7 +571,7 @@ static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
}
cerr << endl;*/
return ret;
return !error;
}
@ -802,6 +795,9 @@ static void show_helptext() {
<< " options:" << endl
<< " -h, --help show this help message" << endl
<< " -V show OTF version" << endl
<< " -v increase output verbosity" << endl
<< " (can be used more than once)" << endl
<< " -p show progress" << endl
<< " -f <n> max. number of filehandles available per rank" << endl
<< " (default: " << Params::DEFAULT_MAX_FILE_HANDLES << ")" << endl
<< " -b <size> set buffersize of the reader" << endl
@ -809,15 +805,102 @@ static void show_helptext() {
<< " -o <prefix> specify the prefix of output file(s)" << endl
<< " (default: " << Params::DEFAULT_OUTPUT_FILE_PREFIX() << ")" << endl
<< " --stat read only summarized information, no events" << endl
#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
<< " --nopdf do not produce PDF output" << endl
<< endl
<< " environment variables:" << endl
<< " OTF_PROFILE_LATEX LaTeX command" << endl
<< " (default: " << Params::DEFAULT_LATEX_COMMAND() << ")" << endl
<< " OTF_PROFILE_DVIPDF DVI to PDF converter command" << endl
<< " (default: " << Params::DEFAULT_DVIPDF_COMMAND() << ")" << endl
#else /* PDFTEX && HAVE_PGFPLOTS_1_4 */
<< endl
<< " PDF creation requires the PGFPLOTS package version >1.4" << endl
<< " http://sourceforge.net/projects/pgfplots/ " << endl
#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
<< endl;
}
void VerbosePrint( AllData& alldata, uint8_t level, bool root_only,
const char* fmt, ... ) {
if ( alldata.params.verbose_level >= level ) {
va_list ap;
va_start( ap, fmt );
/* either only rank 0 prints the message */
if ( root_only ) {
if ( 0 == alldata.myRank ) {
vprintf( fmt, ap );
}
/* or all ranks print the message */
} else {
char msg[1024];
/* prepend current rank to message */
snprintf( msg, sizeof( msg ) -1, "[%u] ", alldata.myRank );
vsnprintf( msg + strlen( msg ), sizeof( msg ) -1, fmt, ap );
/* print message */
printf( "%s ", msg );
}
va_end( ap );
}
}
bool SyncError( AllData& alldata, bool& error, uint32_t root ) {
#ifdef SYNC_ERROR
if ( 1 < alldata.numRanks ) {
int buf= ( error ) ? 1 : 0;
/* either broadcast the error indicator from one rank (root)
or reduce them from all */
if ( root != (uint32_t)-1 ) {
MPI_Bcast( &buf, 1, MPI_INT, (int)root, MPI_COMM_WORLD );
error= ( 1 == buf );
} else {
int recv_buf;
MPI_Allreduce( &buf, &recv_buf, 1, MPI_INT, MPI_MAX,
MPI_COMM_WORLD );
error= ( 1 == recv_buf );
}
}
#endif /* SYNC_ERROR */
return error;
}
uint64_t Logi( uint64_t x, uint64_t b ) {
assert( b > 1 );
uint64_t c= 1;
uint64_t i= 0;
while( c <= x ) {
c*= b;
i++;
}
return i;
}

View file

@ -0,0 +1,30 @@
/*
This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011.
Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
*/
#ifndef OTFPROFILE_MPI_H
#define OTFPROFILE_MPI_H
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif /* HAVE_CONFIG_H */
#include "datastructs.h"
/* print verbose message to stdout
(if root_only is true only rank 0 will print the message) */
void VerbosePrint( AllData& alldata, uint8_t level, bool root_only,
const char* fmt, ... );
/* synchronize error indicator with all worker ranks
(either broadcast from one rank (root) or reduce from all) */
bool SyncError( AllData& alldata, bool& error, uint32_t root= (uint32_t)-1 );
/* logarithm to base b for unsigned 64-bit integer x */
uint64_t Logi( uint64_t x, uint64_t b= 2 );
#endif /* OTFPROFILE_MPI_H */
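
Typical call sites for these helpers, as used in the collect_data.cpp diff
above; SyncError() is called in both of its forms:

    /* broadcast the error flag from rank 0 to all workers, e.g. after only
       rank 0 has tried to read the definitions */
    if ( SyncError( alldata, error, 0 ) ) break;

    /* combine the error flags of all ranks (MPI_MAX reduction), e.g. after
       every rank has read its share of events or statistics */
    SyncError( alldata, error );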

View file

@ -8,8 +8,7 @@ using namespace std;
#include <cassert>
#include <iostream>
#include "mpi.h"
#include "otfprofile-mpi.h"
#include "reduce_data.h"
@ -546,71 +545,103 @@ static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
}
bool reduceData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
bool ReduceData( AllData& alldata ) {
bool ret= true;
/* implement reduction myself because MPI and C++ STL don't play with each other */
if ( 1 < alldata.numRanks ) {
/* how many rounds until master has all the data? */
uint32_t round= 1;
while ( round < num_ranks ) {
VerbosePrint( alldata, 1, true, "reducing data\n" );
uint32_t peer= my_rank ^ round;
/* implement reduction myself because MPI and C++ STL don't play with
each other */
/* if peer rank is not there, do nothing but go on */
if ( peer >= num_ranks ) {
/* how many rounds until master has all the data? */
uint32_t num_rounds= Logi( alldata.numRanks ) -1;
uint32_t round_no= 0;
uint32_t round= 1;
while ( round < alldata.numRanks ) {
round_no++;
if ( 1 == alldata.params.verbose_level ) {
VerbosePrint( alldata, 1, true, " round %u / %u\n",
round_no, num_rounds );
}
uint32_t peer= alldata.myRank ^ round;
/* if peer rank is not there, do nothing but go on */
if ( peer >= alldata.numRanks ) {
round= round << 1;
continue;
}
/* send to smaller peer, receive from larger one */
uint32_t sizes[10];
char* buffer;
if ( alldata.myRank < peer ) {
MPI_Status status;
MPI_Recv( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD,
&status );
// DEBUG
//cout << " round " << round << " recv " << peer << "--> " <<
//my_rank << " with " <<
//sizes[0] << " bytes, " <<
//sizes[1] << ", " <<
//sizes[2] << ", " <<
//sizes[3] << ", " <<
//sizes[4] << "" << endl << flush;
buffer= prepare_worker_data( alldata, sizes );
VerbosePrint( alldata, 2, false,
"round %u / %u: receiving %u bytes from rank %u\n",
round_no, num_rounds, sizes[0], peer );
MPI_Recv( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD,
&status );
unpack_worker_data( alldata, sizes );
} else {
buffer= pack_worker_data( alldata, sizes );
// DEBUG
//cout << " round " << round << " send " << my_rank <<
//" --> " << peer << " with " <<
//sizes[0] << " bytes, " <<
//sizes[1] << ", " <<
//sizes[2] << ", " <<
//sizes[3] << ", " <<
//sizes[4] << "" << endl << flush;
VerbosePrint( alldata, 2, false,
"round %u / %u: sending %u bytes to rank %u\n",
round_no, num_rounds, sizes[0], peer );
MPI_Send( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD );
MPI_Send( buffer, sizes[0], MPI_PACKED, peer, 5,
MPI_COMM_WORLD );
/* every worker has to send off its data at most once;
after that, break from the collective reduction operation */
break;
}
round= round << 1;
continue;
}
/* send to smaller peer, receive from larger one */
uint32_t sizes[10];
char* buffer;
if ( my_rank < peer ) {
MPI_Status status;
MPI_Recv( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD, &status );
// DEBUG
//cout << " round " << round << " recv " << peer << "--> "<< my_rank << " with " <<
//sizes[0] << " bytes, " <<
//sizes[1] << ", " <<
//sizes[2] << ", " <<
//sizes[3] << ", " <<
//sizes[4] << "" << endl << flush;
buffer= prepare_worker_data( alldata, sizes );
MPI_Recv( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD, &status );
unpack_worker_data( alldata, sizes );
} else {
buffer= pack_worker_data( alldata, sizes );
// DEBUG
//cout << " round " << round << " send " << my_rank << " --> " << peer << " with " <<
//sizes[0] << " bytes, " <<
//sizes[1] << ", " <<
//sizes[2] << ", " <<
//sizes[3] << ", " <<
//sizes[4] << "" << endl << flush;
MPI_Send( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD );
MPI_Send( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD );
/* every worker has to send off its data at most once;
after that, break from the collective reduction operation */
break;
}
round= round << 1;
alldata.freePackBuffer();
}

View file

@ -6,11 +6,12 @@
#ifndef REDUCE_DATA_H
#define REDUCE_DATA_H
#include "datastructs.h"
/* reduce the data to the master process */
bool reduceData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata );
bool ReduceData( AllData& alldata );
#endif /* REDUCE_DATA_H */

View file

@ -8,13 +8,10 @@ using namespace std;
#include <cassert>
#include <iostream>
#include "mpi.h"
#include "summarize_data.h"
static void get_clustering( uint32_t my_rank, uint32_t num_ranks,
AllData& alldata ) {
static void get_clustering( AllData& alldata ) {
uint32_t r_processes= alldata.allProcesses.size();
uint32_t r_clusters= Clustering::MAX_CLUSTERS;
@ -44,8 +41,7 @@ static void get_clustering( uint32_t my_rank, uint32_t num_ranks,
}
static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
AllData& alldata ) {
static void share_clustering( AllData& alldata ) {
MPI_Barrier( MPI_COMM_WORLD );
@ -53,7 +49,7 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
int buffer_size= 0;
int buffer_pos= 0;
if ( my_rank == 0 ) {
if ( 0 == alldata.myRank ) {
/* get size needed to send clustering information to workers */
@ -86,7 +82,7 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
/* pack clustering information to buffer */
if ( my_rank == 0 ) {
if ( 0 == alldata.myRank ) {
/* alldata.clustering.clustersToProcesses.size() */
uint64_t clust_proc_map_size=
@ -128,7 +124,7 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
/* unpack clustering information from buffer */
if ( my_rank != 0 ) {
if ( 0 != alldata.myRank ) {
/* alldata.clustering.clustersToProcesses.size() */
uint64_t clust_proc_map_size;
@ -168,29 +164,29 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
}
bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
bool SummarizeData( AllData& alldata ) {
bool ret= true;
bool error= false;
/* rank 0 gets clustering information */
if ( my_rank == 0 ) {
if ( 0 == alldata.myRank ) {
get_clustering( my_rank, num_ranks, alldata );
get_clustering( alldata );
}
/* share clustering information to workers */
if ( num_ranks > 1 ) {
if ( 1 < alldata.numRanks ) {
share_clustering( my_rank, num_ranks, alldata );
share_clustering( alldata );
}
/* macro to set min, max to sum before summarizing */
# define MINMAX2SUM(v) \
if( (v).cnt != 0 ) { \
if( 0 != (v).cnt ) { \
(v).cnt = 1; \
(v).min= (v).max= (v).sum; \
} else { \
@ -243,9 +239,9 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
if ( alldata.clustering.enabled ) {
cluster_a= alldata.clustering.process2cluster( it->first.a );
assert( cluster_a != 0 );
assert( 0 != cluster_a );
cluster_b= alldata.clustering.process2cluster( it->first.b );
assert( cluster_b != 0 );
assert( 0 != cluster_b );
}
@ -273,7 +269,7 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
if ( alldata.clustering.enabled ) {
cluster= alldata.clustering.process2cluster( it->first );
assert( cluster != 0 );
assert( 0 != cluster );
}
@ -302,7 +298,7 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
if ( alldata.clustering.enabled ) {
cluster= alldata.clustering.process2cluster( it->first.b );
assert( cluster != 0 );
assert( 0 != cluster );
}
@ -319,5 +315,5 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
alldata.collectiveMapPerRank.clear();
}
return ret;
return !error;
}

Просмотреть файл

@ -6,11 +6,12 @@
#ifndef SUMMARIZE_DATA_H
#define SUMMARIZE_DATA_H
#include "datastructs.h"
/* summarize the data for all trace processes on the current worker */
bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata );
bool SummarizeData( AllData& alldata );
#endif /* SUMMARIZE_DATA_H */

View file

@ -812,10 +812,12 @@ parseCommandLine( int argc, char ** argv )
Params.droprecvs = true;
}
#endif // VT_UNIFY_HOOKS_MSGMATCH
#if defined(HAVE_ZLIB) && HAVE_ZLIB
else if( strcmp( argv[i], "--nocompress" ) == 0 )
{
Params.docompress = false;
}
#endif // HAVE_ZLIB
else if( strcmp( argv[i], "-k" ) == 0
|| strcmp( argv[i], "--keeplocal" ) == 0 )
{
@ -1055,8 +1057,10 @@ showUsage()
<< " -v, --verbose Increase output verbosity." << std::endl
<< " (can be used more than once)" << std::endl
<< std::endl
#if defined(HAVE_ZLIB) && HAVE_ZLIB
<< " --nocompress Don't compress output trace files." << std::endl
<< std::endl
#endif // HAVE_ZLIB
#ifdef VT_UNIFY_HOOKS_MSGMATCH
<< " --nomsgmatch Don't match messages." << std::endl
<< std::endl

View file

@ -63,10 +63,19 @@
struct ParamsS
{
ParamsS()
: verbose_level( 0 ), docompress( true ), doclean( true ),
: verbose_level( 0 ), docompress( false ), doclean( true ),
showusage( false ), showversion( false ), showprogress( false ),
bequiet( false ), domsgmatch( true ), droprecvs( false ),
prof_sort_flags( 0x22 ) {}
bequiet( false ), domsgmatch( false ), droprecvs( false ),
prof_sort_flags( 0x22 )
{
#if defined(HAVE_ZLIB) && HAVE_ZLIB
docompress = true;
#endif // HAVE_ZLIB
#ifdef VT_UNIFY_HOOKS_MSGMATCH
domsgmatch = true;
#endif // VT_UNIFY_HOOKS_MSGMATCH
}
std::string in_file_prefix; // input trace file prefix
std::string out_file_prefix; // output trace file prefix

View file

@ -687,12 +687,28 @@ Wrapper::parseCommandLine( int argc, char ** argv )
|| arg.compare( "-fopenmp" ) == 0
|| arg.compare( "-Popenmp" ) == 0
|| arg.compare( "-xopenmp" ) == 0
|| arg.compare( "-mp" ) == 0
|| arg.compare( "-qsmp=omp" ) == 0 )
|| arg.compare( "-mp" ) == 0 )
{
m_pConfig->setUsesThreads( true );
m_pConfig->setUsesOpenMP( true );
}
else if( arg.length() > 6 && arg.compare( 0, 6, "-qsmp=" ) == 0 )
{
char carg[128];
strncpy( carg, arg.substr(6).c_str(), sizeof( carg ) - 1 );
carg[sizeof(carg) - 1] = '\0';
char * token = strtok( carg, ":" );
do
{
if( strcmp( token, "omp" ) == 0 )
{
m_pConfig->setUsesThreads( true );
m_pConfig->setUsesOpenMP( true );
break;
}
} while( ( token = strtok( 0, ":" ) ) );
}
//
// nvcc's pthread/openmp flag
//

View file

@ -1222,6 +1222,7 @@ int vt_env_max_threads()
int vt_env_compression()
{
#if defined(HAVE_ZLIB) && HAVE_ZLIB
static int compression = -1;
char* tmp;
@ -1238,6 +1239,9 @@ int vt_env_compression()
}
}
return compression;
#else /* HAVE_ZLIB */
return 0;
#endif /* HAVE_ZLIB */
}
int vt_env_java_native()

View file

@ -796,7 +796,7 @@ static void unify_traces(void)
/* compose unify arguments */
argv = (char**)calloc(10 + vt_env_verbose(), sizeof(char*));
argv = (char**)calloc(10 + vt_env_verbose()+1, sizeof(char*));
if (argv == NULL) vt_error();
argv[0] = NULL;
@ -816,12 +816,14 @@ static void unify_traces(void)
}
argc++;
#if defined(HAVE_ZLIB) && HAVE_ZLIB
if (!vt_env_compression()) argv[argc++] = strdup("--nocompress");
#endif /* HAVE_ZLIB */
if (!vt_env_do_clean()) argv[argc++] = strdup("-k");
if (vt_env_verbose() == 0) argv[argc++] = strdup("-q");
else if (vt_env_verbose() >= 2)
{
for (i=1;i<vt_env_verbose();i++)
for (i=0;i<vt_env_verbose()+1;i++)
argv[argc++] = strdup("-v");
argv[argc++] = strdup("-p");
}