Changes to OTF:
otfprofile-mpi: - added progress display - added verbose messages - added functions to synchronize the error indicator to all worker ranks (enforces that all ranks will be terminated by calling MPI_Abort if anyone fails) - wrap def. comments after 80 characters - use pdf[la]tex instead of latex/dvipdf to convert TeX output to PDF - added configure checks for pdf[la]tex and PGFPLOTS v1.4 - fixed function invocation statistics generated from summarized information (--stat) - fixed memory leak Changes to VT: MPI wrappers: - fixed wrapper generation for MPI implementations which don't support the MPI-2 standard (e.g. MVAPICH, MPICH) - corrected IN_PLACE denotation for MPI_Alltoall* and MPI_Scatter* vtwrapper: - corrected detection of IBM XL's OpenMP flag -qsmp=*:omp:* vtunify: - fixed faulty cleanup of temporary files which occurred if VT is configured without trace compression support This commit was SVN r24851.
Этот коммит содержится в:
родитель
a4b2bd903b
Коммит
5e6919b4e1
@ -3,6 +3,12 @@
|
|||||||
(see extlib/otf/ChangeLog)
|
(see extlib/otf/ChangeLog)
|
||||||
- improved filtering of CUDA kernels
|
- improved filtering of CUDA kernels
|
||||||
- fixed unification of local process group definitions
|
- fixed unification of local process group definitions
|
||||||
|
- fixed wrapper generation for MPI implementations which don't support
|
||||||
|
the MPI-2 standard
|
||||||
|
- fixed faulty cleanup of temporary files in vtunify which occurred if
|
||||||
|
VT is configured without trace compression support
|
||||||
|
- fixed detection of OpenMP flag '-qsmp=*:omp:*' in the compiler
|
||||||
|
wrappers
|
||||||
|
|
||||||
5.11
|
5.11
|
||||||
- updated version of internal OTF to 1.9sawfish
|
- updated version of internal OTF to 1.9sawfish
|
||||||
|
@ -63,7 +63,10 @@ AC_DEFUN([ACVT_ZLIB],
|
|||||||
])
|
])
|
||||||
|
|
||||||
AS_IF([test x"$ZLIBLIB" != x -a x"$zlib_error" = "xno"],
|
AS_IF([test x"$ZLIBLIB" != x -a x"$zlib_error" = "xno"],
|
||||||
[have_zlib="yes"])
|
[
|
||||||
|
have_zlib="yes"
|
||||||
|
AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the ZLIB.])
|
||||||
|
])
|
||||||
|
|
||||||
AS_IF([test x"$force_zlib" = "xyes" -a x"$zlib_error" = "xyes"],
|
AS_IF([test x"$force_zlib" = "xyes" -a x"$zlib_error" = "xyes"],
|
||||||
[exit 1])
|
[exit 1])
|
||||||
|
@ -11,8 +11,8 @@ have_mpi2_1sided=@VT_MPIGEN_HAVE_MPI2_1SIDED@
|
|||||||
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
|
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
|
||||||
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
|
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
|
||||||
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
|
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
|
||||||
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
|
have_mpi2=0
|
||||||
if [ $have_mpi2_thread -o $have_mpi2_1sided -o $have_mpi2_extcoll -o $have_mpi2_proc ] ; then
|
if [ $have_mpi2_thread = 1 -o $have_mpi2_1sided = 1 -o $have_mpi2_extcoll = 1 -o $have_mpi2_proc = 1 ] ; then
|
||||||
have_mpi2=1
|
have_mpi2=1
|
||||||
fi
|
fi
|
||||||
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi
|
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi
|
||||||
|
@ -23,8 +23,8 @@ have_mpi2_1sided=@VT_MPIGEN_HAVE_MPI2_1SIDED@
|
|||||||
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
|
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
|
||||||
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
|
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
|
||||||
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
|
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
|
||||||
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
|
have_mpi2=0
|
||||||
if [ $have_mpi2_thread -o $have_mpi2_1sided -o $have_mpi2_extcoll -o $have_mpi2_proc ] ; then
|
if [ $have_mpi2_thread = 1 -o $have_mpi2_1sided = 1 -o $have_mpi2_extcoll = 1 -o $have_mpi2_proc = 1 ] ; then
|
||||||
have_mpi2=1
|
have_mpi2=1
|
||||||
fi
|
fi
|
||||||
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi
|
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi
|
||||||
|
@ -11,7 +11,8 @@ have_mpi2_1sided=@VT_MPIGEN_HAVE_MPI2_1SIDED@
|
|||||||
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
|
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
|
||||||
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
|
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
|
||||||
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
|
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
|
||||||
if [ $have_mpi2_thread -o $have_mpi2_1sided -o $have_mpi2_extcoll -o $have_mpi2_proc ] ; then
|
have_mpi2=0
|
||||||
|
if [ $have_mpi2_thread = 1 -o $have_mpi2_1sided = 1 -o $have_mpi2_extcoll = 1 -o $have_mpi2_proc = 1 ] ; then
|
||||||
have_mpi2=1
|
have_mpi2=1
|
||||||
fi
|
fi
|
||||||
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi
|
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi
|
||||||
|
@ -40,8 +40,8 @@ VT_MPI_INT MPI_Address(void* location, MPI_Aint* address_CLASS_SINGLE_OUT);
|
|||||||
VT_MPI_INT MPI_Allgather(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
|
VT_MPI_INT MPI_Allgather(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
|
||||||
VT_MPI_INT MPI_Allgatherv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* displs, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
|
VT_MPI_INT MPI_Allgatherv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* displs, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
|
||||||
VT_MPI_INT MPI_Allreduce(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
|
VT_MPI_INT MPI_Allreduce(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
|
||||||
VT_MPI_INT MPI_Alltoall(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
|
VT_MPI_INT MPI_Alltoall(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
|
||||||
VT_MPI_INT MPI_Alltoallv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT* sendcounts, VT_MPI_INT* sdispls, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* rdispls, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
|
VT_MPI_INT MPI_Alltoallv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* sdispls, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* rdispls, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
|
||||||
VT_MPI_INT MPI_Attr_delete(MPI_Comm comm, VT_MPI_INT keyval);
|
VT_MPI_INT MPI_Attr_delete(MPI_Comm comm, VT_MPI_INT keyval);
|
||||||
VT_MPI_INT MPI_Attr_get(MPI_Comm comm, VT_MPI_INT keyval, void* attribute_val, VT_MPI_INT* flag);
|
VT_MPI_INT MPI_Attr_get(MPI_Comm comm, VT_MPI_INT keyval, void* attribute_val, VT_MPI_INT* flag);
|
||||||
VT_MPI_INT MPI_Attr_put(MPI_Comm comm, VT_MPI_INT keyval, void* attribute_val);
|
VT_MPI_INT MPI_Attr_put(MPI_Comm comm, VT_MPI_INT keyval, void* attribute_val);
|
||||||
@ -129,8 +129,8 @@ VT_MPI_INT MPI_Request_free(MPI_Request* request_CLASS_SINGLE_IO);
|
|||||||
VT_MPI_INT MPI_Rsend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
|
VT_MPI_INT MPI_Rsend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
|
||||||
VT_MPI_INT MPI_Rsend_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
|
VT_MPI_INT MPI_Rsend_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
|
||||||
VT_MPI_INT MPI_Scan(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_OTHER*/
|
VT_MPI_INT MPI_Scan(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_OTHER*/
|
||||||
VT_MPI_INT MPI_Scatter(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
|
VT_MPI_INT MPI_Scatter(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
|
||||||
VT_MPI_INT MPI_Scatterv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT* sendcounts, VT_MPI_INT* displs, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
|
VT_MPI_INT MPI_Scatterv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* displs, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
|
||||||
VT_MPI_INT MPI_Send(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
|
VT_MPI_INT MPI_Send(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
|
||||||
VT_MPI_INT MPI_Send_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
|
VT_MPI_INT MPI_Send_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
|
||||||
VT_MPI_INT MPI_Sendrecv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, VT_MPI_INT dest, VT_MPI_INT sendtag, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT source, VT_MPI_INT recvtag, MPI_Comm comm, MPI_Status* status_CLASS_SINGLE_OUT);
|
VT_MPI_INT MPI_Sendrecv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, VT_MPI_INT dest, VT_MPI_INT sendtag, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT source, VT_MPI_INT recvtag, MPI_Comm comm, MPI_Status* status_CLASS_SINGLE_OUT);
|
||||||
|
@ -66,7 +66,7 @@ BODY { font-family: sans-serif; }
|
|||||||
<P>
|
<P>
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
<B><BIG CLASS="XHUGE">VampirTrace 5.11 User Manual</BIG></B>
|
<B><BIG CLASS="XHUGE">VampirTrace 5.11.1 User Manual</BIG></B>
|
||||||
<BR>
|
<BR>
|
||||||
<BR>
|
<BR>
|
||||||
<BR>
|
<BR>
|
||||||
@ -252,20 +252,20 @@ OpenMP events, and performance counters.
|
|||||||
<P>
|
<P>
|
||||||
After a successful tracing run, VampirTrace writes all collected data to a
|
After a successful tracing run, VampirTrace writes all collected data to a
|
||||||
trace file in the Open Trace Format (OTF)<A NAME="tex2html3"
|
trace file in the Open Trace Format (OTF)<A NAME="tex2html3"
|
||||||
HREF="#foot1144"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1146"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||||
As a result, the information is available for post-mortem analysis and
|
As a result, the information is available for post-mortem analysis and
|
||||||
visualization by various tools.
|
visualization by various tools.
|
||||||
Most notably, VampirTrace provides the input data for the Vampir analysis
|
Most notably, VampirTrace provides the input data for the Vampir analysis
|
||||||
and visualization tool<A NAME="tex2html5"
|
and visualization tool<A NAME="tex2html5"
|
||||||
HREF="#foot1145"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1147"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
VampirTrace is included in OpenMPI 1.3 and later versions.
|
VampirTrace is included in OpenMPI 1.3 and later versions.
|
||||||
If not disabled explicitly, VampirTrace is built automatically when installing
|
If not disabled explicitly, VampirTrace is built automatically when installing
|
||||||
OpenMPI<A NAME="tex2html7"
|
OpenMPI<A NAME="tex2html7"
|
||||||
HREF="#foot1146"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1148"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||||
|
|
||||||
<P>
|
<P>
|
||||||
@ -1083,7 +1083,7 @@ in a single file, that
|
|||||||
The names in between may contain wildcards as ``?'', ``*', and ``#'', each entry gets a new line.
|
The names in between may contain wildcards as ``?'', ``*', and ``#'', each entry gets a new line.
|
||||||
The lists end with <TT>END[_FILE]_<INCLUDE|EXCLUDE>_LIST</TT>. For further information on selective
|
The lists end with <TT>END[_FILE]_<INCLUDE|EXCLUDE>_LIST</TT>. For further information on selective
|
||||||
profiling have a look at the TAU documentation<A NAME="tex2html11"
|
profiling have a look at the TAU documentation<A NAME="tex2html11"
|
||||||
HREF="#foot1170"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1172"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||||
To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
|
To announce the file through the compiler wrapper use the option <TT>-vt:tau</TT>:
|
||||||
<PRE>
|
<PRE>
|
||||||
@ -1100,7 +1100,7 @@ Binary Instrumentation Using Dyninst
|
|||||||
The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to
|
The option <TT>-vt:inst dyninst</TT> is used with the compiler wrapper to
|
||||||
instrument the application during runtime (binary instrumentation), by using
|
instrument the application during runtime (binary instrumentation), by using
|
||||||
Dyninst<A NAME="tex2html13"
|
Dyninst<A NAME="tex2html13"
|
||||||
HREF="#foot1171"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1173"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>.
|
||||||
Recompiling is not necessary for this kind of instrumentation,
|
Recompiling is not necessary for this kind of instrumentation,
|
||||||
but relinking:
|
but relinking:
|
||||||
@ -1820,7 +1820,7 @@ for the enhanced timer synchronization:
|
|||||||
|
|
||||||
<UL>
|
<UL>
|
||||||
<LI>CLAPACK<A NAME="tex2html15"
|
<LI>CLAPACK<A NAME="tex2html15"
|
||||||
HREF="#foot1181"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="#foot1183"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A>
|
||||||
</LI>
|
</LI>
|
||||||
<LI>AMD ACML
|
<LI>AMD ACML
|
||||||
@ -2190,6 +2190,14 @@ Controls how VampirTrace handles synchronizing CUDA API calls, especially
|
|||||||
introduces a minimal overhead but increases timer precision and prevents
|
introduces a minimal overhead but increases timer precision and prevents
|
||||||
flushes elsewhere in the trace.
|
flushes elsewhere in the trace.
|
||||||
|
|
||||||
|
</DD>
|
||||||
|
<DT></DT>
|
||||||
|
<DD><TT>VT_CUDATRACE_ERROR</TT> (default: <TT>no</TT>)
|
||||||
|
<BR>
|
||||||
|
Print out an error message and exit the program, if a CUDA wrapper call
|
||||||
|
does not return 'cudaSuccess'. The default is just a warning message
|
||||||
|
without program exit.
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT></DT>
|
<DT></DT>
|
||||||
<DD><TT>VT_CUPTI_METRICS</TT> (default: <TT>""</TT>)
|
<DD><TT>VT_CUPTI_METRICS</TT> (default: <TT>""</TT>)
|
||||||
@ -3373,21 +3381,21 @@ by the Linux 2.6 kernel are shown in the table.
|
|||||||
<P>
|
<P>
|
||||||
<BR><HR><H4>Footnotes</H4>
|
<BR><HR><H4>Footnotes</H4>
|
||||||
<DL>
|
<DL>
|
||||||
<DT><A NAME="foot1144">... (OTF)</A><A
|
<DT><A NAME="foot1146">... (OTF)</A><A
|
||||||
HREF="UserManual.html#tex2html3"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="UserManual.html#tex2html3"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD><TT><A NAME="tex2html4"
|
<DD><TT><A NAME="tex2html4"
|
||||||
HREF="http://www.tu-dresden.de/zih/otf">http://www.tu-dresden.de/zih/otf</A></TT>
|
HREF="http://www.tu-dresden.de/zih/otf">http://www.tu-dresden.de/zih/otf</A></TT>
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1145">... tool </A><A
|
<DT><A NAME="foot1147">... tool </A><A
|
||||||
HREF="UserManual.html#tex2html5"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="UserManual.html#tex2html5"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD><TT><A NAME="tex2html6"
|
<DD><TT><A NAME="tex2html6"
|
||||||
HREF="http://www.vampir.eu">http://www.vampir.eu</A></TT>
|
HREF="http://www.vampir.eu">http://www.vampir.eu</A></TT>
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1146">...
|
<DT><A NAME="foot1148">...
|
||||||
Open MPI </A><A
|
Open MPI </A><A
|
||||||
HREF="UserManual.html#tex2html7"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="UserManual.html#tex2html7"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
@ -3395,14 +3403,14 @@ Open MPI </A><A
|
|||||||
HREF="http://www.open-mpi.org/faq/?category=vampirtrace">http://www.open-mpi.org/faq/?category=vampirtrace</A></TT>
|
HREF="http://www.open-mpi.org/faq/?category=vampirtrace">http://www.open-mpi.org/faq/?category=vampirtrace</A></TT>
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1170">... documentation </A><A
|
<DT><A NAME="foot1172">... documentation </A><A
|
||||||
HREF="UserManual.html#tex2html11"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="UserManual.html#tex2html11"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD><TT><A NAME="tex2html12"
|
<DD><TT><A NAME="tex2html12"
|
||||||
HREF="http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling">http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling</A></TT>
|
HREF="http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling">http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling</A></TT>
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1171">...
|
<DT><A NAME="foot1173">...
|
||||||
Dyninst </A><A
|
Dyninst </A><A
|
||||||
HREF="UserManual.html#tex2html13"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="UserManual.html#tex2html13"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
@ -3410,7 +3418,7 @@ Dyninst </A><A
|
|||||||
HREF="http://www.dyninst.org">http://www.dyninst.org</A></TT>
|
HREF="http://www.dyninst.org">http://www.dyninst.org</A></TT>
|
||||||
|
|
||||||
</DD>
|
</DD>
|
||||||
<DT><A NAME="foot1181">... CLAPACK</A><A
|
<DT><A NAME="foot1183">... CLAPACK</A><A
|
||||||
HREF="UserManual.html#tex2html15"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
HREF="UserManual.html#tex2html15"><SUP><IMG ALIGN="BOTTOM" BORDER="1" ALT="[*]"
|
||||||
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
SRC="/usr/share/latex2html/icons/footnote.png"></SUP></A></DT>
|
||||||
<DD><TT><A NAME="tex2html16"
|
<DD><TT><A NAME="tex2html16"
|
||||||
|
Двоичные данные
ompi/contrib/vt/vt/doc/UserManual.pdf
Двоичные данные
ompi/contrib/vt/vt/doc/UserManual.pdf
Двоичный файл не отображается.
@ -1,4 +1,9 @@
|
|||||||
1.9.1openmpi = 1.9sawfish
|
1.9.1openmpi
|
||||||
|
- added progress display to otfprofile-mpi
|
||||||
|
- use pdf[la]tex instead of latex/dvipdf to convert otfprofile-mpi's
|
||||||
|
TeX output to PDF
|
||||||
|
- fixed function invocation statistics in otfprofile-mpi generated
|
||||||
|
from summarized information (--stat)
|
||||||
|
|
||||||
1.9sawfish
|
1.9sawfish
|
||||||
- added MPI-parallel version of otfprofile (otfprofile-mpi)
|
- added MPI-parallel version of otfprofile (otfprofile-mpi)
|
||||||
|
@ -2,6 +2,7 @@ m4_include(config/m4/acinclude.debug.m4)
|
|||||||
m4_include(config/m4/acinclude.math.m4)
|
m4_include(config/m4/acinclude.math.m4)
|
||||||
m4_include(config/m4/acinclude.mpi.m4)
|
m4_include(config/m4/acinclude.mpi.m4)
|
||||||
m4_include(config/m4/acinclude.omp.m4)
|
m4_include(config/m4/acinclude.omp.m4)
|
||||||
|
m4_include(config/m4/acinclude.pdflatex_pgfplots.m4)
|
||||||
m4_include(config/m4/acinclude.swig_python.m4)
|
m4_include(config/m4/acinclude.swig_python.m4)
|
||||||
m4_include(config/m4/acinclude.vtf3.m4)
|
m4_include(config/m4/acinclude.vtf3.m4)
|
||||||
m4_include(config/m4/acinclude.verbose.m4)
|
m4_include(config/m4/acinclude.verbose.m4)
|
||||||
|
@ -0,0 +1,35 @@
|
|||||||
|
AC_DEFUN([CHECK_PDFLATEX_PGFPLOTS],
|
||||||
|
[
|
||||||
|
AC_ARG_VAR([PDFTEX], [pdfTeX typesetter command])
|
||||||
|
|
||||||
|
AC_CHECK_PROGS([PDFTEX], [pdflatex pdftex])
|
||||||
|
if test x"$PDFTEX" != x; then
|
||||||
|
AC_DEFINE_UNQUOTED([PDFTEX], ["$PDFTEX"], [pdfTeX typesetter command.])
|
||||||
|
|
||||||
|
AC_MSG_CHECKING([for PGFPLOTS version >= 1.4])
|
||||||
|
|
||||||
|
cat << EOF >conftest.tex
|
||||||
|
\documentclass[[a4paper,10pt]]{article}
|
||||||
|
\nonstopmode
|
||||||
|
\usepackage{pgfplots}
|
||||||
|
\begin{document}
|
||||||
|
\pgfplotstableread{
|
||||||
|
col1 col2
|
||||||
|
1 2
|
||||||
|
}\testtable
|
||||||
|
test
|
||||||
|
\end{document}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
$PDFTEX conftest.tex >/dev/null 2>&1
|
||||||
|
if test $? -eq 0; then
|
||||||
|
AC_MSG_RESULT([yes])
|
||||||
|
AC_DEFINE([HAVE_PGFPLOTS_1_4], [1], [Define to 1 if you have the TeX package PGFPLOTS version >=1.4.])
|
||||||
|
else
|
||||||
|
AC_MSG_RESULT([no])
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -f conftest.*
|
||||||
|
|
||||||
|
fi
|
||||||
|
])
|
@ -69,6 +69,9 @@ CHECK_SWIG_PYTHON
|
|||||||
if test x"$force_swig_python" = "xyes" -a x"$swig_python_error" = "xyes"; then exit 1; fi
|
if test x"$force_swig_python" = "xyes" -a x"$swig_python_error" = "xyes"; then exit 1; fi
|
||||||
AM_CONDITIONAL(AMHAVESWIGPYTHON, test x"$have_swig_python" = xyes)
|
AM_CONDITIONAL(AMHAVESWIGPYTHON, test x"$have_swig_python" = xyes)
|
||||||
|
|
||||||
|
# Checks for pdflatex and PGFPLOTS needed for otfprofile-mpi to convert TeX output to PDF
|
||||||
|
CHECK_PDFLATEX_PGFPLOTS
|
||||||
|
|
||||||
|
|
||||||
WITH_DEBUG
|
WITH_DEBUG
|
||||||
WITH_VERBOSE
|
WITH_VERBOSE
|
||||||
|
Двоичные данные
ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf
Двоичные данные
ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf
Двоичный файл не отображается.
Двоичные данные
ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf
Двоичные данные
ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf
Двоичный файл не отображается.
@ -17,6 +17,7 @@ otfprofile_mpi_SOURCES = \
|
|||||||
collect_data.h \
|
collect_data.h \
|
||||||
create_latex.h \
|
create_latex.h \
|
||||||
datastructs.h \
|
datastructs.h \
|
||||||
|
otfprofile-mpi.h \
|
||||||
reduce_data.h \
|
reduce_data.h \
|
||||||
summarize_data.h \
|
summarize_data.h \
|
||||||
collect_data.cpp \
|
collect_data.cpp \
|
||||||
|
@ -15,26 +15,245 @@ using namespace std;
|
|||||||
#include "otf.h"
|
#include "otf.h"
|
||||||
#include "otfaux.h"
|
#include "otfaux.h"
|
||||||
|
|
||||||
#include "mpi.h"
|
|
||||||
|
|
||||||
#include "collect_data.h"
|
#include "collect_data.h"
|
||||||
|
#include "otfprofile-mpi.h"
|
||||||
|
|
||||||
|
|
||||||
/* logarithm to base b for unsigned 64-bit integer x */
|
static void prepare_progress( AllData& alldata, uint64_t max_bytes ) {
|
||||||
static uint64_t logi( uint64_t x, uint64_t b= 2 ) {
|
|
||||||
|
|
||||||
assert( b > 1 );
|
Progress& progress= alldata.progress;
|
||||||
|
|
||||||
uint64_t c= 1;
|
progress.cur_bytes= 0;
|
||||||
uint64_t i= 0;
|
progress.max_bytes= max_bytes;
|
||||||
|
progress.ranks_left= alldata.numRanks -1;
|
||||||
|
|
||||||
while( c <= x ) {
|
if ( 1 < alldata.numRanks ) {
|
||||||
|
|
||||||
c*= b;
|
/* reduce max. bytes to rank 0 */
|
||||||
i++;
|
uint64_t sum_max_bytes;
|
||||||
|
MPI_Reduce( &max_bytes, &sum_max_bytes, 1, MPI_LONG_LONG_INT, MPI_SUM,
|
||||||
|
0, MPI_COMM_WORLD );
|
||||||
|
|
||||||
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
|
progress.max_bytes= sum_max_bytes;
|
||||||
|
|
||||||
|
progress.recv_buffers= new uint64_t[alldata.numRanks-1];
|
||||||
|
assert( progress.recv_buffers );
|
||||||
|
progress.recv_requests= new MPI_Request[alldata.numRanks-1];
|
||||||
|
assert( progress.recv_requests );
|
||||||
|
progress.recv_statuses= new MPI_Status[alldata.numRanks-1];
|
||||||
|
assert( progress.recv_statuses );
|
||||||
|
progress.recv_indices= new int[alldata.numRanks-1];
|
||||||
|
assert( progress.recv_indices );
|
||||||
|
|
||||||
|
/* initialize array of current bytes read and start
|
||||||
|
persistent communication */
|
||||||
|
|
||||||
|
for ( uint32_t i= 0; i < alldata.numRanks; i++ ) {
|
||||||
|
|
||||||
|
if ( 0 < i ) {
|
||||||
|
|
||||||
|
/* create persistent request handle */
|
||||||
|
MPI_Recv_init( &(progress.recv_buffers[i-1]), 1,
|
||||||
|
MPI_LONG_LONG_INT, i, Progress::MSG_TAG,
|
||||||
|
MPI_COMM_WORLD,
|
||||||
|
&(progress.recv_requests[i-1]) );
|
||||||
|
|
||||||
|
/* start persistent communication */
|
||||||
|
MPI_Start( &(progress.recv_requests[i-1]) );
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else { /* 0 != my_rank */
|
||||||
|
|
||||||
|
/* initialize request handle for sending progress to rank 0 */
|
||||||
|
progress.send_request = MPI_REQUEST_NULL;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* block until all worker ranks have reached this point to avoid that the
|
||||||
|
progress does a big jump at beginning */
|
||||||
|
MPI_Barrier( MPI_COMM_WORLD );
|
||||||
}
|
}
|
||||||
|
|
||||||
return i;
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
|
/* show initial progress */
|
||||||
|
printf( "%7.2f %%\r", 0.0 );
|
||||||
|
fflush( stdout );
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void update_progress( AllData& alldata, uint64_t delta_bytes,
|
||||||
|
bool wait= false ) {
|
||||||
|
|
||||||
|
Progress& progress= alldata.progress;
|
||||||
|
|
||||||
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
|
progress.cur_bytes += delta_bytes;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
progress.cur_bytes= delta_bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( 1 < alldata.numRanks ) {
|
||||||
|
|
||||||
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
|
/* get current bytes read from all worker ranks */
|
||||||
|
|
||||||
|
int out_count;
|
||||||
|
|
||||||
|
/* either wait or test for one or more updates from worker ranks */
|
||||||
|
|
||||||
|
if ( wait )
|
||||||
|
{
|
||||||
|
|
||||||
|
MPI_Waitsome( alldata.numRanks - 1, progress.recv_requests,
|
||||||
|
&out_count, progress.recv_indices,
|
||||||
|
progress.recv_statuses );
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
MPI_Testsome( alldata.numRanks - 1, progress.recv_requests,
|
||||||
|
&out_count, progress.recv_indices,
|
||||||
|
progress.recv_statuses );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( MPI_UNDEFINED != out_count ) {
|
||||||
|
|
||||||
|
int index;
|
||||||
|
uint32_t i;
|
||||||
|
|
||||||
|
for ( i= 0; i < (uint32_t) out_count; i++ ) {
|
||||||
|
|
||||||
|
index= progress.recv_indices[i];
|
||||||
|
|
||||||
|
/* worker rank (index+1) is finished? */
|
||||||
|
if ( (uint64_t)-1 != progress.recv_buffers[index] ) {
|
||||||
|
|
||||||
|
/* update rank's current bytes read and restart
|
||||||
|
persistent communication */
|
||||||
|
|
||||||
|
progress.cur_bytes += progress.recv_buffers[index];
|
||||||
|
|
||||||
|
MPI_Start( &(progress.recv_requests[progress.recv_indices[i]]) );
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
/* this rank is finished */
|
||||||
|
progress.ranks_left -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else { /* 0 != my_rank */
|
||||||
|
|
||||||
|
int do_send = 1;
|
||||||
|
MPI_Status status;
|
||||||
|
|
||||||
|
/* send only if it's the first send or the request handle isn't
|
||||||
|
currently in use */
|
||||||
|
|
||||||
|
if ( MPI_REQUEST_NULL != progress.send_request ) {
|
||||||
|
|
||||||
|
MPI_Test( &(progress.send_request), &do_send, &status );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( do_send ) {
|
||||||
|
|
||||||
|
MPI_Issend( &(progress.cur_bytes), 1, MPI_LONG_LONG_INT, 0,
|
||||||
|
Progress::MSG_TAG, MPI_COMM_WORLD,
|
||||||
|
&progress.send_request );
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
|
/* show progress */
|
||||||
|
|
||||||
|
double percent =
|
||||||
|
100.0 * (double) progress.cur_bytes / (double) progress.max_bytes;
|
||||||
|
|
||||||
|
static const char signs[2]= { '.',' ' };
|
||||||
|
static int signi= 0;
|
||||||
|
|
||||||
|
printf( "%7.2f %% %c\r", percent, signs[signi] );
|
||||||
|
fflush( stdout );
|
||||||
|
|
||||||
|
signi^= 1;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void finish_progress( AllData& alldata ) {
|
||||||
|
|
||||||
|
Progress& progress= alldata.progress;
|
||||||
|
|
||||||
|
if ( 1 < alldata.numRanks ) {
|
||||||
|
|
||||||
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
|
/* update progress until all worker ranks are
|
||||||
|
finished / all bytes are read */
|
||||||
|
|
||||||
|
while ( 0 < progress.ranks_left ) {
|
||||||
|
|
||||||
|
update_progress( alldata, 0, true );
|
||||||
|
}
|
||||||
|
|
||||||
|
} else { /* 0 != my_rank */
|
||||||
|
|
||||||
|
MPI_Status status;
|
||||||
|
MPI_Wait( &(progress.send_request), &status );
|
||||||
|
|
||||||
|
/* send last current bytes read to rank 0 */
|
||||||
|
MPI_Send( &(progress.cur_bytes), 1, MPI_LONG_LONG_INT, 0,
|
||||||
|
Progress::MSG_TAG, MPI_COMM_WORLD );
|
||||||
|
|
||||||
|
/* send marker (-1) to rank 0 which indicates that this worker rank
|
||||||
|
is finished */
|
||||||
|
|
||||||
|
progress.cur_bytes = (uint64_t) -1;
|
||||||
|
MPI_Send( &(progress.cur_bytes), 1, MPI_LONG_LONG_INT, 0,
|
||||||
|
Progress::MSG_TAG, MPI_COMM_WORLD );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
|
/* show final progress */
|
||||||
|
printf( "%7.2f %% done\n", 100.0 );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if( 1 < alldata.numRanks && 0 == alldata.myRank ) {
|
||||||
|
|
||||||
|
/* ensure that all requests are inactive before freeing memory */
|
||||||
|
MPI_Waitall( alldata.numRanks - 1, progress.recv_requests,
|
||||||
|
progress.recv_statuses );
|
||||||
|
|
||||||
|
/* free memory */
|
||||||
|
delete [] progress.recv_buffers;
|
||||||
|
delete [] progress.recv_requests;
|
||||||
|
delete [] progress.recv_statuses;
|
||||||
|
delete [] progress.recv_indices;
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -80,12 +299,39 @@ static int handle_def_comment( void* fha, uint32_t stream, const char* comment,
|
|||||||
AllData* alldata= (AllData*) fha;
|
AllData* alldata= (AllData*) fha;
|
||||||
|
|
||||||
|
|
||||||
|
/* add new-line between each comment record */
|
||||||
if ( 0 < alldata->comments.length() ) {
|
if ( 0 < alldata->comments.length() ) {
|
||||||
|
|
||||||
alldata->comments+= "\n";
|
alldata->comments+= "\n";
|
||||||
|
|
||||||
}
|
}
|
||||||
alldata->comments+= comment;
|
|
||||||
|
|
||||||
|
/* wrap lines after 80 characters */
|
||||||
|
|
||||||
|
const string::size_type LINE_WRAP= 80;
|
||||||
|
|
||||||
|
string tmp= comment;
|
||||||
|
|
||||||
|
do {
|
||||||
|
|
||||||
|
if ( tmp.length() <= LINE_WRAP ) {
|
||||||
|
|
||||||
|
alldata->comments+= tmp;
|
||||||
|
break;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
string::size_type next_wrap=
|
||||||
|
tmp.find_last_of( " .!?:;,", LINE_WRAP -1 );
|
||||||
|
next_wrap= ( string::npos == next_wrap ) ? LINE_WRAP : next_wrap +1;
|
||||||
|
|
||||||
|
alldata->comments+= tmp.substr( 0, next_wrap ) + '\n';
|
||||||
|
tmp= tmp.substr( next_wrap );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
} while( 0 != tmp.length() );
|
||||||
|
|
||||||
return OTF_RETURN_OK;
|
return OTF_RETURN_OK;
|
||||||
}
|
}
|
||||||
@ -315,8 +561,8 @@ static int handle_send( void* fha, uint64_t time, uint32_t sender,
|
|||||||
if ( 0 != alldata->recvTimeKey ) {
|
if ( 0 != alldata->recvTimeKey ) {
|
||||||
|
|
||||||
uint64_t recv_time;
|
uint64_t recv_time;
|
||||||
if ( OTF_KeyValueList_getUint64( kvlist, alldata->recvTimeKey,
|
if ( 0 == OTF_KeyValueList_getUint64( kvlist, alldata->recvTimeKey,
|
||||||
&recv_time ) == 0 ) {
|
&recv_time ) ) {
|
||||||
|
|
||||||
duration= (double) ( recv_time - time );
|
duration= (double) ( recv_time - time );
|
||||||
|
|
||||||
@ -331,11 +577,11 @@ static int handle_send( void* fha, uint64_t time, uint32_t sender,
|
|||||||
if ( length > 0 && duration > 0.0 ) {
|
if ( length > 0 && duration > 0.0 ) {
|
||||||
|
|
||||||
uint64_t speed_bin=
|
uint64_t speed_bin=
|
||||||
logi( (uint64_t)(
|
Logi( (uint64_t)(
|
||||||
( (double)length * (double)alldata->timerResolution ) /
|
( (double)length * (double)alldata->timerResolution ) /
|
||||||
duration ), MessageSpeedData::BIN_LOG_BASE );
|
duration ), MessageSpeedData::BIN_LOG_BASE );
|
||||||
|
|
||||||
uint64_t length_bin= logi( length, MessageSpeedData::BIN_LOG_BASE );
|
uint64_t length_bin= Logi( length, MessageSpeedData::BIN_LOG_BASE );
|
||||||
|
|
||||||
alldata->messageSpeedMapPerLength[ Pair( speed_bin, length_bin ) ]
|
alldata->messageSpeedMapPerLength[ Pair( speed_bin, length_bin ) ]
|
||||||
.add( 1 );
|
.add( 1 );
|
||||||
@ -444,7 +690,18 @@ static int handle_function_summary( void* fha, uint64_t time, uint32_t func,
|
|||||||
/* add/overwrite function statistics */
|
/* add/overwrite function statistics */
|
||||||
|
|
||||||
FunctionData tmp;
|
FunctionData tmp;
|
||||||
tmp.add( count, exclTime, inclTime );
|
|
||||||
|
tmp.count.cnt = tmp.count.sum = count;
|
||||||
|
tmp.count.min = tmp.count.max = 0;
|
||||||
|
|
||||||
|
tmp.excl_time.cnt = count;
|
||||||
|
tmp.excl_time.sum = exclTime;
|
||||||
|
tmp.excl_time.min = tmp.excl_time.max = 0;
|
||||||
|
|
||||||
|
tmp.incl_time.cnt = count;
|
||||||
|
tmp.incl_time.sum = inclTime;
|
||||||
|
tmp.incl_time.min = tmp.incl_time.max = 0;
|
||||||
|
|
||||||
alldata->functionMapPerRank[ Pair( func, process ) ]= tmp;
|
alldata->functionMapPerRank[ Pair( func, process ) ]= tmp;
|
||||||
|
|
||||||
return OTF_RETURN_OK;
|
return OTF_RETURN_OK;
|
||||||
@ -550,7 +807,9 @@ static int handle_collop_summary( void* fha, uint64_t time, uint32_t process,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void read_definitions( OTF_Reader* reader, AllData& alldata ) {
|
static bool read_definitions( AllData& alldata, OTF_Reader* reader ) {
|
||||||
|
|
||||||
|
bool error= false;
|
||||||
|
|
||||||
/* open OTF handler array */
|
/* open OTF handler array */
|
||||||
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
|
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
|
||||||
@ -608,16 +867,22 @@ static void read_definitions( OTF_Reader* reader, AllData& alldata ) {
|
|||||||
OTF_DEFKEYVALUE_RECORD );
|
OTF_DEFKEYVALUE_RECORD );
|
||||||
|
|
||||||
/* read definitions */
|
/* read definitions */
|
||||||
uint64_t defs_read_ret= OTF_Reader_readDefinitions( reader, handlers );
|
uint64_t read_ret= OTF_Reader_readDefinitions( reader, handlers );
|
||||||
assert( OTF_READ_ERROR != defs_read_ret );
|
if ( OTF_READ_ERROR == read_ret ) {
|
||||||
|
|
||||||
|
cerr << "ERROR: Could not read definitions." << endl;
|
||||||
|
error= true;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/* close OTF handler array */
|
/* close OTF handler array */
|
||||||
OTF_HandlerArray_close( handlers );
|
OTF_HandlerArray_close( handlers );
|
||||||
|
|
||||||
|
return !error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
|
static void share_definitions( AllData& alldata ) {
|
||||||
AllData& alldata ) {
|
|
||||||
|
|
||||||
MPI_Barrier( MPI_COMM_WORLD );
|
MPI_Barrier( MPI_COMM_WORLD );
|
||||||
|
|
||||||
@ -627,7 +892,7 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
|
|
||||||
/* get size needed to send definitions to workers */
|
/* get size needed to send definitions to workers */
|
||||||
|
|
||||||
if ( my_rank == 0 ) {
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
MPI_Pack_size( 1 + alldata.collectiveOperationsToClasses.size() * 2 +
|
MPI_Pack_size( 1 + alldata.collectiveOperationsToClasses.size() * 2 +
|
||||||
1 + alldata.countersOfInterest.size() +
|
1 + alldata.countersOfInterest.size() +
|
||||||
@ -646,7 +911,7 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
|
|
||||||
/* pack definitions to buffer */
|
/* pack definitions to buffer */
|
||||||
|
|
||||||
if ( my_rank == 0 ) {
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
/* collectiveOperationsToClasses.size() */
|
/* collectiveOperationsToClasses.size() */
|
||||||
uint64_t collop_classes_map_size=
|
uint64_t collop_classes_map_size=
|
||||||
@ -701,7 +966,7 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
|
|
||||||
/* unpack definitions from buffer */
|
/* unpack definitions from buffer */
|
||||||
|
|
||||||
if ( my_rank != 0 ) {
|
if ( 0 != alldata.myRank ) {
|
||||||
|
|
||||||
/* collectiveOperationsToClasses.size() */
|
/* collectiveOperationsToClasses.size() */
|
||||||
uint64_t collop_classes_map_size;
|
uint64_t collop_classes_map_size;
|
||||||
@ -756,7 +1021,9 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void read_events( OTF_Reader* reader, AllData& alldata ) {
|
static bool read_events( AllData& alldata, OTF_Reader* reader ) {
|
||||||
|
|
||||||
|
bool error= false;
|
||||||
|
|
||||||
/* open OTF handler array */
|
/* open OTF handler array */
|
||||||
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
|
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
|
||||||
@ -810,16 +1077,67 @@ static void read_events( OTF_Reader* reader, AllData& alldata ) {
|
|||||||
OTF_Reader_enableProcess( reader, alldata.myProcessesList[ i ] );
|
OTF_Reader_enableProcess( reader, alldata.myProcessesList[ i ] );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* prepare progress */
|
||||||
|
if ( alldata.params.progress ) {
|
||||||
|
|
||||||
|
OTF_Reader_setRecordLimit( reader, 0 );
|
||||||
|
|
||||||
|
if ( OTF_READ_ERROR != OTF_Reader_readEvents( reader, handlers ) ) {
|
||||||
|
|
||||||
|
uint64_t min, cur, max;
|
||||||
|
|
||||||
|
OTF_Reader_eventBytesProgress( reader, &min, &cur, &max );
|
||||||
|
prepare_progress( alldata, max );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
OTF_Reader_setRecordLimit( reader, Progress::EVENTS_RECORD_LIMIT );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/* read events */
|
/* read events */
|
||||||
uint64_t events_read_ret= OTF_Reader_readEvents( reader, handlers );
|
|
||||||
assert( OTF_READ_ERROR != events_read_ret );
|
uint64_t records_read= 0;
|
||||||
|
|
||||||
|
while ( OTF_READ_ERROR !=
|
||||||
|
( records_read= OTF_Reader_readEvents( reader, handlers ) ) ) {
|
||||||
|
|
||||||
|
/* update progress */
|
||||||
|
if ( alldata.params.progress ) {
|
||||||
|
|
||||||
|
uint64_t min, cur, max;
|
||||||
|
static uint64_t last_cur= 0;
|
||||||
|
|
||||||
|
OTF_Reader_eventBytesProgress( reader, &min, &cur, &max );
|
||||||
|
update_progress( alldata, cur - last_cur );
|
||||||
|
|
||||||
|
last_cur = cur;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* stop reading if done */
|
||||||
|
if ( 0 == records_read )
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* show error message if reading failed */
|
||||||
|
if ( OTF_READ_ERROR == records_read ) {
|
||||||
|
|
||||||
|
cerr << "ERROR: Could not read events." << endl;
|
||||||
|
error= true;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/* close OTF handler array */
|
/* close OTF handler array */
|
||||||
OTF_HandlerArray_close( handlers );
|
OTF_HandlerArray_close( handlers );
|
||||||
|
|
||||||
|
return !error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void read_statistics( OTF_Reader* reader, AllData& alldata ) {
|
static bool read_statistics( AllData& alldata, OTF_Reader* reader ) {
|
||||||
|
|
||||||
|
bool error= false;
|
||||||
|
|
||||||
/* open OTF handler array */
|
/* open OTF handler array */
|
||||||
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
|
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
|
||||||
@ -853,18 +1171,66 @@ static void read_statistics( OTF_Reader* reader, AllData& alldata ) {
|
|||||||
OTF_Reader_enableProcess( reader, alldata.myProcessesList[ i ] );
|
OTF_Reader_enableProcess( reader, alldata.myProcessesList[ i ] );
|
||||||
}
|
}
|
||||||
|
|
||||||
/* read events */
|
/* prepare progress */
|
||||||
uint64_t stats_read_ret= OTF_Reader_readStatistics( reader, handlers );
|
if ( alldata.params.progress ) {
|
||||||
assert( OTF_READ_ERROR != stats_read_ret );
|
|
||||||
|
OTF_Reader_setRecordLimit( reader, 0 );
|
||||||
|
|
||||||
|
if ( OTF_READ_ERROR != OTF_Reader_readStatistics( reader, handlers ) ) {
|
||||||
|
|
||||||
|
uint64_t min, cur, max;
|
||||||
|
OTF_Reader_statisticBytesProgress( reader, &min, &cur, &max );
|
||||||
|
prepare_progress( alldata, max );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
OTF_Reader_setRecordLimit( reader, Progress::STATS_RECORD_LIMIT );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* read statistics */
|
||||||
|
|
||||||
|
uint64_t records_read= 0;
|
||||||
|
|
||||||
|
while ( OTF_READ_ERROR !=
|
||||||
|
( records_read= OTF_Reader_readStatistics( reader, handlers ) ) ) {
|
||||||
|
|
||||||
|
/* update progress */
|
||||||
|
if ( alldata.params.progress ) {
|
||||||
|
|
||||||
|
uint64_t min, cur, max;
|
||||||
|
static uint64_t last_cur= 0;
|
||||||
|
|
||||||
|
OTF_Reader_statisticBytesProgress( reader, &min, &cur, &max );
|
||||||
|
update_progress( alldata, cur - last_cur );
|
||||||
|
|
||||||
|
last_cur = cur;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* stop reading if done */
|
||||||
|
if ( 0 == records_read )
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* show error message if reading failed */
|
||||||
|
if ( OTF_READ_ERROR == records_read ) {
|
||||||
|
|
||||||
|
cerr << "ERROR: Could not read statistics." << endl;
|
||||||
|
error= true;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/* close OTF handler array */
|
/* close OTF handler array */
|
||||||
OTF_HandlerArray_close( handlers );
|
OTF_HandlerArray_close( handlers );
|
||||||
|
|
||||||
|
return !error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool collectData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
|
bool CollectData( AllData& alldata ) {
|
||||||
|
|
||||||
bool ret= true;
|
bool error= false;
|
||||||
|
|
||||||
/* open OTF file manager and reader */
|
/* open OTF file manager and reader */
|
||||||
|
|
||||||
@ -876,37 +1242,65 @@ bool collectData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
|
|||||||
OTF_Reader_open( alldata.params.input_file_prefix.c_str(), manager );
|
OTF_Reader_open( alldata.params.input_file_prefix.c_str(), manager );
|
||||||
assert( reader );
|
assert( reader );
|
||||||
|
|
||||||
if ( my_rank == 0 ) {
|
do {
|
||||||
|
|
||||||
/* read definitions */
|
if ( 0 == alldata.myRank ) {
|
||||||
read_definitions( reader, alldata );
|
|
||||||
|
|
||||||
}
|
/* read definitions */
|
||||||
|
|
||||||
/* share definitions needed for reading events to workers */
|
VerbosePrint( alldata, 1, true, "reading definitions\n" );
|
||||||
|
|
||||||
if ( num_ranks > 1 ) {
|
error= !read_definitions( alldata, reader );
|
||||||
|
|
||||||
share_definitions( my_rank, num_ranks, alldata );
|
}
|
||||||
|
|
||||||
}
|
/* broadcast error indicator to workers */
|
||||||
|
if ( SyncError( alldata, error, 0 ) ) {
|
||||||
|
|
||||||
/* either read data from events or statistics */
|
break;
|
||||||
|
|
||||||
if ( alldata.params.read_from_stats ) {
|
}
|
||||||
|
|
||||||
read_statistics( reader, alldata );
|
/* share definitions needed for reading events to workers */
|
||||||
|
|
||||||
} else {
|
if ( 1 < alldata.numRanks ) {
|
||||||
|
|
||||||
read_events( reader, alldata );
|
share_definitions( alldata );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* either read data from events or statistics */
|
||||||
|
|
||||||
|
if ( alldata.params.read_from_stats ) {
|
||||||
|
|
||||||
|
VerbosePrint( alldata, 1, true, "reading statistics\n" );
|
||||||
|
|
||||||
|
error= !read_statistics( alldata, reader );
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
VerbosePrint( alldata, 1, true, "reading events\n" );
|
||||||
|
|
||||||
|
error= !read_events( alldata, reader );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* finish progress */
|
||||||
|
if ( alldata.params.progress ) {
|
||||||
|
|
||||||
|
finish_progress( alldata );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* synchronize error indicator with workers */
|
||||||
|
SyncError( alldata, error );
|
||||||
|
|
||||||
|
} while( false );
|
||||||
|
|
||||||
/* close OTF file manager and reader */
|
/* close OTF file manager and reader */
|
||||||
|
|
||||||
OTF_Reader_close( reader );
|
OTF_Reader_close( reader );
|
||||||
OTF_FileManager_close( manager );
|
OTF_FileManager_close( manager );
|
||||||
|
|
||||||
return ret;
|
return !error;
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,7 @@
|
|||||||
|
|
||||||
/* collect the data for the assigned trace processes from the given
|
/* collect the data for the assigned trace processes from the given
|
||||||
trace file name */
|
trace file name */
|
||||||
bool collectData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata );
|
bool CollectData( AllData& alldata );
|
||||||
|
|
||||||
|
|
||||||
#endif /* COLLECT_DATA_H */
|
#endif /* COLLECT_DATA_H */
|
||||||
|
@ -19,6 +19,8 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#include "create_latex.h"
|
#include "create_latex.h"
|
||||||
|
#include "otfprofile-mpi.h"
|
||||||
|
|
||||||
#include "OTF_inttypes.h"
|
#include "OTF_inttypes.h"
|
||||||
#include "OTF_Definitions.h"
|
#include "OTF_Definitions.h"
|
||||||
|
|
||||||
@ -251,6 +253,7 @@ static void collectiveId2String(uint64_t id, string& name)
|
|||||||
static void write_header(fstream& tex)
|
static void write_header(fstream& tex)
|
||||||
{
|
{
|
||||||
tex << "\\documentclass[a4paper,10pt]{article}" << endl;
|
tex << "\\documentclass[a4paper,10pt]{article}" << endl;
|
||||||
|
tex << "\\nonstopmode" << endl;
|
||||||
tex << "\\usepackage{amssymb}" << endl;
|
tex << "\\usepackage{amssymb}" << endl;
|
||||||
tex << "\\usepackage{longtable}" << endl;
|
tex << "\\usepackage{longtable}" << endl;
|
||||||
tex << "\\usepackage{ifthen}" << endl;
|
tex << "\\usepackage{ifthen}" << endl;
|
||||||
@ -2015,9 +2018,11 @@ static void write_p2pMsgRateHist(fstream& tex, struct AllData& alldata)
|
|||||||
*
|
*
|
||||||
* @param alldata data structure containing summarized profiling information
|
* @param alldata data structure containing summarized profiling information
|
||||||
*/
|
*/
|
||||||
bool createTex( AllData& alldata ) {
|
bool CreateTex( AllData& alldata ) {
|
||||||
|
|
||||||
bool ret= true;
|
bool error= false;
|
||||||
|
|
||||||
|
VerbosePrint( alldata, 1, true, "producing LaTeX output\n" );
|
||||||
|
|
||||||
string tex_file_name= alldata.params.output_file_prefix + ".tex";
|
string tex_file_name= alldata.params.output_file_prefix + ".tex";
|
||||||
fstream tex_file;
|
fstream tex_file;
|
||||||
@ -2057,59 +2062,39 @@ bool createTex( AllData& alldata ) {
|
|||||||
write_footer(tex_file);
|
write_footer(tex_file);
|
||||||
tex_file.close();
|
tex_file.close();
|
||||||
|
|
||||||
|
VerbosePrint( alldata, 2, true, " created file: %s\n",
|
||||||
|
tex_file_name.c_str() );
|
||||||
|
|
||||||
|
#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
|
||||||
/* create PDF file, if desired */
|
/* create PDF file, if desired */
|
||||||
if ( alldata.params.create_pdf ) {
|
if ( alldata.params.create_pdf ) {
|
||||||
|
|
||||||
int rc;
|
VerbosePrint( alldata, 1, true, "producing PDF output\n" );
|
||||||
|
|
||||||
|
/* compose pdflatex command */
|
||||||
ostringstream cmd;
|
ostringstream cmd;
|
||||||
|
cmd << PDFTEX << " " << tex_file_name << " >/dev/null 2>&1";
|
||||||
|
|
||||||
/* compose latex command */
|
/* execute pdflatex command (two times) on TeX file */
|
||||||
cmd << alldata.params.latex_command << " " << tex_file_name
|
|
||||||
<< " >/dev/null 2>&1";
|
|
||||||
|
|
||||||
/* execute latex command (two times) on TEX file */
|
|
||||||
for ( uint8_t i = 0; i < 2; i++ ) {
|
for ( uint8_t i = 0; i < 2; i++ ) {
|
||||||
|
|
||||||
rc= system( cmd.str().c_str() );
|
VerbosePrint( alldata, 2, true, " %srunning command: %s\n",
|
||||||
|
(0 == i) ? "" : "re-", cmd.str().c_str() );
|
||||||
|
|
||||||
|
int rc= system( cmd.str().c_str() );
|
||||||
if ( 0 != WEXITSTATUS( rc ) ) {
|
if ( 0 != WEXITSTATUS( rc ) ) {
|
||||||
|
|
||||||
cerr << "ERROR: Could not create DVI file from '"
|
cerr << "ERROR: Could not create PDF file from '"
|
||||||
<< tex_file_name << "'." << endl;
|
<< tex_file_name << "'." << endl;
|
||||||
ret= false;
|
error= true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( 0 == rc ) {
|
|
||||||
|
|
||||||
/* compose DVI file name */
|
|
||||||
string dvi_file_name= tex_file_name;
|
|
||||||
dvi_file_name.replace( tex_file_name.length() - 4, 4, ".dvi" );
|
|
||||||
|
|
||||||
/* compose PDF file name */
|
|
||||||
string pdf_file_name= tex_file_name;
|
|
||||||
pdf_file_name.replace( tex_file_name.length() - 4, 4, ".pdf" );
|
|
||||||
|
|
||||||
/* compose DVI to PDF convert command */
|
|
||||||
cmd.str(""); cmd.clear();
|
|
||||||
cmd << alldata.params.dvipdf_command << " " << dvi_file_name
|
|
||||||
<< " >/dev/null 2>&1";
|
|
||||||
|
|
||||||
/* execute DVI to PDF command */
|
|
||||||
rc= system( cmd.str().c_str() );
|
|
||||||
if ( 0 != WEXITSTATUS( rc ) ) {
|
|
||||||
|
|
||||||
cerr << "ERROR: Could not convert '" << dvi_file_name
|
|
||||||
<< "' to '" << pdf_file_name << "'." << endl;
|
|
||||||
ret= false;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
|
||||||
|
|
||||||
return ret;
|
return !error;
|
||||||
}
|
}
|
||||||
|
@ -6,11 +6,12 @@
|
|||||||
#ifndef CREATE_LATEX_H
|
#ifndef CREATE_LATEX_H
|
||||||
#define CREATE_LATEX_H
|
#define CREATE_LATEX_H
|
||||||
|
|
||||||
|
|
||||||
#include "datastructs.h"
|
#include "datastructs.h"
|
||||||
|
|
||||||
|
|
||||||
/* generate PGF output */
|
/* generate PGF output */
|
||||||
bool createTex( AllData& alldata );
|
bool CreateTex( AllData& alldata );
|
||||||
|
|
||||||
|
|
||||||
#endif /* CREATE_LATEX_H */
|
#endif /* CREATE_LATEX_H */
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
#ifndef DATASTRUCTS_H
|
#ifndef DATASTRUCTS_H
|
||||||
#define DATASTRUCTS_H
|
#define DATASTRUCTS_H
|
||||||
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -16,6 +17,8 @@ using namespace std;
|
|||||||
#include <list>
|
#include <list>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
|
||||||
#include "OTF_inttypes.h"
|
#include "OTF_inttypes.h"
|
||||||
|
|
||||||
|
|
||||||
@ -25,26 +28,84 @@ struct Params {
|
|||||||
|
|
||||||
static const uint32_t DEFAULT_MAX_FILE_HANDLES= 50;
|
static const uint32_t DEFAULT_MAX_FILE_HANDLES= 50;
|
||||||
static const uint32_t DEFAULT_BUFFER_SIZE= 1024 * 1024;
|
static const uint32_t DEFAULT_BUFFER_SIZE= 1024 * 1024;
|
||||||
static const string DEFAULT_OUTPUT_FILE_PREFIX() { return "result"; }
|
static const uint8_t DEFAULT_VERBOSE_LEVEL= 0;
|
||||||
static const string DEFAULT_LATEX_COMMAND() { return "latex"; }
|
static const string DEFAULT_OUTPUT_FILE_PREFIX() { return "result"; }
|
||||||
static const string DEFAULT_DVIPDF_COMMAND() { return "dvipdf"; }
|
|
||||||
|
|
||||||
uint32_t max_file_handles;
|
uint32_t max_file_handles;
|
||||||
uint32_t buffer_size;
|
uint32_t buffer_size;
|
||||||
|
uint8_t verbose_level;
|
||||||
|
bool progress;
|
||||||
bool read_from_stats;
|
bool read_from_stats;
|
||||||
|
|
||||||
bool create_pdf;
|
bool create_pdf;
|
||||||
string input_file_prefix;
|
string input_file_prefix;
|
||||||
string output_file_prefix;
|
string output_file_prefix;
|
||||||
string latex_command;
|
|
||||||
string dvipdf_command;
|
|
||||||
|
|
||||||
Params()
|
Params()
|
||||||
: max_file_handles(DEFAULT_MAX_FILE_HANDLES),
|
: max_file_handles(DEFAULT_MAX_FILE_HANDLES),
|
||||||
buffer_size(DEFAULT_BUFFER_SIZE),
|
buffer_size(DEFAULT_BUFFER_SIZE),
|
||||||
|
verbose_level(DEFAULT_VERBOSE_LEVEL), progress(false),
|
||||||
read_from_stats(false), create_pdf(true),
|
read_from_stats(false), create_pdf(true),
|
||||||
output_file_prefix(DEFAULT_OUTPUT_FILE_PREFIX()),
|
output_file_prefix(DEFAULT_OUTPUT_FILE_PREFIX()) {}
|
||||||
latex_command(DEFAULT_LATEX_COMMAND()),
|
};
|
||||||
dvipdf_command(DEFAULT_DVIPDF_COMMAND()) {}
|
|
||||||
|
|
||||||
|
/* *** progress information *** */
|
||||||
|
|
||||||
|
struct Progress {
|
||||||
|
|
||||||
|
/* maximum number of records to read between progress updates */
|
||||||
|
static const uint64_t EVENTS_RECORD_LIMIT= 1000000;
|
||||||
|
static const uint64_t STATS_RECORD_LIMIT= 100;
|
||||||
|
|
||||||
|
/* message tag to use for communication */
|
||||||
|
static const int MSG_TAG= 500;
|
||||||
|
|
||||||
|
uint64_t cur_bytes; /* current bytes read */
|
||||||
|
uint64_t max_bytes; /* max. bytes readable */
|
||||||
|
|
||||||
|
MPI_Request send_request; /* sender request handle */
|
||||||
|
|
||||||
|
uint64_t* recv_buffers; /* receive buffers */
|
||||||
|
MPI_Request* recv_requests; /* persistent receive request handles */
|
||||||
|
MPI_Status* recv_statuses; /* receive statuses */
|
||||||
|
int* recv_indices; /* indices of completed recv. operations */
|
||||||
|
|
||||||
|
uint32_t ranks_left; /* root keeps track of ranks left to query */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/* *** runtime measurement *** */
|
||||||
|
|
||||||
|
struct MeasureBlock {
|
||||||
|
|
||||||
|
/* routine to get a global timestamp */
|
||||||
|
# define GETTIME() MPI_Wtime()
|
||||||
|
|
||||||
|
double start_time; /* start timestamp of measurement block */
|
||||||
|
double stop_time; /* stop timestamp of measurement block */
|
||||||
|
|
||||||
|
MeasureBlock() : start_time(-1.0), stop_time(-1.0) {}
|
||||||
|
|
||||||
|
/* start runtime measurment */
|
||||||
|
void start() {
|
||||||
|
|
||||||
|
start_time= GETTIME();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* stop runtime measurment */
|
||||||
|
void stop() {
|
||||||
|
|
||||||
|
assert( -1.0 != start_time );
|
||||||
|
stop_time= GETTIME();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* get result of runtime measurement */
|
||||||
|
double duration() const {
|
||||||
|
|
||||||
|
assert( -1.0 != start_time && -1.0 != stop_time );
|
||||||
|
return stop_time - start_time;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -413,6 +474,9 @@ struct PendingCollective {
|
|||||||
|
|
||||||
struct AllData {
|
struct AllData {
|
||||||
|
|
||||||
|
const uint32_t myRank;
|
||||||
|
const uint32_t numRanks;
|
||||||
|
|
||||||
/* number and list of processes to be handled by every worker */
|
/* number and list of processes to be handled by every worker */
|
||||||
uint32_t myProcessesNum;
|
uint32_t myProcessesNum;
|
||||||
uint32_t* myProcessesList;
|
uint32_t* myProcessesList;
|
||||||
@ -424,6 +488,12 @@ struct AllData {
|
|||||||
/* program parameters */
|
/* program parameters */
|
||||||
Params params;
|
Params params;
|
||||||
|
|
||||||
|
/* progress information */
|
||||||
|
Progress progress;
|
||||||
|
|
||||||
|
/* store per-measure block runtimes */
|
||||||
|
map< string, MeasureBlock > measureBlockMap;
|
||||||
|
|
||||||
/* clustering information for ranks */
|
/* clustering information for ranks */
|
||||||
Clustering clustering;
|
Clustering clustering;
|
||||||
|
|
||||||
@ -537,7 +607,9 @@ struct AllData {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
AllData() : myProcessesNum(0), myProcessesList(NULL),
|
AllData( uint32_t my_rank, uint32_t num_ranks ) :
|
||||||
|
myRank(my_rank), numRanks(num_ranks),
|
||||||
|
myProcessesNum(0), myProcessesList(NULL),
|
||||||
packbuffersize(0), packbuffer(NULL), timerResolution(0),
|
packbuffersize(0), packbuffer(NULL), timerResolution(0),
|
||||||
recvTimeKey(0) {}
|
recvTimeKey(0) {}
|
||||||
|
|
||||||
@ -565,6 +637,16 @@ struct AllData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Release the pack buffer and reset its bookkeeping: the buffer is passed
   to free(), the stored size is set to 0 and the pointer is set to NULL.
   Always returns NULL. */
char* freePackBuffer( ) {

    free( packbuffer );

    packbuffersize= 0;
    packbuffer= NULL;

    /* packbuffer was just reset, so this returns NULL */
    return packbuffer;

}
|
||||||
|
|
||||||
|
|
||||||
char* getPackBuffer( ) {
|
char* getPackBuffer( ) {
|
||||||
|
|
||||||
return packbuffer;
|
return packbuffer;
|
||||||
|
@ -7,72 +7,42 @@ using namespace std;
|
|||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <stdarg.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#include "mpi.h"
|
|
||||||
|
|
||||||
#include "otf.h"
|
#include "otf.h"
|
||||||
|
#include "OTF_Platform.h"
|
||||||
|
|
||||||
#include "datastructs.h"
|
|
||||||
#include "collect_data.h"
|
#include "collect_data.h"
|
||||||
|
#include "otfprofile-mpi.h"
|
||||||
#include "summarize_data.h"
|
#include "summarize_data.h"
|
||||||
#include "reduce_data.h"
|
#include "reduce_data.h"
|
||||||
#include "create_latex.h"
|
#include "create_latex.h"
|
||||||
|
|
||||||
|
|
||||||
#define FPRINTF_ROOT if(my_rank == 0) fprintf
|
/* define the following macro to synchronize the error indicator with all
|
||||||
|
worker ranks
|
||||||
|
|
||||||
|
This enforces that all ranks will be terminated by calling MPI_Abort if
|
||||||
|
anyone fails. This is necessary to work around a bug that appears at least
|
||||||
|
with Open MPI where calling MPI_Abort on one task doesn't terminate all
|
||||||
|
other ranks. */
|
||||||
|
#define SYNC_ERROR
|
||||||
|
|
||||||
/* define this macro to print result data to stdout */
|
/* define the following macro to print result data to stdout */
|
||||||
/*#define SHOW_RESULTS*/
|
/*#define SHOW_RESULTS*/
|
||||||
|
|
||||||
/* define this macro to have runtime measurement of certain profile scopes */
|
|
||||||
/*#define RUNTIME_MEASUREMENT*/
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef RUNTIME_MEASUREMENT
|
|
||||||
|
|
||||||
struct MeasureBlock {
|
|
||||||
|
|
||||||
# define GETTIME() MPI_Wtime()
|
|
||||||
|
|
||||||
double start_time;
|
|
||||||
double stop_time;
|
|
||||||
|
|
||||||
MeasureBlock() : start_time(-1.0), stop_time(-1.0) {}
|
|
||||||
|
|
||||||
void start() {
|
|
||||||
start_time= GETTIME();
|
|
||||||
}
|
|
||||||
void stop() {
|
|
||||||
assert( -1.0 != start_time );
|
|
||||||
stop_time= GETTIME();
|
|
||||||
}
|
|
||||||
double duration() const {
|
|
||||||
assert( -1.0 != start_time && -1.0 != stop_time );
|
|
||||||
return stop_time - start_time;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* store per-measure block runtimes */
|
|
||||||
map < string, MeasureBlock > MeasureBlocksMap;
|
|
||||||
|
|
||||||
#endif /* RUNTIME_MEASUREMENT */
|
|
||||||
|
|
||||||
|
|
||||||
/* parse command line options
|
/* parse command line options
|
||||||
return 0 if succeeded, 1 if help text or version showed, 2 if failed */
|
return 0 if succeeded, 1 if help text or version showed, -1 if failed */
|
||||||
static int parse_command_line( uint32_t my_rank, int argc, char** argv,
|
static int parse_command_line( int argc, char** argv, AllData& alldata );
|
||||||
AllData& alldata );
|
|
||||||
|
|
||||||
/* assign trace processes to analysis processes explicitly in order to allow
|
/* assign trace processes to analysis processes explicitly in order to allow
|
||||||
sophisticated grouping of MPI ranks/processes/threads/GPU threads/etc.
|
sophisticated grouping of MPI ranks/processes/threads/GPU threads/etc.
|
||||||
in the future, return true if succeeded */
|
in the future, return true if succeeded */
|
||||||
static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
|
static bool assign_procs_to_ranks( AllData& alldata );
|
||||||
AllData& alldata );
|
|
||||||
|
|
||||||
#ifdef SHOW_RESULTS
|
#ifdef SHOW_RESULTS
|
||||||
/* show results on stdout */
|
/* show results on stdout */
|
||||||
@ -97,22 +67,33 @@ int main( int argc, char** argv ) {
|
|||||||
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank );
|
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank );
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &num_ranks );
|
MPI_Comm_size(MPI_COMM_WORLD, &num_ranks );
|
||||||
|
|
||||||
|
AllData alldata( my_rank, num_ranks );
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
|
||||||
AllData alldata;
|
|
||||||
|
|
||||||
/* step 0: parse command line options */
|
/* step 0: parse command line options */
|
||||||
if ( 0 !=
|
if ( 0 != ( ret= parse_command_line( argc, argv, alldata ) ) ) {
|
||||||
( ret= parse_command_line( my_rank, argc, argv, alldata ) ) ) {
|
|
||||||
|
if ( 1 == ret ) {
|
||||||
|
|
||||||
|
ret= 0;
|
||||||
|
|
||||||
|
} else { /* -1 == ret */
|
||||||
|
|
||||||
|
ret= 1;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VerbosePrint( alldata, 1, true, "initializing\n" );
|
||||||
|
|
||||||
MPI_Barrier( MPI_COMM_WORLD );
|
MPI_Barrier( MPI_COMM_WORLD );
|
||||||
|
|
||||||
/* step 1: assign trace processes to analysis processes */
|
/* step 1: assign trace processes to analysis processes */
|
||||||
if ( !assign_procs_to_ranks( my_rank, num_ranks, alldata ) ) {
|
if ( !assign_procs_to_ranks( alldata ) ) {
|
||||||
|
|
||||||
ret= 1;
|
ret= 1;
|
||||||
break;
|
break;
|
||||||
@ -121,16 +102,14 @@ int main( int argc, char** argv ) {
|
|||||||
|
|
||||||
MPI_Barrier( MPI_COMM_WORLD );
|
MPI_Barrier( MPI_COMM_WORLD );
|
||||||
|
|
||||||
#ifdef RUNTIME_MEASUREMENT
|
if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) {
|
||||||
if ( 0 == my_rank ) {
|
|
||||||
|
|
||||||
MeasureBlocksMap[ "analyze data" ].start();
|
alldata.measureBlockMap[ "analyze data" ].start();
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif /* RUNTIME_MEASUREMENT */
|
|
||||||
|
|
||||||
/* step 2: collect data by reading input trace file */
|
/* step 2: collect data by reading input trace file */
|
||||||
if ( !collectData( my_rank, num_ranks, alldata ) ) {
|
if ( !CollectData( alldata ) ) {
|
||||||
|
|
||||||
ret= 1;
|
ret= 1;
|
||||||
break;
|
break;
|
||||||
@ -141,7 +120,7 @@ int main( int argc, char** argv ) {
|
|||||||
|
|
||||||
/* step 3: summarize data; every analysis rank summarizes it's local
|
/* step 3: summarize data; every analysis rank summarizes it's local
|
||||||
data independently */
|
data independently */
|
||||||
if ( !summarizeData( my_rank, num_ranks, alldata ) ) {
|
if ( !SummarizeData( alldata ) ) {
|
||||||
|
|
||||||
ret= 1;
|
ret= 1;
|
||||||
break;
|
break;
|
||||||
@ -151,7 +130,7 @@ int main( int argc, char** argv ) {
|
|||||||
MPI_Barrier( MPI_COMM_WORLD );
|
MPI_Barrier( MPI_COMM_WORLD );
|
||||||
|
|
||||||
/* step 4: reduce data to master */
|
/* step 4: reduce data to master */
|
||||||
if ( !reduceData( my_rank, num_ranks, alldata ) ) {
|
if ( !ReduceData( alldata ) ) {
|
||||||
|
|
||||||
ret= 1;
|
ret= 1;
|
||||||
break;
|
break;
|
||||||
@ -160,13 +139,11 @@ int main( int argc, char** argv ) {
|
|||||||
|
|
||||||
MPI_Barrier( MPI_COMM_WORLD );
|
MPI_Barrier( MPI_COMM_WORLD );
|
||||||
|
|
||||||
#ifdef RUNTIME_MEASUREMENT
|
if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) {
|
||||||
if ( 0 == my_rank ) {
|
|
||||||
|
|
||||||
MeasureBlocksMap[ "analyze data" ].stop();
|
alldata.measureBlockMap[ "analyze data" ].stop();
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif /* RUNTIME_MEASUREMENT */
|
|
||||||
|
|
||||||
/* step 5: produce outputs */
|
/* step 5: produce outputs */
|
||||||
|
|
||||||
@ -190,50 +167,44 @@ int main( int argc, char** argv ) {
|
|||||||
show_results( alldata );
|
show_results( alldata );
|
||||||
#endif /* SHOW_RESULTS */
|
#endif /* SHOW_RESULTS */
|
||||||
|
|
||||||
#ifdef RUNTIME_MEASUREMENT
|
alldata.measureBlockMap[ "produce output" ].start();
|
||||||
MeasureBlocksMap[ "write tex" ].start();
|
|
||||||
#endif /* RUNTIME_MEASUREMENT */
|
|
||||||
|
|
||||||
/* step 5.3: generate PGF output */
|
/* step 5.3: generate PGF output */
|
||||||
if ( !createTex( alldata ) ) {
|
if ( !CreateTex( alldata ) ) {
|
||||||
|
|
||||||
ret= 1;
|
ret= 1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef RUNTIME_MEASUREMENT
|
alldata.measureBlockMap[ "produce output" ].stop();
|
||||||
MeasureBlocksMap[ "write tex" ].stop();
|
|
||||||
#endif /* RUNTIME_MEASUREMENT */
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} while( false );
|
} while( false );
|
||||||
|
|
||||||
#ifdef RUNTIME_MEASUREMENT
|
|
||||||
|
|
||||||
/* show runtime measurement results */
|
|
||||||
|
|
||||||
if ( 0 == my_rank && 0 == ret ) {
|
|
||||||
|
|
||||||
cout << endl << "runtime measurement results:" << endl;
|
|
||||||
for ( map < string, MeasureBlock >::const_iterator it=
|
|
||||||
MeasureBlocksMap.begin(); it != MeasureBlocksMap.end(); it++ ) {
|
|
||||||
|
|
||||||
cout << " " << it->first << ": " << it->second.duration()
|
|
||||||
<< "s" << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* RUNTIME_MEASUREMENT */
|
|
||||||
|
|
||||||
/* either finalize or abort on error */
|
/* either finalize or abort on error */
|
||||||
|
|
||||||
if ( 0 == ret || 1 == ret ) {
|
if ( 0 == ret ) {
|
||||||
|
|
||||||
|
/* show runtime measurement results */
|
||||||
|
if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) {
|
||||||
|
|
||||||
|
cout << "runtime measurement results:" << endl;
|
||||||
|
for ( map < string, MeasureBlock >::const_iterator it=
|
||||||
|
alldata.measureBlockMap.begin();
|
||||||
|
it != alldata.measureBlockMap.end(); it++ ) {
|
||||||
|
|
||||||
|
cout << " " << it->first << ": " << it->second.duration()
|
||||||
|
<< "s" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
MPI_Finalize();
|
MPI_Finalize();
|
||||||
|
|
||||||
|
VerbosePrint( alldata, 1, true, "done\n" );
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
MPI_Abort( MPI_COMM_WORLD, ret );
|
MPI_Abort( MPI_COMM_WORLD, ret );
|
||||||
@ -244,37 +215,12 @@ int main( int argc, char** argv ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static int parse_command_line( uint32_t my_rank, int argc, char** argv,
|
static int parse_command_line( int argc, char** argv, AllData& alldata ) {
|
||||||
AllData& alldata ) {
|
|
||||||
|
|
||||||
int ret= 0;
|
int ret= 0;
|
||||||
|
|
||||||
Params& params= alldata.params;
|
Params& params= alldata.params;
|
||||||
|
|
||||||
/* show help text if no options are given */
|
|
||||||
if ( 1 == argc ) {
|
|
||||||
|
|
||||||
if ( 0 == my_rank ) {
|
|
||||||
|
|
||||||
show_helptext();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/* read environment variables */
|
|
||||||
|
|
||||||
char* env;
|
|
||||||
|
|
||||||
env= getenv( "OTF_PROFILE_LATEX" );
|
|
||||||
if ( env && 0 < strlen( env ) )
|
|
||||||
params.latex_command= env;
|
|
||||||
env= getenv( "OTF_PROFILE_DVIPDF" );
|
|
||||||
if ( env && 0 < strlen( env ) )
|
|
||||||
params.dvipdf_command= env;
|
|
||||||
|
|
||||||
/* parse command line options */
|
/* parse command line options */
|
||||||
|
|
||||||
enum { ERR_OK, ERR_OPT_UNKNOWN, ERR_ARG_MISSING, ERR_ARG_INVALID };
|
enum { ERR_OK, ERR_OPT_UNKNOWN, ERR_ARG_MISSING, ERR_ARG_INVALID };
|
||||||
@ -288,7 +234,7 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
|
|||||||
if ( 0 == strcmp( "-h", argv[i] ) ||
|
if ( 0 == strcmp( "-h", argv[i] ) ||
|
||||||
0 == strcmp( "--help", argv[i] ) ) {
|
0 == strcmp( "--help", argv[i] ) ) {
|
||||||
|
|
||||||
if ( 0 == my_rank ) {
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
show_helptext();
|
show_helptext();
|
||||||
|
|
||||||
@ -300,13 +246,27 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
|
|||||||
/* -V */
|
/* -V */
|
||||||
} else if ( 0 == strcmp( "-V", argv[i] ) ) {
|
} else if ( 0 == strcmp( "-V", argv[i] ) ) {
|
||||||
|
|
||||||
FPRINTF_ROOT( stdout, "%u.%u.%u \"%s\"\n",
|
if ( 0 == alldata.myRank ) {
|
||||||
OTF_VERSION_MAJOR, OTF_VERSION_MINOR, OTF_VERSION_SUB,
|
|
||||||
OTF_VERSION_STRING );
|
printf( "%u.%u.%u \"%s\"\n",
|
||||||
|
OTF_VERSION_MAJOR, OTF_VERSION_MINOR, OTF_VERSION_SUB,
|
||||||
|
OTF_VERSION_STRING );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
ret= 1;
|
ret= 1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/* -v */
|
||||||
|
} else if ( 0 == strcmp( "-v", argv[i] ) ) {
|
||||||
|
|
||||||
|
params.verbose_level++;
|
||||||
|
|
||||||
|
/* -p */
|
||||||
|
} else if ( 0 == strcmp( "-p", argv[i] ) ) {
|
||||||
|
|
||||||
|
params.progress= true;
|
||||||
|
|
||||||
/* -f */
|
/* -f */
|
||||||
} else if ( 0 == strcmp( "-f", argv[i] ) ) {
|
} else if ( 0 == strcmp( "-f", argv[i] ) ) {
|
||||||
|
|
||||||
@ -364,11 +324,15 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
|
|||||||
|
|
||||||
params.read_from_stats= true;
|
params.read_from_stats= true;
|
||||||
|
|
||||||
|
#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
|
||||||
|
|
||||||
/* --nopdf */
|
/* --nopdf */
|
||||||
} else if ( 0 == strcmp( "--nopdf", argv[i] ) ) {
|
} else if ( 0 == strcmp( "--nopdf", argv[i] ) ) {
|
||||||
|
|
||||||
params.create_pdf= false;
|
params.create_pdf= false;
|
||||||
|
|
||||||
|
#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
|
||||||
|
|
||||||
/* input file or unknown option */
|
/* input file or unknown option */
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
@ -394,74 +358,102 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* show specific message on error */
|
/* show specific message on error */
|
||||||
|
|
||||||
if ( ERR_OK != parse_error ) {
|
if ( ERR_OK != parse_error ) {
|
||||||
|
|
||||||
switch( parse_error ) {
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
case ERR_OPT_UNKNOWN:
|
switch( parse_error ) {
|
||||||
|
|
||||||
FPRINTF_ROOT( stderr, "ERROR: Unknown option '%s'.\n", argv[i] );
|
case ERR_OPT_UNKNOWN:
|
||||||
break;
|
|
||||||
|
|
||||||
case ERR_ARG_MISSING:
|
cerr << "ERROR: Unknown option '" << argv[i] << "'."
|
||||||
|
<< endl;
|
||||||
|
break;
|
||||||
|
|
||||||
FPRINTF_ROOT( stderr, "ERROR: Expected argument for option '%s'.\n",
|
case ERR_ARG_MISSING:
|
||||||
argv[i] );
|
|
||||||
break;
|
|
||||||
|
|
||||||
case ERR_ARG_INVALID:
|
cerr << "ERROR: Expected argument for option '" << argv[i]
|
||||||
|
<< "'." << endl;
|
||||||
|
break;
|
||||||
|
|
||||||
FPRINTF_ROOT( stderr, "ERROR: Invalid argument for option '%s'.\n",
|
case ERR_ARG_INVALID:
|
||||||
argv[i] );
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
cerr << "ERROR: Invalid argument for option '" << argv[i]
|
||||||
|
<< "'." << endl;
|
||||||
|
break;
|
||||||
|
|
||||||
break;
|
default:
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ret= 2;
|
ret= -1;
|
||||||
|
|
||||||
|
/* show help text if no input trace file is given */
|
||||||
|
} else if ( 0 == params.input_file_prefix.length() ) {
|
||||||
|
|
||||||
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
|
show_helptext();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
ret= 1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
|
static bool assign_procs_to_ranks( AllData& alldata ) {
|
||||||
AllData& alldata ) {
|
|
||||||
|
|
||||||
bool ret= true;
|
bool error= false;
|
||||||
|
|
||||||
if ( 0 == my_rank ) {
|
OTF_FileManager* manager= NULL;
|
||||||
|
OTF_MasterControl* master= NULL;
|
||||||
|
|
||||||
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
/* rank 0 reads OTF master control of input trace file */
|
/* rank 0 reads OTF master control of input trace file */
|
||||||
|
|
||||||
OTF_FileManager* manager= OTF_FileManager_open( 1 );
|
manager= OTF_FileManager_open( 1 );
|
||||||
assert( manager );
|
assert( manager );
|
||||||
|
|
||||||
OTF_MasterControl* master= OTF_MasterControl_new( manager );
|
master= OTF_MasterControl_new( manager );
|
||||||
assert( master );
|
assert( master );
|
||||||
|
|
||||||
|
int master_read_ret=
|
||||||
|
OTF_MasterControl_read( master,
|
||||||
|
alldata.params.input_file_prefix.c_str() );
|
||||||
|
|
||||||
|
/* that's the first access to the input trace file; show tidy error
|
||||||
|
message if failed */
|
||||||
|
if ( 0 == master_read_ret ) {
|
||||||
|
|
||||||
|
cerr << "ERROR: Unable to open file '"
|
||||||
|
<< alldata.params.input_file_prefix << ".otf' for reading."
|
||||||
|
<< endl;
|
||||||
|
error= true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* broadcast error indicator to workers because Open MPI had all
|
||||||
|
ranks except rank 0 waiting endlessly in the MPI_Recv, when the '.otf' file
|
||||||
|
was absent. */
|
||||||
|
if ( SyncError( alldata, error, 0 ) ) {
|
||||||
|
|
||||||
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
|
||||||
int master_read_ret=
|
|
||||||
OTF_MasterControl_read( master,
|
|
||||||
alldata.params.input_file_prefix.c_str() );
|
|
||||||
|
|
||||||
/* that's the first access to the input trace file; show tidy error
|
|
||||||
message if failed */
|
|
||||||
if ( 0 == master_read_ret ) {
|
|
||||||
|
|
||||||
cerr << "ERROR: Unable to open file '"
|
|
||||||
<< alldata.params.input_file_prefix << ".otf' for reading."
|
|
||||||
<< endl;
|
|
||||||
ret= false;
|
|
||||||
break;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/* fill the global array of processes */
|
/* fill the global array of processes */
|
||||||
|
|
||||||
alldata.myProcessesNum= OTF_MasterControl_getrCount( master );
|
alldata.myProcessesNum= OTF_MasterControl_getrCount( master );
|
||||||
@ -507,19 +499,20 @@ static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
|
|
||||||
/* remaining ranks and remaining workers */
|
/* remaining ranks and remaining workers */
|
||||||
uint32_t r_ranks= alldata.myProcessesNum;
|
uint32_t r_ranks= alldata.myProcessesNum;
|
||||||
uint32_t r_workers= num_ranks;
|
uint32_t r_workers= alldata.numRanks;
|
||||||
|
|
||||||
uint32_t pos= 0;
|
uint32_t pos= 0;
|
||||||
bool warn_for_empty= true;
|
bool warn_for_empty= true;
|
||||||
for ( int w= 0; w < (int)num_ranks; w++ ) {
|
for ( int w= 0; w < (int)alldata.numRanks; w++ ) {
|
||||||
|
|
||||||
uint32_t n= ( ( r_ranks / r_workers ) * r_workers < r_ranks) ?
|
uint32_t n= ( ( r_ranks / r_workers ) * r_workers < r_ranks) ?
|
||||||
( r_ranks / r_workers +1 ) : ( r_ranks / r_workers );
|
( r_ranks / r_workers +1 ) : ( r_ranks / r_workers );
|
||||||
|
|
||||||
if ( ( 0 == n ) && warn_for_empty ) {
|
if ( ( 0 == n ) && warn_for_empty ) {
|
||||||
|
|
||||||
cerr << "Warning: more analysis ranks than trace processes, " <<
|
cerr << "Warning: more analysis ranks than trace processes, "
|
||||||
"ranks " << w << " to " << num_ranks -1 << " are unemployed" << endl;
|
<< "ranks " << w << " to " << alldata.numRanks -1
|
||||||
|
<< " are unemployed" << endl;
|
||||||
|
|
||||||
warn_for_empty= false;
|
warn_for_empty= false;
|
||||||
}
|
}
|
||||||
@ -578,7 +571,7 @@ static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
}
|
}
|
||||||
cerr << endl;*/
|
cerr << endl;*/
|
||||||
|
|
||||||
return ret;
|
return !error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -802,6 +795,9 @@ static void show_helptext() {
|
|||||||
<< " options:" << endl
|
<< " options:" << endl
|
||||||
<< " -h, --help show this help message" << endl
|
<< " -h, --help show this help message" << endl
|
||||||
<< " -V show OTF version" << endl
|
<< " -V show OTF version" << endl
|
||||||
|
<< " -v increase output verbosity" << endl
|
||||||
|
<< " (can be used more than once)" << endl
|
||||||
|
<< " -p show progress" << endl
|
||||||
<< " -f <n> max. number of filehandles available per rank" << endl
|
<< " -f <n> max. number of filehandles available per rank" << endl
|
||||||
<< " (default: " << Params::DEFAULT_MAX_FILE_HANDLES << ")" << endl
|
<< " (default: " << Params::DEFAULT_MAX_FILE_HANDLES << ")" << endl
|
||||||
<< " -b <size> set buffersize of the reader" << endl
|
<< " -b <size> set buffersize of the reader" << endl
|
||||||
@ -809,15 +805,102 @@ static void show_helptext() {
|
|||||||
<< " -o <prefix> specify the prefix of output file(s)" << endl
|
<< " -o <prefix> specify the prefix of output file(s)" << endl
|
||||||
<< " (default: " << Params::DEFAULT_OUTPUT_FILE_PREFIX() << ")" << endl
|
<< " (default: " << Params::DEFAULT_OUTPUT_FILE_PREFIX() << ")" << endl
|
||||||
<< " --stat read only summarized information, no events" << endl
|
<< " --stat read only summarized information, no events" << endl
|
||||||
|
#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
|
||||||
<< " --nopdf do not produce PDF output" << endl
|
<< " --nopdf do not produce PDF output" << endl
|
||||||
<< endl
|
#else /* PDFTEX && HAVE_PGFPLOTS_1_4 */
|
||||||
<< " environment variables:" << endl
|
|
||||||
<< " OTF_PROFILE_LATEX LaTeX command" << endl
|
|
||||||
<< " (default: " << Params::DEFAULT_LATEX_COMMAND() << ")" << endl
|
|
||||||
<< " OTF_PROFILE_DVIPDF DVI to PDF converter command" << endl
|
|
||||||
<< " (default: " << Params::DEFAULT_DVIPDF_COMMAND() << ")" << endl
|
|
||||||
<< endl
|
<< endl
|
||||||
<< " PDF creation requires the PGFPLOTS package version >1.4" << endl
|
<< " PDF creation requires the PGFPLOTS package version >1.4" << endl
|
||||||
<< " http://sourceforge.net/projects/pgfplots/ " << endl
|
<< " http://sourceforge.net/projects/pgfplots/ " << endl
|
||||||
|
#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
|
||||||
<< endl;
|
<< endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Print a printf-style verbose message to stdout.

   alldata    supplies the configured verbosity (params.verbose_level) and
              the rank of the calling process (myRank)
   level      minimum verbosity level at which the message is shown
   root_only  if true, only rank 0 prints the message (without prefix);
              otherwise every rank prints it, prefixed with "[<rank>] "
   fmt, ...   printf-style format string and its arguments

   Messages printed by all ranks are assembled in a fixed 1024-byte buffer
   and are truncated if they do not fit. */
void VerbosePrint( AllData& alldata, uint8_t level, bool root_only,
                   const char* fmt, ... ) {

    /* message is above the configured verbosity: nothing to print */
    if ( alldata.params.verbose_level < level ) {

        return;

    }

    va_list ap;

    va_start( ap, fmt );

    /* either only rank 0 prints the message */
    if ( root_only ) {

        if ( 0 == alldata.myRank ) {

            vprintf( fmt, ap );

        }

    /* or all ranks print the message */
    } else {

        char msg[1024];

        /* prepend current rank to message */
        int prefix_len= snprintf( msg, sizeof( msg ), "[%u] ", alldata.myRank );
        if ( prefix_len < 0 || (size_t)prefix_len >= sizeof( msg ) ) {

            /* prefix could not be written; fall back to the bare message */
            prefix_len= 0;

        }
        msg[prefix_len]= '\0';

        /* append the formatted message; the size limit is the space that
           is left behind the prefix, not the size of the whole buffer,
           otherwise a long message would be written past the end of msg */
        vsnprintf( msg + prefix_len, sizeof( msg ) - (size_t)prefix_len,
                   fmt, ap );

        /* print message */
        printf( "%s ", msg );

    }

    va_end( ap );

}
|
||||||
|
|
||||||
|
|
||||||
|
/* Synchronize the error indicator between the ranks of MPI_COMM_WORLD.

   alldata  supplies the number of ranks (numRanks)
   error    in: local error indicator; out: synchronized error indicator
   root     rank whose indicator is broadcast to all ranks; if it is
            (uint32_t)-1 the indicators of all ranks are reduced instead,
            so the result is true if any rank reported an error

   Returns the value of 'error' after synchronization. If SYNC_ERROR is not
   defined, or there is only one rank, 'error' is returned unchanged. */
bool SyncError( AllData& alldata, bool& error, uint32_t root ) {

#ifdef SYNC_ERROR

    /* a single rank has nobody to synchronize with */
    if ( alldata.numRanks > 1 ) {

        int local_flag= error ? 1 : 0;
        int synced_flag= local_flag;

        if ( (uint32_t)-1 == root ) {

            /* no root given: maximum over all ranks, i.e. 1 if any rank
               has its indicator set */
            MPI_Allreduce( &local_flag, &synced_flag, 1, MPI_INT, MPI_MAX,
                           MPI_COMM_WORLD );

        } else {

            /* take over the indicator of the root rank */
            MPI_Bcast( &synced_flag, 1, MPI_INT, (int)root, MPI_COMM_WORLD );

        }

        error= ( 1 == synced_flag );

    }

#endif /* SYNC_ERROR */

    return error;

}
|
||||||
|
|
||||||
|
|
||||||
|
/* Integer logarithm helper for unsigned 64-bit values.

   Returns the smallest i for which b^i > x, which is the number of digits
   of x when written in base b (0 for x == 0). For example Logi( 1, 2 ) is
   1, Logi( 4, 2 ) is 3 and Logi( 999, 10 ) is 3.

   x  value to take the logarithm of
   b  base, must be greater than 1

   The result is computed by repeated division instead of by multiplying
   up a power of b: the power would wrap around in 64-bit arithmetic for
   large x and the loop would never terminate. */
uint64_t Logi( uint64_t x, uint64_t b ) {

    assert( b > 1 );

    uint64_t i= 0;

    /* strip one base-b digit per iteration */
    while ( x > 0 ) {

        x/= b;
        i++;

    }

    return i;

}
|
||||||
|
@ -0,0 +1,30 @@
|
|||||||
|
/*
|
||||||
|
This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011.
|
||||||
|
Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OTFPROFILE_MPI_H
|
||||||
|
#define OTFPROFILE_MPI_H
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef HAVE_CONFIG_H
|
||||||
|
# include "config.h"
|
||||||
|
#endif /* HAVE_CONFIG_H */
|
||||||
|
|
||||||
|
#include "datastructs.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* print verbose message to stdout
|
||||||
|
(if root_only is true only rank 0 will print the message) */
|
||||||
|
void VerbosePrint( AllData& alldata, uint8_t level, bool root_only,
|
||||||
|
const char* fmt, ... );
|
||||||
|
|
||||||
|
/* synchronize error indicator with all worker ranks
|
||||||
|
(either broadcast from one rank (root) or reduce from all) */
|
||||||
|
bool SyncError( AllData& alldata, bool& error, uint32_t root= (uint32_t)-1 );
|
||||||
|
|
||||||
|
/* logarithm to base b for unsigned 64-bit integer x */
|
||||||
|
uint64_t Logi( uint64_t x, uint64_t b= 2 );
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* OTFPROFILE_MPI_H */
|
@ -8,8 +8,7 @@ using namespace std;
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "mpi.h"
|
#include "otfprofile-mpi.h"
|
||||||
|
|
||||||
#include "reduce_data.h"
|
#include "reduce_data.h"
|
||||||
|
|
||||||
|
|
||||||
@ -546,71 +545,103 @@ static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool reduceData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
|
bool ReduceData( AllData& alldata ) {
|
||||||
|
|
||||||
bool ret= true;
|
bool ret= true;
|
||||||
|
|
||||||
/* implement reduction myself because MPI and C++ STL don't play with each other */
|
if ( 1 < alldata.numRanks ) {
|
||||||
|
|
||||||
/* how many rounds until master has all the data? */
|
VerbosePrint( alldata, 1, true, "reducing data\n" );
|
||||||
uint32_t round= 1;
|
|
||||||
while ( round < num_ranks ) {
|
|
||||||
|
|
||||||
uint32_t peer= my_rank ^ round;
|
/* implement reduction myself because MPI and C++ STL don't play with
|
||||||
|
each other */
|
||||||
|
|
||||||
/* if peer rank is not there, do nothing but go on */
|
/* how many rounds until master has all the data? */
|
||||||
if ( peer >= num_ranks ) {
|
uint32_t num_rounds= Logi( alldata.numRanks ) -1;
|
||||||
|
uint32_t round_no= 0;
|
||||||
|
uint32_t round= 1;
|
||||||
|
while ( round < alldata.numRanks ) {
|
||||||
|
|
||||||
|
round_no++;
|
||||||
|
|
||||||
|
if ( 1 == alldata.params.verbose_level ) {
|
||||||
|
|
||||||
|
VerbosePrint( alldata, 1, true, " round %u / %u\n",
|
||||||
|
round_no, num_rounds );
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t peer= alldata.myRank ^ round;
|
||||||
|
|
||||||
|
/* if peer rank is not there, do nothing but go on */
|
||||||
|
if ( peer >= alldata.numRanks ) {
|
||||||
|
|
||||||
|
round= round << 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* send to smaller peer, receive from larger one */
|
||||||
|
uint32_t sizes[10];
|
||||||
|
char* buffer;
|
||||||
|
|
||||||
|
if ( alldata.myRank < peer ) {
|
||||||
|
|
||||||
|
MPI_Status status;
|
||||||
|
|
||||||
|
MPI_Recv( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD,
|
||||||
|
&status );
|
||||||
|
|
||||||
|
// DEBUG
|
||||||
|
//cout << " round " << round << " recv " << peer << "--> " <<
|
||||||
|
//my_rank << " with " <<
|
||||||
|
//sizes[0] << " bytes, " <<
|
||||||
|
//sizes[1] << ", " <<
|
||||||
|
//sizes[2] << ", " <<
|
||||||
|
//sizes[3] << ", " <<
|
||||||
|
//sizes[4] << "" << endl << flush;
|
||||||
|
|
||||||
|
buffer= prepare_worker_data( alldata, sizes );
|
||||||
|
|
||||||
|
VerbosePrint( alldata, 2, false,
|
||||||
|
"round %u / %u: receiving %u bytes from rank %u\n",
|
||||||
|
round_no, num_rounds, sizes[0], peer );
|
||||||
|
|
||||||
|
MPI_Recv( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD,
|
||||||
|
&status );
|
||||||
|
|
||||||
|
unpack_worker_data( alldata, sizes );
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
buffer= pack_worker_data( alldata, sizes );
|
||||||
|
|
||||||
|
// DEBUG
|
||||||
|
//cout << " round " << round << " send " << my_rank <<
|
||||||
|
//" --> " << peer << " with " <<
|
||||||
|
//sizes[0] << " bytes, " <<
|
||||||
|
//sizes[1] << ", " <<
|
||||||
|
//sizes[2] << ", " <<
|
||||||
|
//sizes[3] << ", " <<
|
||||||
|
//sizes[4] << "" << endl << flush;
|
||||||
|
|
||||||
|
VerbosePrint( alldata, 2, false,
|
||||||
|
"round %u / %u: sending %u bytes to rank %u\n",
|
||||||
|
round_no, num_rounds, sizes[0], peer );
|
||||||
|
|
||||||
|
MPI_Send( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD );
|
||||||
|
|
||||||
|
MPI_Send( buffer, sizes[0], MPI_PACKED, peer, 5,
|
||||||
|
MPI_COMM_WORLD );
|
||||||
|
|
||||||
|
/* every work has to send off its data at most once,
|
||||||
|
after that, break from the collective reduction operation */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
round= round << 1;
|
round= round << 1;
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* send to smaller peer, receive from larger one */
|
alldata.freePackBuffer();
|
||||||
uint32_t sizes[10];
|
|
||||||
char* buffer;
|
|
||||||
|
|
||||||
if ( my_rank < peer ) {
|
|
||||||
|
|
||||||
MPI_Status status;
|
|
||||||
|
|
||||||
MPI_Recv( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD, &status );
|
|
||||||
|
|
||||||
// DEBUG
|
|
||||||
//cout << " round " << round << " recv " << peer << "--> "<< my_rank << " with " <<
|
|
||||||
//sizes[0] << " bytes, " <<
|
|
||||||
//sizes[1] << ", " <<
|
|
||||||
//sizes[2] << ", " <<
|
|
||||||
//sizes[3] << ", " <<
|
|
||||||
//sizes[4] << "" << endl << flush;
|
|
||||||
|
|
||||||
buffer= prepare_worker_data( alldata, sizes );
|
|
||||||
|
|
||||||
MPI_Recv( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD, &status );
|
|
||||||
|
|
||||||
unpack_worker_data( alldata, sizes );
|
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
buffer= pack_worker_data( alldata, sizes );
|
|
||||||
|
|
||||||
// DEBUG
|
|
||||||
//cout << " round " << round << " send " << my_rank << " --> " << peer << " with " <<
|
|
||||||
//sizes[0] << " bytes, " <<
|
|
||||||
//sizes[1] << ", " <<
|
|
||||||
//sizes[2] << ", " <<
|
|
||||||
//sizes[3] << ", " <<
|
|
||||||
//sizes[4] << "" << endl << flush;
|
|
||||||
|
|
||||||
MPI_Send( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD );
|
|
||||||
|
|
||||||
MPI_Send( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD );
|
|
||||||
|
|
||||||
/* every work has to send off its data at most once,
|
|
||||||
after that, break from the collective reduction operation */
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
round= round << 1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,11 +6,12 @@
|
|||||||
#ifndef REDUCE_DATA_H
|
#ifndef REDUCE_DATA_H
|
||||||
#define REDUCE_DATA_H
|
#define REDUCE_DATA_H
|
||||||
|
|
||||||
|
|
||||||
#include "datastructs.h"
|
#include "datastructs.h"
|
||||||
|
|
||||||
|
|
||||||
/* reduce the data to the master process */
|
/* reduce the data to the master process */
|
||||||
bool reduceData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata );
|
bool ReduceData( AllData& alldata );
|
||||||
|
|
||||||
|
|
||||||
#endif /* REDUCE_DATA_H */
|
#endif /* REDUCE_DATA_H */
|
||||||
|
@ -8,13 +8,10 @@ using namespace std;
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "mpi.h"
|
|
||||||
|
|
||||||
#include "summarize_data.h"
|
#include "summarize_data.h"
|
||||||
|
|
||||||
|
|
||||||
static void get_clustering( uint32_t my_rank, uint32_t num_ranks,
|
static void get_clustering( AllData& alldata ) {
|
||||||
AllData& alldata ) {
|
|
||||||
|
|
||||||
uint32_t r_processes= alldata.allProcesses.size();
|
uint32_t r_processes= alldata.allProcesses.size();
|
||||||
uint32_t r_clusters= Clustering::MAX_CLUSTERS;
|
uint32_t r_clusters= Clustering::MAX_CLUSTERS;
|
||||||
@ -44,8 +41,7 @@ static void get_clustering( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
|
static void share_clustering( AllData& alldata ) {
|
||||||
AllData& alldata ) {
|
|
||||||
|
|
||||||
MPI_Barrier( MPI_COMM_WORLD );
|
MPI_Barrier( MPI_COMM_WORLD );
|
||||||
|
|
||||||
@ -53,7 +49,7 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
int buffer_size= 0;
|
int buffer_size= 0;
|
||||||
int buffer_pos= 0;
|
int buffer_pos= 0;
|
||||||
|
|
||||||
if ( my_rank == 0 ) {
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
/* get size needed to send clustering information to workers */
|
/* get size needed to send clustering information to workers */
|
||||||
|
|
||||||
@ -86,7 +82,7 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
|
|
||||||
/* pack clustering information to buffer */
|
/* pack clustering information to buffer */
|
||||||
|
|
||||||
if ( my_rank == 0 ) {
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
/* alldata.clustering.clustersToProcesses.size() */
|
/* alldata.clustering.clustersToProcesses.size() */
|
||||||
uint64_t clust_proc_map_size=
|
uint64_t clust_proc_map_size=
|
||||||
@ -128,7 +124,7 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
|
|
||||||
/* unpack clustering information from buffer */
|
/* unpack clustering information from buffer */
|
||||||
|
|
||||||
if ( my_rank != 0 ) {
|
if ( 0 != alldata.myRank ) {
|
||||||
|
|
||||||
/* alldata.clustering.clustersToProcesses.size() */
|
/* alldata.clustering.clustersToProcesses.size() */
|
||||||
uint64_t clust_proc_map_size;
|
uint64_t clust_proc_map_size;
|
||||||
@ -168,29 +164,29 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
|
bool SummarizeData( AllData& alldata ) {
|
||||||
|
|
||||||
bool ret= true;
|
bool error= false;
|
||||||
|
|
||||||
/* rank 0 gets clustering information */
|
/* rank 0 gets clustering information */
|
||||||
|
|
||||||
if ( my_rank == 0 ) {
|
if ( 0 == alldata.myRank ) {
|
||||||
|
|
||||||
get_clustering( my_rank, num_ranks, alldata );
|
get_clustering( alldata );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* share clustering information to workers */
|
/* share clustering information to workers */
|
||||||
|
|
||||||
if ( num_ranks > 1 ) {
|
if ( 1 < alldata.numRanks ) {
|
||||||
|
|
||||||
share_clustering( my_rank, num_ranks, alldata );
|
share_clustering( alldata );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* macro to set min, max to sum before summarizing */
|
/* macro to set min, max to sum before summarizing */
|
||||||
# define MINMAX2SUM(v) \
|
# define MINMAX2SUM(v) \
|
||||||
if( (v).cnt != 0 ) { \
|
if( 0 != (v).cnt ) { \
|
||||||
(v).cnt = 1; \
|
(v).cnt = 1; \
|
||||||
(v).min= (v).max= (v).sum; \
|
(v).min= (v).max= (v).sum; \
|
||||||
} else { \
|
} else { \
|
||||||
@ -243,9 +239,9 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
|
|||||||
if ( alldata.clustering.enabled ) {
|
if ( alldata.clustering.enabled ) {
|
||||||
|
|
||||||
cluster_a= alldata.clustering.process2cluster( it->first.a );
|
cluster_a= alldata.clustering.process2cluster( it->first.a );
|
||||||
assert( cluster_a != 0 );
|
assert( 0 != cluster_a );
|
||||||
cluster_b= alldata.clustering.process2cluster( it->first.b );
|
cluster_b= alldata.clustering.process2cluster( it->first.b );
|
||||||
assert( cluster_b != 0 );
|
assert( 0 != cluster_b );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -273,7 +269,7 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
|
|||||||
if ( alldata.clustering.enabled ) {
|
if ( alldata.clustering.enabled ) {
|
||||||
|
|
||||||
cluster= alldata.clustering.process2cluster( it->first );
|
cluster= alldata.clustering.process2cluster( it->first );
|
||||||
assert( cluster != 0 );
|
assert( 0 != cluster );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -302,7 +298,7 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
|
|||||||
if ( alldata.clustering.enabled ) {
|
if ( alldata.clustering.enabled ) {
|
||||||
|
|
||||||
cluster= alldata.clustering.process2cluster( it->first.b );
|
cluster= alldata.clustering.process2cluster( it->first.b );
|
||||||
assert( cluster != 0 );
|
assert( 0 != cluster );
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -319,5 +315,5 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
|
|||||||
alldata.collectiveMapPerRank.clear();
|
alldata.collectiveMapPerRank.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return !error;
|
||||||
}
|
}
|
||||||
|
@ -6,11 +6,12 @@
|
|||||||
#ifndef SUMMARIZE_DATA_H
|
#ifndef SUMMARIZE_DATA_H
|
||||||
#define SUMMARIZE_DATA_H
|
#define SUMMARIZE_DATA_H
|
||||||
|
|
||||||
|
|
||||||
#include "datastructs.h"
|
#include "datastructs.h"
|
||||||
|
|
||||||
|
|
||||||
/* summarize the data for all trace processes on the current worker */
|
/* summarize the data for all trace processes on the current worker */
|
||||||
bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata );
|
bool SummarizeData( AllData& alldata );
|
||||||
|
|
||||||
|
|
||||||
#endif /* SUMMARIZE_DATA_H */
|
#endif /* SUMMARIZE_DATA_H */
|
||||||
|
@ -812,10 +812,12 @@ parseCommandLine( int argc, char ** argv )
|
|||||||
Params.droprecvs = true;
|
Params.droprecvs = true;
|
||||||
}
|
}
|
||||||
#endif // VT_UNIFY_HOOKS_MSGMATCH
|
#endif // VT_UNIFY_HOOKS_MSGMATCH
|
||||||
|
#if defined(HAVE_ZLIB) && HAVE_ZLIB
|
||||||
else if( strcmp( argv[i], "--nocompress" ) == 0 )
|
else if( strcmp( argv[i], "--nocompress" ) == 0 )
|
||||||
{
|
{
|
||||||
Params.docompress = false;
|
Params.docompress = false;
|
||||||
}
|
}
|
||||||
|
#endif // HAVE_ZLIB
|
||||||
else if( strcmp( argv[i], "-k" ) == 0
|
else if( strcmp( argv[i], "-k" ) == 0
|
||||||
|| strcmp( argv[i], "--keeplocal" ) == 0 )
|
|| strcmp( argv[i], "--keeplocal" ) == 0 )
|
||||||
{
|
{
|
||||||
@ -1055,8 +1057,10 @@ showUsage()
|
|||||||
<< " -v, --verbose Increase output verbosity." << std::endl
|
<< " -v, --verbose Increase output verbosity." << std::endl
|
||||||
<< " (can be used more than once)" << std::endl
|
<< " (can be used more than once)" << std::endl
|
||||||
<< std::endl
|
<< std::endl
|
||||||
|
#if defined(HAVE_ZLIB) && HAVE_ZLIB
|
||||||
<< " --nocompress Don't compress output trace files." << std::endl
|
<< " --nocompress Don't compress output trace files." << std::endl
|
||||||
<< std::endl
|
<< std::endl
|
||||||
|
#endif // HAVE_ZLIB
|
||||||
#ifdef VT_UNIFY_HOOKS_MSGMATCH
|
#ifdef VT_UNIFY_HOOKS_MSGMATCH
|
||||||
<< " --nomsgmatch Don't match messages." << std::endl
|
<< " --nomsgmatch Don't match messages." << std::endl
|
||||||
<< std::endl
|
<< std::endl
|
||||||
|
@ -63,10 +63,19 @@
|
|||||||
struct ParamsS
|
struct ParamsS
|
||||||
{
|
{
|
||||||
ParamsS()
|
ParamsS()
|
||||||
: verbose_level( 0 ), docompress( true ), doclean( true ),
|
: verbose_level( 0 ), docompress( false ), doclean( true ),
|
||||||
showusage( false ), showversion( false ), showprogress( false ),
|
showusage( false ), showversion( false ), showprogress( false ),
|
||||||
bequiet( false ), domsgmatch( true ), droprecvs( false ),
|
bequiet( false ), domsgmatch( false ), droprecvs( false ),
|
||||||
prof_sort_flags( 0x22 ) {}
|
prof_sort_flags( 0x22 )
|
||||||
|
{
|
||||||
|
#if defined(HAVE_ZLIB) && HAVE_ZLIB
|
||||||
|
docompress = true;
|
||||||
|
#endif // HAVE_ZLIB
|
||||||
|
|
||||||
|
#ifdef VT_UNIFY_HOOKS_MSGMATCH
|
||||||
|
domsgmatch = true;
|
||||||
|
#endif // VT_UNIFY_HOOKS_MSGMATCH
|
||||||
|
}
|
||||||
|
|
||||||
std::string in_file_prefix; // input trace file prefix
|
std::string in_file_prefix; // input trace file prefix
|
||||||
std::string out_file_prefix; // output trace file prefix
|
std::string out_file_prefix; // output trace file prefix
|
||||||
|
@ -687,12 +687,28 @@ Wrapper::parseCommandLine( int argc, char ** argv )
|
|||||||
|| arg.compare( "-fopenmp" ) == 0
|
|| arg.compare( "-fopenmp" ) == 0
|
||||||
|| arg.compare( "-Popenmp" ) == 0
|
|| arg.compare( "-Popenmp" ) == 0
|
||||||
|| arg.compare( "-xopenmp" ) == 0
|
|| arg.compare( "-xopenmp" ) == 0
|
||||||
|| arg.compare( "-mp" ) == 0
|
|| arg.compare( "-mp" ) == 0 )
|
||||||
|| arg.compare( "-qsmp=omp" ) == 0 )
|
|
||||||
{
|
{
|
||||||
m_pConfig->setUsesThreads( true );
|
m_pConfig->setUsesThreads( true );
|
||||||
m_pConfig->setUsesOpenMP( true );
|
m_pConfig->setUsesOpenMP( true );
|
||||||
}
|
}
|
||||||
|
else if( arg.length() > 6 && arg.compare( 0, 6, "-qsmp=" ) == 0 )
|
||||||
|
{
|
||||||
|
char carg[128];
|
||||||
|
strncpy( carg, arg.substr(6).c_str(), sizeof( carg ) - 1 );
|
||||||
|
carg[sizeof(carg) - 1] = '\0';
|
||||||
|
|
||||||
|
char * token = strtok( carg, ":" );
|
||||||
|
do
|
||||||
|
{
|
||||||
|
if( strcmp( token, "omp" ) == 0 )
|
||||||
|
{
|
||||||
|
m_pConfig->setUsesThreads( true );
|
||||||
|
m_pConfig->setUsesOpenMP( true );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while( ( token = strtok( 0, ":" ) ) );
|
||||||
|
}
|
||||||
//
|
//
|
||||||
// nvcc's pthread/openmp flag
|
// nvcc's pthread/openmp flag
|
||||||
//
|
//
|
||||||
|
@ -1222,6 +1222,7 @@ int vt_env_max_threads()
|
|||||||
|
|
||||||
int vt_env_compression()
|
int vt_env_compression()
|
||||||
{
|
{
|
||||||
|
#if defined(HAVE_ZLIB) && HAVE_ZLIB
|
||||||
static int compression = -1;
|
static int compression = -1;
|
||||||
char* tmp;
|
char* tmp;
|
||||||
|
|
||||||
@ -1238,6 +1239,9 @@ int vt_env_compression()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return compression;
|
return compression;
|
||||||
|
#else /* HAVE_ZLIB */
|
||||||
|
return 0;
|
||||||
|
#endif /* HAVE_ZLIB */
|
||||||
}
|
}
|
||||||
|
|
||||||
int vt_env_java_native()
|
int vt_env_java_native()
|
||||||
|
@ -796,7 +796,7 @@ static void unify_traces(void)
|
|||||||
|
|
||||||
/* compose unify arguments */
|
/* compose unify arguments */
|
||||||
|
|
||||||
argv = (char**)calloc(10 + vt_env_verbose(), sizeof(char*));
|
argv = (char**)calloc(10 + vt_env_verbose()+1, sizeof(char*));
|
||||||
if (argv == NULL) vt_error();
|
if (argv == NULL) vt_error();
|
||||||
|
|
||||||
argv[0] = NULL;
|
argv[0] = NULL;
|
||||||
@ -816,12 +816,14 @@ static void unify_traces(void)
|
|||||||
}
|
}
|
||||||
argc++;
|
argc++;
|
||||||
|
|
||||||
|
#if defined(HAVE_ZLIB) && HAVE_ZLIB
|
||||||
if (!vt_env_compression()) argv[argc++] = strdup("--nocompress");
|
if (!vt_env_compression()) argv[argc++] = strdup("--nocompress");
|
||||||
|
#endif /* HAVE_ZLIB */
|
||||||
if (!vt_env_do_clean()) argv[argc++] = strdup("-k");
|
if (!vt_env_do_clean()) argv[argc++] = strdup("-k");
|
||||||
if (vt_env_verbose() == 0) argv[argc++] = strdup("-q");
|
if (vt_env_verbose() == 0) argv[argc++] = strdup("-q");
|
||||||
else if (vt_env_verbose() >= 2)
|
else if (vt_env_verbose() >= 2)
|
||||||
{
|
{
|
||||||
for (i=1;i<vt_env_verbose();i++)
|
for (i=0;i<vt_env_verbose()+1;i++)
|
||||||
argv[argc++] = strdup("-v");
|
argv[argc++] = strdup("-v");
|
||||||
argv[argc++] = strdup("-p");
|
argv[argc++] = strdup("-p");
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user