diff --git a/ompi/contrib/vt/vt/ChangeLog b/ompi/contrib/vt/vt/ChangeLog
index 71e5093e7a..4bef4d85d5 100644
--- a/ompi/contrib/vt/vt/ChangeLog
+++ b/ompi/contrib/vt/vt/ChangeLog
@@ -3,6 +3,12 @@
(see extlib/otf/ChangeLog)
- improved filtering of CUDA kernels
- fixed unification of local process group definitions
+ - fixed wrapper generation for MPI implementations which don't support
+ the MPI-2 standard
+ - fixed faulty cleanup of temporary files in vtunify which occurred if
+ VT was configured without trace compression support
+ - fixed detection of OpenMP flag '-qsmp=*:omp:*' in the compiler
+ wrappers
5.11
- updated version of internal OTF to 1.9sawfish
diff --git a/ompi/contrib/vt/vt/config/m4/acinclude.zlib.m4 b/ompi/contrib/vt/vt/config/m4/acinclude.zlib.m4
index 65d325fbeb..f024ecff88 100644
--- a/ompi/contrib/vt/vt/config/m4/acinclude.zlib.m4
+++ b/ompi/contrib/vt/vt/config/m4/acinclude.zlib.m4
@@ -63,7 +63,10 @@ AC_DEFUN([ACVT_ZLIB],
])
AS_IF([test x"$ZLIBLIB" != x -a x"$zlib_error" = "xno"],
- [have_zlib="yes"])
+ [
+ have_zlib="yes"
+ AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the ZLIB.])
+ ])
AS_IF([test x"$force_zlib" = "xyes" -a x"$zlib_error" = "xyes"],
[exit 1])
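
With HAVE_ZLIB now exported to config.h, sources can compile conditionally
against zlib instead of assuming its presence. A minimal sketch of a consumer
translation unit (illustrative, not part of VT; assumes the usual config.h
produced by configure):

    #ifdef HAVE_CONFIG_H
    # include "config.h"
    #endif

    #ifdef HAVE_ZLIB
    # include <zlib.h>
    #endif

    /* report whether trace compression is available in this build */
    const char* compression_note( void )
    {
    #ifdef HAVE_ZLIB
        return zlibVersion();   /* runtime zlib version, e.g. "1.2.5" */
    #else
        return "built without trace compression support";
    #endif
    }
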
diff --git a/ompi/contrib/vt/vt/config/mpigen/mk_c_wrapper.sh.in b/ompi/contrib/vt/vt/config/mpigen/mk_c_wrapper.sh.in
index f3a5b3c32f..f3a7111d80 100644
--- a/ompi/contrib/vt/vt/config/mpigen/mk_c_wrapper.sh.in
+++ b/ompi/contrib/vt/vt/config/mpigen/mk_c_wrapper.sh.in
@@ -11,8 +11,8 @@ have_mpi2_1sided=@VT_MPIGEN_HAVE_MPI2_1SIDED@
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
-have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
-if [ $have_mpi2_thread -o $have_mpi2_1sided -o $have_mpi2_extcoll -o $have_mpi2_proc ] ; then
+have_mpi2=0
+if [ $have_mpi2_thread = 1 -o $have_mpi2_1sided = 1 -o $have_mpi2_extcoll = 1 -o $have_mpi2_proc = 1 ] ; then
have_mpi2=1
fi
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi
diff --git a/ompi/contrib/vt/vt/config/mpigen/mk_fortran_wrapper.sh.in b/ompi/contrib/vt/vt/config/mpigen/mk_fortran_wrapper.sh.in
index 0ec7a3566a..15c0505060 100644
--- a/ompi/contrib/vt/vt/config/mpigen/mk_fortran_wrapper.sh.in
+++ b/ompi/contrib/vt/vt/config/mpigen/mk_fortran_wrapper.sh.in
@@ -23,8 +23,8 @@ have_mpi2_1sided=@VT_MPIGEN_HAVE_MPI2_1SIDED@
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
-have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
-if [ $have_mpi2_thread -o $have_mpi2_1sided -o $have_mpi2_extcoll -o $have_mpi2_proc ] ; then
+have_mpi2=0
+if [ $have_mpi2_thread = 1 -o $have_mpi2_1sided = 1 -o $have_mpi2_extcoll = 1 -o $have_mpi2_proc = 1 ] ; then
have_mpi2=1
fi
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi
diff --git a/ompi/contrib/vt/vt/config/mpigen/mk_registry.sh.in b/ompi/contrib/vt/vt/config/mpigen/mk_registry.sh.in
index 88df1db0b1..8a59b5149a 100644
--- a/ompi/contrib/vt/vt/config/mpigen/mk_registry.sh.in
+++ b/ompi/contrib/vt/vt/config/mpigen/mk_registry.sh.in
@@ -11,7 +11,8 @@ have_mpi2_1sided=@VT_MPIGEN_HAVE_MPI2_1SIDED@
have_mpi2_extcoll=@VT_MPIGEN_HAVE_MPI2_EXTCOLL@
have_mpi2_file=@VT_MPIGEN_HAVE_MPI2_IO@
have_mpi2_proc=0 #@VT_MPIGEN_HAVE_MPI2_PROC@
-if [ $have_mpi2_thread -o $have_mpi2_1sided -o $have_mpi2_extcoll -o $have_mpi2_proc ] ; then
+have_mpi2=0
+if [ $have_mpi2_thread = 1 -o $have_mpi2_1sided = 1 -o $have_mpi2_extcoll = 1 -o $have_mpi2_proc = 1 ] ; then
have_mpi2=1
fi
mpi2_src1=""; if [ $have_mpi2 = 1 ] ; then mpi2_src1="mpi2_standard.h"; fi
diff --git a/ompi/contrib/vt/vt/config/mpigen/mpi_standard.h b/ompi/contrib/vt/vt/config/mpigen/mpi_standard.h
index 099bfadd73..728a5b6cea 100644
--- a/ompi/contrib/vt/vt/config/mpigen/mpi_standard.h
+++ b/ompi/contrib/vt/vt/config/mpigen/mpi_standard.h
@@ -40,8 +40,8 @@ VT_MPI_INT MPI_Address(void* location, MPI_Aint* address_CLASS_SINGLE_OUT);
VT_MPI_INT MPI_Allgather(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
VT_MPI_INT MPI_Allgatherv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* displs, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
VT_MPI_INT MPI_Allreduce(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_ALL2ALL*/
-VT_MPI_INT MPI_Alltoall(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
-VT_MPI_INT MPI_Alltoallv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT* sendcounts, VT_MPI_INT* sdispls, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* rdispls, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Alltoall(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
+VT_MPI_INT MPI_Alltoallv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* sdispls, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT* recvcounts, VT_MPI_INT* rdispls, MPI_Datatype recvtype, MPI_Comm comm); /*COLL_ALL2ALL*/
VT_MPI_INT MPI_Attr_delete(MPI_Comm comm, VT_MPI_INT keyval);
VT_MPI_INT MPI_Attr_get(MPI_Comm comm, VT_MPI_INT keyval, void* attribute_val, VT_MPI_INT* flag);
VT_MPI_INT MPI_Attr_put(MPI_Comm comm, VT_MPI_INT keyval, void* attribute_val);
@@ -129,8 +129,8 @@ VT_MPI_INT MPI_Request_free(MPI_Request* request_CLASS_SINGLE_IO);
VT_MPI_INT MPI_Rsend(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
VT_MPI_INT MPI_Rsend_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
VT_MPI_INT MPI_Scan(void* sendbuf_CLASS_BUFFER_IN_PLACE, void* recvbuf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm); /*COLL_OTHER*/
-VT_MPI_INT MPI_Scatter(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
-VT_MPI_INT MPI_Scatterv(void* sendbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT* sendcounts, VT_MPI_INT* displs, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
+VT_MPI_INT MPI_Scatter(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
+VT_MPI_INT MPI_Scatterv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT* sendcounts, VT_MPI_INT* displs, MPI_Datatype sendtype, void* recvbuf_CLASS_BUFFER_IN_PLACE, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT root, MPI_Comm comm); /*COLL_ONE2ALL*/
VT_MPI_INT MPI_Send(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm);
VT_MPI_INT MPI_Send_init(void* buf_CLASS_BUFFER, VT_MPI_INT count, MPI_Datatype datatype, VT_MPI_INT dest, VT_MPI_INT tag, MPI_Comm comm, MPI_Request* request_CLASS_SINGLE_OUT);
VT_MPI_INT MPI_Sendrecv(void* sendbuf_CLASS_BUFFER, VT_MPI_INT sendcount, MPI_Datatype sendtype, VT_MPI_INT dest, VT_MPI_INT sendtag, void* recvbuf_CLASS_BUFFER, VT_MPI_INT recvcount, MPI_Datatype recvtype, VT_MPI_INT source, VT_MPI_INT recvtag, MPI_Comm comm, MPI_Status* status_CLASS_SINGLE_OUT);
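
The moved _IN_PLACE annotations track where the MPI standard actually allows
MPI_IN_PLACE: for MPI_Scatter/MPI_Scatterv it may only replace the receive
buffer at the root (and for MPI_Alltoall/MPI_Alltoallv, MPI-2.1 and earlier
define no in-place variant at all; it first appeared in MPI-2.2), so the
generated wrappers must not treat the send buffer as a potential MPI_IN_PLACE
argument. A minimal sketch of the root-side usage the new annotations
describe (illustrative, assumes an initialized communicator):

    #include <mpi.h>

    /* scatter n ints per rank from buf on the root; the root's own chunk
       is already in place inside buf */
    void scatter_in_place( int* buf, int n, int root, MPI_Comm comm )
    {
        int rank;
        MPI_Comm_rank( comm, &rank );

        if ( rank == root )
            MPI_Scatter( buf, n, MPI_INT, MPI_IN_PLACE, n, MPI_INT,
                         root, comm );
        else
            MPI_Scatter( NULL, 0, MPI_INT, buf, n, MPI_INT, root, comm );
    }
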
diff --git a/ompi/contrib/vt/vt/doc/UserManual.html b/ompi/contrib/vt/vt/doc/UserManual.html
index 38295f73ee..fdafaf2ba9 100644
--- a/ompi/contrib/vt/vt/doc/UserManual.html
+++ b/ompi/contrib/vt/vt/doc/UserManual.html
@@ -66,7 +66,7 @@ BODY { font-family: sans-serif; }
-VampirTrace 5.11 User Manual
+VampirTrace 5.11.1 User Manual
@@ -252,20 +252,20 @@ OpenMP events, and performance counters.
After a successful tracing run, VampirTrace writes all collected data to a
trace file in the Open Trace Format (OTF).
As a result, the information is available for post-mortem analysis and
visualization by various tools.
Most notably, VampirTrace provides the input data for the Vampir analysis
and visualization tool.
VampirTrace is included in Open MPI 1.3 and later versions.
Unless explicitly disabled, VampirTrace is built automatically when installing
Open MPI.
@@ -1083,7 +1083,7 @@ in a single file, that
The names in between may contain the wildcards ``?'', ``*'', and ``#'';
each entry goes on its own line.
The lists end with END[_FILE]_<INCLUDE|EXCLUDE>_LIST. For further information on selective
profiling, have a look at the TAU documentation.
To announce the file through the compiler wrapper, use the option -vt:tau:
@@ -1100,7 +1100,7 @@ Binary Instrumentation Using Dyninst
The option -vt:inst dyninst is used with the compiler wrapper to
instrument the application at runtime (binary instrumentation) using
Dyninst.
This kind of instrumentation requires no recompiling,
only relinking:
@@ -1820,7 +1820,7 @@ for the enhanced timer synchronization:
- CLAPACK
- AMD ACML
@@ -2190,6 +2190,14 @@ Controls how VampirTrace handles synchronizing CUDA API calls, especially
introduces a minimal overhead but increases timer precision and prevents
flushes elsewhere in the trace.
+
+
+
- VT_CUDATRACE_ERROR (default: no)
+
+Print an error message and exit the program if a CUDA wrapper call
+ does not return 'cudaSuccess'. By default, only a warning message is
+ printed and the program continues.
+
- VT_CUPTI_METRICS (default: "")
@@ -3373,21 +3381,21 @@ by the Linux 2.6 kernel are shown in the table.
Footnotes
- ... (OTF)          http://www.tu-dresden.de/zih/otf
- ... tool           http://www.vampir.eu
- ... Open MPI       http://www.open-mpi.org/faq/?category=vampirtrace
- ... documentation  http://www.cs.uoregon.edu/research/tau/docs/newguide/ch03s03.html#ManualSelectiveProfiling
- ... Dyninst        http://www.dyninst.org
- ... CLAPACK
+
+ cat << EOF >conftest.tex
+\documentclass[[a4paper,10pt]]{article}
+\nonstopmode
+\usepackage{pgfplots}
+\begin{document}
+\pgfplotstableread{
+col1 col2
+1 2
+}\testtable
+test
+\end{document}
+EOF
+
+ $PDFTEX conftest.tex >/dev/null 2>&1
+ if test $? -eq 0; then
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_PGFPLOTS_1_4], [1], [Define to 1 if you have the TeX package PGFPLOTS version >=1.4.])
+ else
+ AC_MSG_RESULT([no])
+ fi
+
+ rm -f conftest.*
+
+ fi
+])
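
The probe compiles a tiny document that uses \pgfplotstableread; if $PDFTEX
exits successfully, the TeX installation is assumed to ship a sufficiently
recent PGFPLOTS (>= 1.4) and HAVE_PGFPLOTS_1_4 is defined. The same check can
be reproduced by hand with the conftest.tex shown above (assuming pdflatex is
in PATH):

    pdflatex conftest.tex >/dev/null 2>&1 \
        && echo "PGFPLOTS >= 1.4 found" \
        || echo "PGFPLOTS >= 1.4 missing"
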
diff --git a/ompi/contrib/vt/vt/extlib/otf/configure.in b/ompi/contrib/vt/vt/extlib/otf/configure.in
index 3b235a03e3..0c02ad6de5 100644
--- a/ompi/contrib/vt/vt/extlib/otf/configure.in
+++ b/ompi/contrib/vt/vt/extlib/otf/configure.in
@@ -69,6 +69,9 @@ CHECK_SWIG_PYTHON
if test x"$force_swig_python" = "xyes" -a x"$swig_python_error" = "xyes"; then exit 1; fi
AM_CONDITIONAL(AMHAVESWIGPYTHON, test x"$have_swig_python" = xyes)
+# Checks for pdflatex and PGFPLOTS needed for otfprofile-mpi to convert TeX output to PDF
+CHECK_PDFLATEX_PGFPLOTS
+
WITH_DEBUG
WITH_VERBOSE
diff --git a/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf b/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf
index 2af66cdbfa..c51c8cf290 100644
Binary files a/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf and b/ompi/contrib/vt/vt/extlib/otf/docu/tools/otfprofile.pdf differ
diff --git a/ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf b/ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf
index 9117e7e43c..477b242f25 100644
Binary files a/ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf and b/ompi/contrib/vt/vt/extlib/otf/docu/tools/otftools.pdf differ
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/Makefile.am b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/Makefile.am
index 64b7c55dc8..44723403af 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/Makefile.am
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/Makefile.am
@@ -17,6 +17,7 @@ otfprofile_mpi_SOURCES = \
collect_data.h \
create_latex.h \
datastructs.h \
+ otfprofile-mpi.h \
reduce_data.h \
summarize_data.h \
collect_data.cpp \
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.cpp
index 331e0c591f..e1fa00bdc0 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.cpp
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.cpp
@@ -15,26 +15,245 @@ using namespace std;
#include "otf.h"
#include "otfaux.h"
-#include "mpi.h"
-
#include "collect_data.h"
+#include "otfprofile-mpi.h"
-/* logarithm to base b for unsigned 64-bit integer x */
-static uint64_t logi( uint64_t x, uint64_t b= 2 ) {
+static void prepare_progress( AllData& alldata, uint64_t max_bytes ) {
- assert( b > 1 );
+ Progress& progress= alldata.progress;
- uint64_t c= 1;
- uint64_t i= 0;
+ progress.cur_bytes= 0;
+ progress.max_bytes= max_bytes;
+ progress.ranks_left= alldata.numRanks -1;
- while( c <= x ) {
+ if ( 1 < alldata.numRanks ) {
- c*= b;
- i++;
+ /* reduce max. bytes to rank 0 */
+ uint64_t sum_max_bytes;
+ MPI_Reduce( &max_bytes, &sum_max_bytes, 1, MPI_LONG_LONG_INT, MPI_SUM,
+ 0, MPI_COMM_WORLD );
+
+ if ( 0 == alldata.myRank ) {
+
+ progress.max_bytes= sum_max_bytes;
+
+ progress.recv_buffers= new uint64_t[alldata.numRanks-1];
+ assert( progress.recv_buffers );
+ progress.recv_requests= new MPI_Request[alldata.numRanks-1];
+ assert( progress.recv_requests );
+ progress.recv_statuses= new MPI_Status[alldata.numRanks-1];
+ assert( progress.recv_statuses );
+ progress.recv_indices= new int[alldata.numRanks-1];
+ assert( progress.recv_indices );
+
+ /* initialize array of current bytes read and start
+ persistent communication */
+
+ for ( uint32_t i= 0; i < alldata.numRanks; i++ ) {
+
+ if ( 0 < i ) {
+
+ /* create persistent request handle */
+ MPI_Recv_init( &(progress.recv_buffers[i-1]), 1,
+ MPI_LONG_LONG_INT, i, Progress::MSG_TAG,
+ MPI_COMM_WORLD,
+ &(progress.recv_requests[i-1]) );
+
+ /* start persistent communication */
+ MPI_Start( &(progress.recv_requests[i-1]) );
+
+ }
+ }
+
+ } else { /* 0 != my_rank */
+
+ /* initialize request handle for sending progress to rank 0 */
+ progress.send_request = MPI_REQUEST_NULL;
+
+ }
+
+    /* block until all worker ranks have reached this point to prevent the
+       progress display from jumping right at the beginning */
+ MPI_Barrier( MPI_COMM_WORLD );
}
- return i;
+ if ( 0 == alldata.myRank ) {
+
+ /* show initial progress */
+ printf( "%7.2f %%\r", 0.0 );
+ fflush( stdout );
+ }
+
+}
+
+
+static void update_progress( AllData& alldata, uint64_t delta_bytes,
+ bool wait= false ) {
+
+ Progress& progress= alldata.progress;
+
+ if ( 0 == alldata.myRank ) {
+
+ progress.cur_bytes += delta_bytes;
+
+ } else {
+
+ progress.cur_bytes= delta_bytes;
+ }
+
+ if ( 1 < alldata.numRanks ) {
+
+ if ( 0 == alldata.myRank ) {
+
+ /* get current bytes read from all worker ranks */
+
+ int out_count;
+
+ /* either wait or test for one or more updates from worker ranks */
+
+ if ( wait )
+ {
+
+ MPI_Waitsome( alldata.numRanks - 1, progress.recv_requests,
+ &out_count, progress.recv_indices,
+ progress.recv_statuses );
+
+ } else {
+
+ MPI_Testsome( alldata.numRanks - 1, progress.recv_requests,
+ &out_count, progress.recv_indices,
+ progress.recv_statuses );
+
+ }
+
+ if ( MPI_UNDEFINED != out_count ) {
+
+ int index;
+ uint32_t i;
+
+ for ( i= 0; i < (uint32_t) out_count; i++ ) {
+
+ index= progress.recv_indices[i];
+
+ /* worker rank (index+1) is finished? */
+ if ( (uint64_t)-1 != progress.recv_buffers[index] ) {
+
+ /* update rank's current bytes read and restart
+ persistent communication */
+
+ progress.cur_bytes += progress.recv_buffers[index];
+
+ MPI_Start( &(progress.recv_requests[progress.recv_indices[i]]) );
+
+ } else {
+
+ /* this rank is finished */
+ progress.ranks_left -= 1;
+ }
+ }
+ }
+
+ } else { /* 0 != my_rank */
+
+ int do_send = 1;
+ MPI_Status status;
+
+ /* send only if it's the first send or the request handle isn't
+ currently in use */
+
+ if ( MPI_REQUEST_NULL != progress.send_request ) {
+
+ MPI_Test( &(progress.send_request), &do_send, &status );
+
+ }
+
+ if ( do_send ) {
+
+ MPI_Issend( &(progress.cur_bytes), 1, MPI_LONG_LONG_INT, 0,
+ Progress::MSG_TAG, MPI_COMM_WORLD,
+ &progress.send_request );
+ }
+
+ }
+
+ }
+
+ if ( 0 == alldata.myRank ) {
+
+ /* show progress */
+
+ double percent =
+ 100.0 * (double) progress.cur_bytes / (double) progress.max_bytes;
+
+ static const char signs[2]= { '.',' ' };
+ static int signi= 0;
+
+ printf( "%7.2f %% %c\r", percent, signs[signi] );
+ fflush( stdout );
+
+ signi^= 1;
+
+ }
+}
+
+
+static void finish_progress( AllData& alldata ) {
+
+ Progress& progress= alldata.progress;
+
+ if ( 1 < alldata.numRanks ) {
+
+ if ( 0 == alldata.myRank ) {
+
+ /* update progress until all worker ranks are
+ finished / all bytes are read */
+
+ while ( 0 < progress.ranks_left ) {
+
+ update_progress( alldata, 0, true );
+ }
+
+ } else { /* 0 != my_rank */
+
+ MPI_Status status;
+ MPI_Wait( &(progress.send_request), &status );
+
+ /* send last current bytes read to rank 0 */
+ MPI_Send( &(progress.cur_bytes), 1, MPI_LONG_LONG_INT, 0,
+ Progress::MSG_TAG, MPI_COMM_WORLD );
+
+ /* send marker (-1) to rank 0 which indicates that this worker rank
+ is finished */
+
+ progress.cur_bytes = (uint64_t) -1;
+ MPI_Send( &(progress.cur_bytes), 1, MPI_LONG_LONG_INT, 0,
+ Progress::MSG_TAG, MPI_COMM_WORLD );
+
+ }
+
+ }
+
+ if ( 0 == alldata.myRank ) {
+
+ /* show final progress */
+ printf( "%7.2f %% done\n", 100.0 );
+
+ }
+
+ if( 1 < alldata.numRanks && 0 == alldata.myRank ) {
+
+ /* ensure that all requests are inactive before freeing memory */
+ MPI_Waitall( alldata.numRanks - 1, progress.recv_requests,
+ progress.recv_statuses );
+
+ /* free memory */
+ delete [] progress.recv_buffers;
+ delete [] progress.recv_requests;
+ delete [] progress.recv_statuses;
+ delete [] progress.recv_indices;
+
+ }
}
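
The progress machinery above is a persistent-request fan-in: rank 0 posts one
MPI_Recv_init per worker and re-arms completed requests with MPI_Start, the
workers push byte counts with MPI_Issend, and a (uint64_t)-1 sentinel marks a
finished worker. A stripped-down, compilable sketch of the same pattern
(illustrative names, not part of otfprofile; the workers use plain MPI_Send
for brevity):

    #include <mpi.h>
    #include <cstdio>
    #include <vector>

    int main( int argc, char** argv ) {

        MPI_Init( &argc, &argv );

        int rank, size;
        MPI_Comm_rank( MPI_COMM_WORLD, &rank );
        MPI_Comm_size( MPI_COMM_WORLD, &size );

        const int TAG= 500;

        if ( 0 == rank && 1 < size ) {

            std::vector<long long>   bufs( size - 1 );
            std::vector<MPI_Request> reqs( size - 1 );
            std::vector<MPI_Status>  stats( size - 1 );
            std::vector<int>         indices( size - 1 );

            /* one persistent receive per worker, re-armed via MPI_Start */
            for ( int i= 1; i < size; i++ ) {
                MPI_Recv_init( &bufs[i-1], 1, MPI_LONG_LONG_INT, i, TAG,
                               MPI_COMM_WORLD, &reqs[i-1] );
                MPI_Start( &reqs[i-1] );
            }

            int left= size - 1;
            while ( 0 < left ) {

                int out_count;
                MPI_Waitsome( size - 1, reqs.data(), &out_count,
                              indices.data(), stats.data() );

                for ( int i= 0; i < out_count; i++ ) {
                    int idx= indices[i];
                    if ( -1 == bufs[idx] ) { left--; continue; } /* worker done */
                    printf( "worker %d reports %lld bytes\n",
                            idx + 1, bufs[idx] );
                    MPI_Start( &reqs[idx] ); /* re-arm for the next update */
                }
            }

            for ( int i= 0; i < size - 1; i++ ) MPI_Request_free( &reqs[i] );

        } else if ( 0 < rank ) {

            long long bytes= 42;  /* some bytes-read update */
            MPI_Send( &bytes, 1, MPI_LONG_LONG_INT, 0, TAG, MPI_COMM_WORLD );
            bytes= -1;            /* finish marker, as in finish_progress() */
            MPI_Send( &bytes, 1, MPI_LONG_LONG_INT, 0, TAG, MPI_COMM_WORLD );
        }

        MPI_Finalize();
        return 0;
    }
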
@@ -80,12 +299,39 @@ static int handle_def_comment( void* fha, uint32_t stream, const char* comment,
AllData* alldata= (AllData*) fha;
+    /* add a newline between comment records */
if ( 0 < alldata->comments.length() ) {
alldata->comments+= "\n";
}
- alldata->comments+= comment;
+
+
+ /* wrap lines after 80 characters */
+
+ const string::size_type LINE_WRAP= 80;
+
+ string tmp= comment;
+
+ do {
+
+ if ( tmp.length() <= LINE_WRAP ) {
+
+ alldata->comments+= tmp;
+ break;
+
+ } else {
+
+ string::size_type next_wrap=
+ tmp.find_last_of( " .!?:;,", LINE_WRAP -1 );
+ next_wrap= ( string::npos == next_wrap ) ? LINE_WRAP : next_wrap +1;
+
+ alldata->comments+= tmp.substr( 0, next_wrap ) + '\n';
+ tmp= tmp.substr( next_wrap );
+
+ }
+
+ } while( 0 != tmp.length() );
return OTF_RETURN_OK;
}
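
The wrap loop above prefers to break at the last separator (space or
punctuation) at or before column 80 and falls back to a hard break at column
80 when none exists. A standalone sketch of exactly that loop:

    #include <iostream>
    #include <string>

    int main() {

        const std::string::size_type LINE_WRAP= 80;

        std::string tmp= std::string( 70, 'x' ) + " " + std::string( 30, 'y' );
        std::string out;

        do {
            if ( tmp.length() <= LINE_WRAP ) { out+= tmp; break; }

            std::string::size_type next_wrap=
                tmp.find_last_of( " .!?:;,", LINE_WRAP - 1 );
            next_wrap= ( std::string::npos == next_wrap ) ? LINE_WRAP
                                                          : next_wrap + 1;

            out+= tmp.substr( 0, next_wrap ) + '\n';
            tmp= tmp.substr( next_wrap );

        } while ( 0 != tmp.length() );

        std::cout << out << std::endl; /* breaks after the space at column 71 */
        return 0;
    }
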
@@ -315,8 +561,8 @@ static int handle_send( void* fha, uint64_t time, uint32_t sender,
if ( 0 != alldata->recvTimeKey ) {
uint64_t recv_time;
- if ( OTF_KeyValueList_getUint64( kvlist, alldata->recvTimeKey,
- &recv_time ) == 0 ) {
+ if ( 0 == OTF_KeyValueList_getUint64( kvlist, alldata->recvTimeKey,
+ &recv_time ) ) {
duration= (double) ( recv_time - time );
@@ -331,11 +577,11 @@ static int handle_send( void* fha, uint64_t time, uint32_t sender,
if ( length > 0 && duration > 0.0 ) {
uint64_t speed_bin=
- logi( (uint64_t)(
+ Logi( (uint64_t)(
( (double)length * (double)alldata->timerResolution ) /
duration ), MessageSpeedData::BIN_LOG_BASE );
- uint64_t length_bin= logi( length, MessageSpeedData::BIN_LOG_BASE );
+ uint64_t length_bin= Logi( length, MessageSpeedData::BIN_LOG_BASE );
alldata->messageSpeedMapPerLength[ Pair( speed_bin, length_bin ) ]
.add( 1 );
@@ -444,7 +690,18 @@ static int handle_function_summary( void* fha, uint64_t time, uint32_t func,
/* add/overwrite function statistics */
FunctionData tmp;
- tmp.add( count, exclTime, inclTime );
+
+ tmp.count.cnt = tmp.count.sum = count;
+ tmp.count.min = tmp.count.max = 0;
+
+ tmp.excl_time.cnt = count;
+ tmp.excl_time.sum = exclTime;
+ tmp.excl_time.min = tmp.excl_time.max = 0;
+
+ tmp.incl_time.cnt = count;
+ tmp.incl_time.sum = inclTime;
+ tmp.incl_time.min = tmp.incl_time.max = 0;
+
alldata->functionMapPerRank[ Pair( func, process ) ]= tmp;
return OTF_RETURN_OK;
@@ -550,7 +807,9 @@ static int handle_collop_summary( void* fha, uint64_t time, uint32_t process,
}
-static void read_definitions( OTF_Reader* reader, AllData& alldata ) {
+static bool read_definitions( AllData& alldata, OTF_Reader* reader ) {
+
+ bool error= false;
/* open OTF handler array */
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
@@ -608,16 +867,22 @@ static void read_definitions( OTF_Reader* reader, AllData& alldata ) {
OTF_DEFKEYVALUE_RECORD );
/* read definitions */
- uint64_t defs_read_ret= OTF_Reader_readDefinitions( reader, handlers );
- assert( OTF_READ_ERROR != defs_read_ret );
+ uint64_t read_ret= OTF_Reader_readDefinitions( reader, handlers );
+ if ( OTF_READ_ERROR == read_ret ) {
+
+ cerr << "ERROR: Could not read definitions." << endl;
+ error= true;
+
+ }
/* close OTF handler array */
OTF_HandlerArray_close( handlers );
+
+ return !error;
}
-static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
- AllData& alldata ) {
+static void share_definitions( AllData& alldata ) {
MPI_Barrier( MPI_COMM_WORLD );
@@ -627,7 +892,7 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
/* get size needed to send definitions to workers */
- if ( my_rank == 0 ) {
+ if ( 0 == alldata.myRank ) {
MPI_Pack_size( 1 + alldata.collectiveOperationsToClasses.size() * 2 +
1 + alldata.countersOfInterest.size() +
@@ -646,7 +911,7 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
/* pack definitions to buffer */
- if ( my_rank == 0 ) {
+ if ( 0 == alldata.myRank ) {
/* collectiveOperationsToClasses.size() */
uint64_t collop_classes_map_size=
@@ -701,7 +966,7 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
/* unpack definitions from buffer */
- if ( my_rank != 0 ) {
+ if ( 0 != alldata.myRank ) {
/* collectiveOperationsToClasses.size() */
uint64_t collop_classes_map_size;
@@ -756,7 +1021,9 @@ static void share_definitions( uint32_t my_rank, uint32_t num_ranks,
}
-static void read_events( OTF_Reader* reader, AllData& alldata ) {
+static bool read_events( AllData& alldata, OTF_Reader* reader ) {
+
+ bool error= false;
/* open OTF handler array */
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
@@ -810,16 +1077,67 @@ static void read_events( OTF_Reader* reader, AllData& alldata ) {
OTF_Reader_enableProcess( reader, alldata.myProcessesList[ i ] );
}
+ /* prepare progress */
+ if ( alldata.params.progress ) {
+
+ OTF_Reader_setRecordLimit( reader, 0 );
+
+ if ( OTF_READ_ERROR != OTF_Reader_readEvents( reader, handlers ) ) {
+
+ uint64_t min, cur, max;
+
+ OTF_Reader_eventBytesProgress( reader, &min, &cur, &max );
+ prepare_progress( alldata, max );
+
+ }
+
+ OTF_Reader_setRecordLimit( reader, Progress::EVENTS_RECORD_LIMIT );
+
+ }
+
/* read events */
- uint64_t events_read_ret= OTF_Reader_readEvents( reader, handlers );
- assert( OTF_READ_ERROR != events_read_ret );
+
+ uint64_t records_read= 0;
+
+ while ( OTF_READ_ERROR !=
+ ( records_read= OTF_Reader_readEvents( reader, handlers ) ) ) {
+
+ /* update progress */
+ if ( alldata.params.progress ) {
+
+ uint64_t min, cur, max;
+ static uint64_t last_cur= 0;
+
+ OTF_Reader_eventBytesProgress( reader, &min, &cur, &max );
+ update_progress( alldata, cur - last_cur );
+
+ last_cur = cur;
+
+ }
+
+ /* stop reading if done */
+ if ( 0 == records_read )
+ break;
+ }
+
+ /* show error message if reading failed */
+ if ( OTF_READ_ERROR == records_read ) {
+
+ cerr << "ERROR: Could not read events." << endl;
+ error= true;
+
+ }
/* close OTF handler array */
OTF_HandlerArray_close( handlers );
+
+ return !error;
}
-static void read_statistics( OTF_Reader* reader, AllData& alldata ) {
+static bool read_statistics( AllData& alldata, OTF_Reader* reader ) {
+
+ bool error= false;
/* open OTF handler array */
OTF_HandlerArray* handlers= OTF_HandlerArray_open( );
@@ -853,18 +1171,66 @@ static void read_statistics( OTF_Reader* reader, AllData& alldata ) {
OTF_Reader_enableProcess( reader, alldata.myProcessesList[ i ] );
}
- /* read events */
- uint64_t stats_read_ret= OTF_Reader_readStatistics( reader, handlers );
- assert( OTF_READ_ERROR != stats_read_ret );
+ /* prepare progress */
+ if ( alldata.params.progress ) {
+
+ OTF_Reader_setRecordLimit( reader, 0 );
+
+ if ( OTF_READ_ERROR != OTF_Reader_readStatistics( reader, handlers ) ) {
+
+ uint64_t min, cur, max;
+ OTF_Reader_statisticBytesProgress( reader, &min, &cur, &max );
+ prepare_progress( alldata, max );
+
+ }
+
+ OTF_Reader_setRecordLimit( reader, Progress::STATS_RECORD_LIMIT );
+
+ }
+
+ /* read statistics */
+
+ uint64_t records_read= 0;
+
+ while ( OTF_READ_ERROR !=
+ ( records_read= OTF_Reader_readStatistics( reader, handlers ) ) ) {
+
+ /* update progress */
+ if ( alldata.params.progress ) {
+
+ uint64_t min, cur, max;
+ static uint64_t last_cur= 0;
+
+ OTF_Reader_statisticBytesProgress( reader, &min, &cur, &max );
+ update_progress( alldata, cur - last_cur );
+
+ last_cur = cur;
+
+ }
+
+ /* stop reading if done */
+ if ( 0 == records_read )
+ break;
+ }
+
+ /* show error message if reading failed */
+ if ( OTF_READ_ERROR == records_read ) {
+
+ cerr << "ERROR: Could not read statistics." << endl;
+ error= true;
+
+ }
/* close OTF handler array */
OTF_HandlerArray_close( handlers );
+
+ return !error;
}
-bool collectData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
+bool CollectData( AllData& alldata ) {
- bool ret= true;
+ bool error= false;
/* open OTF file manager and reader */
@@ -876,37 +1242,65 @@ bool collectData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
OTF_Reader_open( alldata.params.input_file_prefix.c_str(), manager );
assert( reader );
- if ( my_rank == 0 ) {
+ do {
- /* read definitions */
- read_definitions( reader, alldata );
+ if ( 0 == alldata.myRank ) {
- }
+ /* read definitions */
- /* share definitions needed for reading events to workers */
+ VerbosePrint( alldata, 1, true, "reading definitions\n" );
- if ( num_ranks > 1 ) {
+ error= !read_definitions( alldata, reader );
- share_definitions( my_rank, num_ranks, alldata );
+ }
- }
+ /* broadcast error indicator to workers */
+ if ( SyncError( alldata, error, 0 ) ) {
- /* either read data from events or statistics */
+ break;
- if ( alldata.params.read_from_stats ) {
+ }
- read_statistics( reader, alldata );
+ /* share definitions needed for reading events to workers */
- } else {
+ if ( 1 < alldata.numRanks ) {
- read_events( reader, alldata );
+ share_definitions( alldata );
- }
+ }
+
+ /* either read data from events or statistics */
+
+ if ( alldata.params.read_from_stats ) {
+
+ VerbosePrint( alldata, 1, true, "reading statistics\n" );
+
+ error= !read_statistics( alldata, reader );
+
+ } else {
+
+ VerbosePrint( alldata, 1, true, "reading events\n" );
+
+ error= !read_events( alldata, reader );
+
+ }
+
+ /* finish progress */
+ if ( alldata.params.progress ) {
+
+ finish_progress( alldata );
+
+ }
+
+ /* synchronize error indicator with workers */
+ SyncError( alldata, error );
+
+ } while( false );
/* close OTF file manager and reader */
OTF_Reader_close( reader );
OTF_FileManager_close( manager );
- return ret;
+ return !error;
}
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.h
index afbab06b2b..974ef2b25d 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.h
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/collect_data.h
@@ -12,6 +12,7 @@
/* collect the data for the assigned trace processes from the given
trace file name */
-bool collectData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata );
+bool CollectData( AllData& alldata );
+
#endif /* COLLECT_DATA_H */
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.cpp
index 429e992ef3..24ef8ba83d 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.cpp
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.cpp
@@ -19,6 +19,8 @@
using namespace std;
#include "create_latex.h"
+#include "otfprofile-mpi.h"
+
#include "OTF_inttypes.h"
#include "OTF_Definitions.h"
@@ -251,6 +253,7 @@ static void collectiveId2String(uint64_t id, string& name)
static void write_header(fstream& tex)
{
tex << "\\documentclass[a4paper,10pt]{article}" << endl;
+ tex << "\\nonstopmode" << endl;
tex << "\\usepackage{amssymb}" << endl;
tex << "\\usepackage{longtable}" << endl;
tex << "\\usepackage{ifthen}" << endl;
@@ -2015,9 +2018,11 @@ static void write_p2pMsgRateHist(fstream& tex, struct AllData& alldata)
*
* @param alldata data structure containing summarized profiling information
*/
-bool createTex( AllData& alldata ) {
+bool CreateTex( AllData& alldata ) {
- bool ret= true;
+ bool error= false;
+
+ VerbosePrint( alldata, 1, true, "producing LaTeX output\n" );
string tex_file_name= alldata.params.output_file_prefix + ".tex";
fstream tex_file;
@@ -2057,59 +2062,39 @@ bool createTex( AllData& alldata ) {
write_footer(tex_file);
tex_file.close();
+ VerbosePrint( alldata, 2, true, " created file: %s\n",
+ tex_file_name.c_str() );
+
+#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
/* create PDF file, if desired */
if ( alldata.params.create_pdf ) {
- int rc;
+ VerbosePrint( alldata, 1, true, "producing PDF output\n" );
+
+ /* compose pdflatex command */
ostringstream cmd;
+ cmd << PDFTEX << " " << tex_file_name << " >/dev/null 2>&1";
- /* compose latex command */
- cmd << alldata.params.latex_command << " " << tex_file_name
- << " >/dev/null 2>&1";
-
- /* execute latex command (two times) on TEX file */
+ /* execute pdflatex command (two times) on TeX file */
for ( uint8_t i = 0; i < 2; i++ ) {
- rc= system( cmd.str().c_str() );
+ VerbosePrint( alldata, 2, true, " %srunning command: %s\n",
+ (0 == i) ? "" : "re-", cmd.str().c_str() );
+
+ int rc= system( cmd.str().c_str() );
if ( 0 != WEXITSTATUS( rc ) ) {
- cerr << "ERROR: Could not create DVI file from '"
+ cerr << "ERROR: Could not create PDF file from '"
<< tex_file_name << "'." << endl;
- ret= false;
+ error= true;
break;
}
}
- if ( 0 == rc ) {
-
- /* compose DVI file name */
- string dvi_file_name= tex_file_name;
- dvi_file_name.replace( tex_file_name.length() - 4, 4, ".dvi" );
-
- /* compose PDF file name */
- string pdf_file_name= tex_file_name;
- pdf_file_name.replace( tex_file_name.length() - 4, 4, ".pdf" );
-
- /* compose DVI to PDF convert command */
- cmd.str(""); cmd.clear();
- cmd << alldata.params.dvipdf_command << " " << dvi_file_name
- << " >/dev/null 2>&1";
-
- /* execute DVI to PDF command */
- rc= system( cmd.str().c_str() );
- if ( 0 != WEXITSTATUS( rc ) ) {
-
- cerr << "ERROR: Could not convert '" << dvi_file_name
- << "' to '" << pdf_file_name << "'." << endl;
- ret= false;
-
- }
-
- }
-
}
+#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
- return ret;
+ return !error;
}
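
Running the composed pdflatex command twice is deliberate: the first pass
writes the auxiliary data that longtable and cross-references need, and the
second pass resolves them. Done by hand, the equivalent is (assuming the
default output prefix 'result'):

    pdflatex result.tex >/dev/null 2>&1
    pdflatex result.tex >/dev/null 2>&1   # second pass settles tables/references
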
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.h
index 5c81487954..f5705e3469 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.h
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/create_latex.h
@@ -6,11 +6,12 @@
#ifndef CREATE_LATEX_H
#define CREATE_LATEX_H
+
#include "datastructs.h"
/* generate PGF output */
-bool createTex( AllData& alldata );
+bool CreateTex( AllData& alldata );
#endif /* CREATE_LATEX_H */
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/datastructs.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/datastructs.h
index b259022a19..83f37ef91f 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/datastructs.h
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/datastructs.h
@@ -6,6 +6,7 @@
#ifndef DATASTRUCTS_H
#define DATASTRUCTS_H
+
using namespace std;
#include
@@ -16,6 +17,8 @@ using namespace std;
#include
#include
+#include "mpi.h"
+
#include "OTF_inttypes.h"
@@ -25,26 +28,84 @@ struct Params {
static const uint32_t DEFAULT_MAX_FILE_HANDLES= 50;
static const uint32_t DEFAULT_BUFFER_SIZE= 1024 * 1024;
- static const string DEFAULT_OUTPUT_FILE_PREFIX() { return "result"; }
- static const string DEFAULT_LATEX_COMMAND() { return "latex"; }
- static const string DEFAULT_DVIPDF_COMMAND() { return "dvipdf"; }
+ static const uint8_t DEFAULT_VERBOSE_LEVEL= 0;
+ static const string DEFAULT_OUTPUT_FILE_PREFIX() { return "result"; }
uint32_t max_file_handles;
uint32_t buffer_size;
+ uint8_t verbose_level;
+ bool progress;
bool read_from_stats;
+
bool create_pdf;
string input_file_prefix;
string output_file_prefix;
- string latex_command;
- string dvipdf_command;
Params()
: max_file_handles(DEFAULT_MAX_FILE_HANDLES),
buffer_size(DEFAULT_BUFFER_SIZE),
+ verbose_level(DEFAULT_VERBOSE_LEVEL), progress(false),
read_from_stats(false), create_pdf(true),
- output_file_prefix(DEFAULT_OUTPUT_FILE_PREFIX()),
- latex_command(DEFAULT_LATEX_COMMAND()),
- dvipdf_command(DEFAULT_DVIPDF_COMMAND()) {}
+ output_file_prefix(DEFAULT_OUTPUT_FILE_PREFIX()) {}
+};
+
+
+/* *** progress information *** */
+
+struct Progress {
+
+ /* maximum number of records to read between progress updates */
+ static const uint64_t EVENTS_RECORD_LIMIT= 1000000;
+ static const uint64_t STATS_RECORD_LIMIT= 100;
+
+ /* message tag to use for communication */
+ static const int MSG_TAG= 500;
+
+ uint64_t cur_bytes; /* current bytes read */
+ uint64_t max_bytes; /* max. bytes readable */
+
+ MPI_Request send_request; /* sender request handle */
+
+ uint64_t* recv_buffers; /* receive buffers */
+ MPI_Request* recv_requests; /* persistent receive request handles */
+ MPI_Status* recv_statuses; /* receive statuses */
+ int* recv_indices; /* indices of completed recv. operations */
+
+ uint32_t ranks_left; /* root keeps track of ranks left to query */
+};
+
+
+/* *** runtime measurement *** */
+
+struct MeasureBlock {
+
+ /* routine to get a global timestamp */
+# define GETTIME() MPI_Wtime()
+
+ double start_time; /* start timestamp of measurement block */
+ double stop_time; /* stop timestamp of measurement block */
+
+ MeasureBlock() : start_time(-1.0), stop_time(-1.0) {}
+
+    /* start runtime measurement */
+ void start() {
+
+ start_time= GETTIME();
+ }
+
+    /* stop runtime measurement */
+ void stop() {
+
+ assert( -1.0 != start_time );
+ stop_time= GETTIME();
+ }
+
+ /* get result of runtime measurement */
+ double duration() const {
+
+ assert( -1.0 != start_time && -1.0 != stop_time );
+ return stop_time - start_time;
+ }
};
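
A minimal sketch of how MeasureBlock pairs with the per-name map added to
AllData below (MPI_Wtime() requires an initialized MPI environment;
illustrative only):

    #include <mpi.h>
    #include <cstdio>
    #include <map>
    #include <string>

    struct MeasureBlock {
        double start_time, stop_time;
        MeasureBlock() : start_time(-1.0), stop_time(-1.0) {}
        void start() { start_time= MPI_Wtime(); }
        void stop()  { stop_time= MPI_Wtime(); }
        double duration() const { return stop_time - start_time; }
    };

    int main( int argc, char** argv ) {

        MPI_Init( &argc, &argv );

        std::map< std::string, MeasureBlock > blocks;

        blocks[ "analyze data" ].start();
        /* ... the work to be measured ... */
        blocks[ "analyze data" ].stop();

        printf( "analyze data: %fs\n", blocks[ "analyze data" ].duration() );

        MPI_Finalize();
        return 0;
    }
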
@@ -413,6 +474,9 @@ struct PendingCollective {
struct AllData {
+ const uint32_t myRank;
+ const uint32_t numRanks;
+
/* number and list of processes to be handled by every worker */
uint32_t myProcessesNum;
uint32_t* myProcessesList;
@@ -424,6 +488,12 @@ struct AllData {
/* program parameters */
Params params;
+ /* progress information */
+ Progress progress;
+
+ /* store per-measure block runtimes */
+ map< string, MeasureBlock > measureBlockMap;
+
/* clustering information for ranks */
Clustering clustering;
@@ -537,7 +607,9 @@ struct AllData {
- AllData() : myProcessesNum(0), myProcessesList(NULL),
+ AllData( uint32_t my_rank, uint32_t num_ranks ) :
+ myRank(my_rank), numRanks(num_ranks),
+ myProcessesNum(0), myProcessesList(NULL),
packbuffersize(0), packbuffer(NULL), timerResolution(0),
recvTimeKey(0) {}
@@ -565,6 +637,16 @@ struct AllData {
}
+ char* freePackBuffer( ) {
+
+ free( packbuffer );
+ packbuffer= NULL;
+ packbuffersize= 0;
+
+ return NULL;
+ }
+
+
char* getPackBuffer( ) {
return packbuffer;
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.cpp
index f8df133700..29c4c908ee 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.cpp
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.cpp
@@ -7,72 +7,42 @@ using namespace std;
#include
#include
+#include
#include
#include
#include
-#include
-
-#include "mpi.h"
#include "otf.h"
+#include "OTF_Platform.h"
-#include "datastructs.h"
#include "collect_data.h"
+#include "otfprofile-mpi.h"
#include "summarize_data.h"
#include "reduce_data.h"
#include "create_latex.h"
-#define FPRINTF_ROOT if(my_rank == 0) fprintf
+/* define the following macro to synchronize the error indicator with all
+   worker ranks. This enforces that all ranks are terminated by calling
+   MPI_Abort if any one of them fails; it works around a bug, observed at
+   least with Open MPI, where calling MPI_Abort on one task does not
+   terminate all other ranks. */
+#define SYNC_ERROR
-/* define this macro to print result data to stdout */
+/* define the following macro to print result data to stdout */
/*#define SHOW_RESULTS*/
-/* define this macro to have runtime measurement of certain profile scopes */
-/*#define RUNTIME_MEASUREMENT*/
-
-
-#ifdef RUNTIME_MEASUREMENT
-
- struct MeasureBlock {
-
-# define GETTIME() MPI_Wtime()
-
- double start_time;
- double stop_time;
-
- MeasureBlock() : start_time(-1.0), stop_time(-1.0) {}
-
- void start() {
- start_time= GETTIME();
- }
- void stop() {
- assert( -1.0 != start_time );
- stop_time= GETTIME();
- }
- double duration() const {
- assert( -1.0 != start_time && -1.0 != stop_time );
- return stop_time - start_time;
- }
- };
-
- /* store per-measure block runtimes */
- map < string, MeasureBlock > MeasureBlocksMap;
-
-#endif /* RUNTIME_MEASUREMENT */
-
/* parse command line options
-return 0 if succeeded, 1 if help text or version showed, 2 if failed */
-static int parse_command_line( uint32_t my_rank, int argc, char** argv,
- AllData& alldata );
+return 0 if succeeded, 1 if the help text or version was shown, -1 if failed */
+static int parse_command_line( int argc, char** argv, AllData& alldata );
/* assign trace processes to analysis processes explicitly in order to allow
sophisticated grouping of MPI ranks/processes/threads/GPU threads/etc.
in the future, return true if succeeded */
-static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
- AllData& alldata );
+static bool assign_procs_to_ranks( AllData& alldata );
#ifdef SHOW_RESULTS
/* show results on stdout */
@@ -97,22 +67,33 @@ int main( int argc, char** argv ) {
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank );
MPI_Comm_size(MPI_COMM_WORLD, &num_ranks );
+ AllData alldata( my_rank, num_ranks );
+
do {
- AllData alldata;
-
/* step 0: parse command line options */
- if ( 0 !=
- ( ret= parse_command_line( my_rank, argc, argv, alldata ) ) ) {
+ if ( 0 != ( ret= parse_command_line( argc, argv, alldata ) ) ) {
+
+ if ( 1 == ret ) {
+
+ ret= 0;
+
+ } else { /* -1 == ret */
+
+ ret= 1;
+
+ }
break;
}
+ VerbosePrint( alldata, 1, true, "initializing\n" );
+
MPI_Barrier( MPI_COMM_WORLD );
/* step 1: assign trace processes to analysis processes */
- if ( !assign_procs_to_ranks( my_rank, num_ranks, alldata ) ) {
+ if ( !assign_procs_to_ranks( alldata ) ) {
ret= 1;
break;
@@ -121,16 +102,14 @@ int main( int argc, char** argv ) {
MPI_Barrier( MPI_COMM_WORLD );
-#ifdef RUNTIME_MEASUREMENT
- if ( 0 == my_rank ) {
+ if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) {
- MeasureBlocksMap[ "analyze data" ].start();
+ alldata.measureBlockMap[ "analyze data" ].start();
}
-#endif /* RUNTIME_MEASUREMENT */
/* step 2: collect data by reading input trace file */
- if ( !collectData( my_rank, num_ranks, alldata ) ) {
+ if ( !CollectData( alldata ) ) {
ret= 1;
break;
@@ -141,7 +120,7 @@ int main( int argc, char** argv ) {
/* step 3: summarize data; every analysis rank summarizes it's local
data independently */
- if ( !summarizeData( my_rank, num_ranks, alldata ) ) {
+ if ( !SummarizeData( alldata ) ) {
ret= 1;
break;
@@ -151,7 +130,7 @@ int main( int argc, char** argv ) {
MPI_Barrier( MPI_COMM_WORLD );
/* step 4: reduce data to master */
- if ( !reduceData( my_rank, num_ranks, alldata ) ) {
+ if ( !ReduceData( alldata ) ) {
ret= 1;
break;
@@ -160,13 +139,11 @@ int main( int argc, char** argv ) {
MPI_Barrier( MPI_COMM_WORLD );
-#ifdef RUNTIME_MEASUREMENT
- if ( 0 == my_rank ) {
+ if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) {
- MeasureBlocksMap[ "analyze data" ].stop();
+ alldata.measureBlockMap[ "analyze data" ].stop();
}
-#endif /* RUNTIME_MEASUREMENT */
/* step 5: produce outputs */
@@ -190,50 +167,44 @@ int main( int argc, char** argv ) {
show_results( alldata );
#endif /* SHOW_RESULTS */
-#ifdef RUNTIME_MEASUREMENT
- MeasureBlocksMap[ "write tex" ].start();
-#endif /* RUNTIME_MEASUREMENT */
+ alldata.measureBlockMap[ "produce output" ].start();
/* step 5.3: generate PGF output */
- if ( !createTex( alldata ) ) {
+ if ( !CreateTex( alldata ) ) {
ret= 1;
break;
}
-#ifdef RUNTIME_MEASUREMENT
- MeasureBlocksMap[ "write tex" ].stop();
-#endif /* RUNTIME_MEASUREMENT */
+ alldata.measureBlockMap[ "produce output" ].stop();
}
} while( false );
-#ifdef RUNTIME_MEASUREMENT
-
- /* show runtime measurement results */
-
- if ( 0 == my_rank && 0 == ret ) {
-
- cout << endl << "runtime measurement results:" << endl;
- for ( map < string, MeasureBlock >::const_iterator it=
- MeasureBlocksMap.begin(); it != MeasureBlocksMap.end(); it++ ) {
-
- cout << " " << it->first << ": " << it->second.duration()
- << "s" << endl;
- }
-
- }
-
-#endif /* RUNTIME_MEASUREMENT */
-
/* either finalize or abort on error */
- if ( 0 == ret || 1 == ret ) {
+ if ( 0 == ret ) {
+
+ /* show runtime measurement results */
+ if ( 1 <= alldata.params.verbose_level && 0 == my_rank ) {
+
+ cout << "runtime measurement results:" << endl;
+ for ( map < string, MeasureBlock >::const_iterator it=
+ alldata.measureBlockMap.begin();
+ it != alldata.measureBlockMap.end(); it++ ) {
+
+ cout << " " << it->first << ": " << it->second.duration()
+ << "s" << endl;
+ }
+
+ }
MPI_Finalize();
+ VerbosePrint( alldata, 1, true, "done\n" );
+
} else {
MPI_Abort( MPI_COMM_WORLD, ret );
@@ -244,37 +215,12 @@ int main( int argc, char** argv ) {
}
-static int parse_command_line( uint32_t my_rank, int argc, char** argv,
- AllData& alldata ) {
+static int parse_command_line( int argc, char** argv, AllData& alldata ) {
int ret= 0;
Params& params= alldata.params;
- /* show help text if no options are given */
- if ( 1 == argc ) {
-
- if ( 0 == my_rank ) {
-
- show_helptext();
-
- }
-
- return 1;
-
- }
-
- /* read environment variables */
-
- char* env;
-
- env= getenv( "OTF_PROFILE_LATEX" );
- if ( env && 0 < strlen( env ) )
- params.latex_command= env;
- env= getenv( "OTF_PROFILE_DVIPDF" );
- if ( env && 0 < strlen( env ) )
- params.dvipdf_command= env;
-
/* parse command line options */
enum { ERR_OK, ERR_OPT_UNKNOWN, ERR_ARG_MISSING, ERR_ARG_INVALID };
@@ -288,7 +234,7 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
if ( 0 == strcmp( "-h", argv[i] ) ||
0 == strcmp( "--help", argv[i] ) ) {
- if ( 0 == my_rank ) {
+ if ( 0 == alldata.myRank ) {
show_helptext();
@@ -300,13 +246,27 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
/* -V */
} else if ( 0 == strcmp( "-V", argv[i] ) ) {
- FPRINTF_ROOT( stdout, "%u.%u.%u \"%s\"\n",
- OTF_VERSION_MAJOR, OTF_VERSION_MINOR, OTF_VERSION_SUB,
- OTF_VERSION_STRING );
+ if ( 0 == alldata.myRank ) {
+
+ printf( "%u.%u.%u \"%s\"\n",
+ OTF_VERSION_MAJOR, OTF_VERSION_MINOR, OTF_VERSION_SUB,
+ OTF_VERSION_STRING );
+
+ }
ret= 1;
break;
+ /* -v */
+ } else if ( 0 == strcmp( "-v", argv[i] ) ) {
+
+ params.verbose_level++;
+
+ /* -p */
+ } else if ( 0 == strcmp( "-p", argv[i] ) ) {
+
+ params.progress= true;
+
/* -f */
} else if ( 0 == strcmp( "-f", argv[i] ) ) {
@@ -364,11 +324,15 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
params.read_from_stats= true;
+#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
+
/* --nopdf */
} else if ( 0 == strcmp( "--nopdf", argv[i] ) ) {
params.create_pdf= false;
+#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
+
/* input file or unknown option */
} else {
@@ -394,74 +358,102 @@ static int parse_command_line( uint32_t my_rank, int argc, char** argv,
}
/* show specific message on error */
-
if ( ERR_OK != parse_error ) {
- switch( parse_error ) {
+ if ( 0 == alldata.myRank ) {
- case ERR_OPT_UNKNOWN:
+ switch( parse_error ) {
- FPRINTF_ROOT( stderr, "ERROR: Unknown option '%s'.\n", argv[i] );
- break;
+ case ERR_OPT_UNKNOWN:
- case ERR_ARG_MISSING:
+ cerr << "ERROR: Unknown option '" << argv[i] << "'."
+ << endl;
+ break;
- FPRINTF_ROOT( stderr, "ERROR: Expected argument for option '%s'.\n",
- argv[i] );
- break;
+ case ERR_ARG_MISSING:
- case ERR_ARG_INVALID:
+ cerr << "ERROR: Expected argument for option '" << argv[i]
+ << "'." << endl;
+ break;
- FPRINTF_ROOT( stderr, "ERROR: Invalid argument for option '%s'.\n",
- argv[i] );
- break;
+ case ERR_ARG_INVALID:
- default:
+ cerr << "ERROR: Invalid argument for option '" << argv[i]
+ << "'." << endl;
+ break;
- break;
+ default:
+
+ break;
+
+ }
}
- ret= 2;
+ ret= -1;
+
+ /* show help text if no input trace file is given */
+ } else if ( 0 == params.input_file_prefix.length() ) {
+
+ if ( 0 == alldata.myRank ) {
+
+ show_helptext();
+
+ }
+
+ ret= 1;
+
}
return ret;
}
-static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
- AllData& alldata ) {
+static bool assign_procs_to_ranks( AllData& alldata ) {
- bool ret= true;
+ bool error= false;
- if ( 0 == my_rank ) {
+ OTF_FileManager* manager= NULL;
+ OTF_MasterControl* master= NULL;
+
+ if ( 0 == alldata.myRank ) {
/* rank 0 reads OTF master control of input trace file */
- OTF_FileManager* manager= OTF_FileManager_open( 1 );
+ manager= OTF_FileManager_open( 1 );
assert( manager );
- OTF_MasterControl* master= OTF_MasterControl_new( manager );
+ master= OTF_MasterControl_new( manager );
assert( master );
+ int master_read_ret=
+ OTF_MasterControl_read( master,
+ alldata.params.input_file_prefix.c_str() );
+
+ /* that's the first access to the input trace file; show tidy error
+ message if failed */
+ if ( 0 == master_read_ret ) {
+
+ cerr << "ERROR: Unable to open file '"
+ << alldata.params.input_file_prefix << ".otf' for reading."
+ << endl;
+ error= true;
+ }
+ }
+
+    /* broadcast the error indicator to the workers, because Open MPI left
+       all ranks except rank 0 waiting endlessly in MPI_Recv when the '.otf'
+       file was absent */
+ if ( SyncError( alldata, error, 0 ) ) {
+
+ return false;
+
+ }
+
+ if ( 0 == alldata.myRank ) {
+
do {
- int master_read_ret=
- OTF_MasterControl_read( master,
- alldata.params.input_file_prefix.c_str() );
-
- /* that's the first access to the input trace file; show tidy error
- message if failed */
- if ( 0 == master_read_ret ) {
-
- cerr << "ERROR: Unable to open file '"
- << alldata.params.input_file_prefix << ".otf' for reading."
- << endl;
- ret= false;
- break;
-
- }
-
/* fill the global array of processes */
alldata.myProcessesNum= OTF_MasterControl_getrCount( master );
@@ -507,19 +499,20 @@ static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
/* remaining ranks and remaining workers */
uint32_t r_ranks= alldata.myProcessesNum;
- uint32_t r_workers= num_ranks;
+ uint32_t r_workers= alldata.numRanks;
uint32_t pos= 0;
bool warn_for_empty= true;
- for ( int w= 0; w < (int)num_ranks; w++ ) {
+ for ( int w= 0; w < (int)alldata.numRanks; w++ ) {
uint32_t n= ( ( r_ranks / r_workers ) * r_workers < r_ranks) ?
( r_ranks / r_workers +1 ) : ( r_ranks / r_workers );
if ( ( 0 == n ) && warn_for_empty ) {
- cerr << "Warning: more analysis ranks than trace processes, " <<
- "ranks " << w << " to " << num_ranks -1 << " are unemployed" << endl;
+ cerr << "Warning: more analysis ranks than trace processes, "
+ << "ranks " << w << " to " << alldata.numRanks -1
+ << " are unemployed" << endl;
warn_for_empty= false;
}
@@ -578,7 +571,7 @@ static bool assign_procs_to_ranks( uint32_t my_rank, uint32_t num_ranks,
}
cerr << endl;*/
- return ret;
+ return !error;
}
@@ -802,6 +795,9 @@ static void show_helptext() {
<< " options:" << endl
<< " -h, --help show this help message" << endl
<< " -V show OTF version" << endl
+ << " -v increase output verbosity" << endl
+ << " (can be used more than once)" << endl
+ << " -p show progress" << endl
<< " -f max. number of filehandles available per rank" << endl
<< " (default: " << Params::DEFAULT_MAX_FILE_HANDLES << ")" << endl
<< " -b set buffersize of the reader" << endl
@@ -809,15 +805,102 @@ static void show_helptext() {
<< " -o specify the prefix of output file(s)" << endl
<< " (default: " << Params::DEFAULT_OUTPUT_FILE_PREFIX() << ")" << endl
<< " --stat read only summarized information, no events" << endl
+#if defined(PDFTEX) && defined(HAVE_PGFPLOTS_1_4) && HAVE_PGFPLOTS_1_4
<< " --nopdf do not produce PDF output" << endl
- << endl
- << " environment variables:" << endl
- << " OTF_PROFILE_LATEX LaTeX command" << endl
- << " (default: " << Params::DEFAULT_LATEX_COMMAND() << ")" << endl
- << " OTF_PROFILE_DVIPDF DVI to PDF converter command" << endl
- << " (default: " << Params::DEFAULT_DVIPDF_COMMAND() << ")" << endl
+#else /* PDFTEX && HAVE_PGFPLOTS_1_4 */
<< endl
<< " PDF creation requires the PGFPLOTS package version >1.4" << endl
<< " http://sourceforge.net/projects/pgfplots/ " << endl
+#endif /* PDFTEX && HAVE_PGFPLOTS_1_4 */
<< endl;
}
+
+
+void VerbosePrint( AllData& alldata, uint8_t level, bool root_only,
+ const char* fmt, ... ) {
+
+ if ( alldata.params.verbose_level >= level ) {
+
+ va_list ap;
+
+ va_start( ap, fmt );
+
+        /* either only rank 0 prints the message */
+ if ( root_only ) {
+
+ if ( 0 == alldata.myRank ) {
+
+ vprintf( fmt, ap );
+ }
+
+ /* or all ranks print the message */
+ } else {
+
+ char msg[1024];
+
+ /* prepend current rank to message */
+ snprintf( msg, sizeof( msg ) -1, "[%u] ", alldata.myRank );
+        vsnprintf( msg + strlen( msg ), sizeof( msg ) - strlen( msg ), fmt, ap );
+
+ /* print message */
+ printf( "%s ", msg );
+
+ }
+
+ va_end( ap );
+
+ }
+}
+
+
+bool SyncError( AllData& alldata, bool& error, uint32_t root ) {
+
+#ifdef SYNC_ERROR
+
+ if ( 1 < alldata.numRanks ) {
+
+ int buf= ( error ) ? 1 : 0;
+
+ /* either broadcast the error indicator from one rank (root)
+ or reduce them from all */
+
+ if ( root != (uint32_t)-1 ) {
+
+ MPI_Bcast( &buf, 1, MPI_INT, (int)root, MPI_COMM_WORLD );
+
+ error= ( 1 == buf );
+
+ } else {
+
+ int recv_buf;
+
+ MPI_Allreduce( &buf, &recv_buf, 1, MPI_INT, MPI_MAX,
+ MPI_COMM_WORLD );
+
+ error= ( 1 == recv_buf );
+
+ }
+
+ }
+
+#endif /* SYNC_ERROR */
+
+ return error;
+}
+
+
+uint64_t Logi( uint64_t x, uint64_t b ) {
+
+ assert( b > 1 );
+
+ uint64_t c= 1;
+ uint64_t i= 0;
+
+ while( c <= x ) {
+
+ c*= b;
+ i++;
+ }
+
+ return i;
+}
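
Note Logi's semantics: for x >= 1 it returns floor(log_b(x)) + 1 (the number
of base-b digits of x), and Logi(0) is 0, which is exactly what the
message-speed binning in collect_data.cpp relies on. A quick standalone check:

    #include <cassert>
    #include <stdint.h>

    uint64_t Logi( uint64_t x, uint64_t b= 2 ) {
        assert( b > 1 );
        uint64_t c= 1, i= 0;
        while ( c <= x ) { c*= b; i++; }
        return i;
    }

    int main() {
        assert( 0 == Logi( 0 ) );     /* loop body never runs */
        assert( 1 == Logi( 1 ) );
        assert( 10 == Logi( 1000 ) ); /* 2^9 = 512 <= 1000 < 1024 = 2^10 */
        return 0;
    }
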
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.h
new file mode 100644
index 0000000000..ea8de61f0b
--- /dev/null
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/otfprofile-mpi.h
@@ -0,0 +1,30 @@
+/*
+ This is part of the OTF library. Copyright by ZIH, TU Dresden 2005-2011.
+ Authors: Andreas Knuepfer, Robert Dietrich, Matthias Jurenz
+*/
+
+#ifndef OTFPROFILE_MPI_H
+#define OTFPROFILE_MPI_H
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#include "datastructs.h"
+
+
+/* print verbose message to stdout
+ (if root_only is true only rank 0 will print the message) */
+void VerbosePrint( AllData& alldata, uint8_t level, bool root_only,
+ const char* fmt, ... );
+
+/* synchronize error indicator with all worker ranks
+ (either broadcast from one rank (root) or reduce from all) */
+bool SyncError( AllData& alldata, bool& error, uint32_t root= (uint32_t)-1 );
+
+/* logarithm to base b for unsigned 64-bit integer x */
+uint64_t Logi( uint64_t x, uint64_t b= 2 );
+
+
+#endif /* OTFPROFILE_MPI_H */
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.cpp
index 5a4f79eda8..c0023a9ee0 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.cpp
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.cpp
@@ -8,8 +8,7 @@ using namespace std;
#include
#include
-#include "mpi.h"
-
+#include "otfprofile-mpi.h"
#include "reduce_data.h"
@@ -546,71 +545,103 @@ static void unpack_worker_data( AllData& alldata, uint32_t sizes[10] ) {
}
-bool reduceData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
+bool ReduceData( AllData& alldata ) {
bool ret= true;
- /* implement reduction myself because MPI and C++ STL don't play with each other */
+ if ( 1 < alldata.numRanks ) {
- /* how many rounds until master has all the data? */
- uint32_t round= 1;
- while ( round < num_ranks ) {
+ VerbosePrint( alldata, 1, true, "reducing data\n" );
- uint32_t peer= my_rank ^ round;
+ /* implement reduction myself because MPI and C++ STL don't play with
+ each other */
- /* if peer rank is not there, do nothing but go on */
- if ( peer >= num_ranks ) {
+ /* how many rounds until master has all the data? */
+ uint32_t num_rounds= Logi( alldata.numRanks ) -1;
+ uint32_t round_no= 0;
+ uint32_t round= 1;
+ while ( round < alldata.numRanks ) {
+
+ round_no++;
+
+ if ( 1 == alldata.params.verbose_level ) {
+
+ VerbosePrint( alldata, 1, true, " round %u / %u\n",
+ round_no, num_rounds );
+ }
+
+ uint32_t peer= alldata.myRank ^ round;
+
+ /* if peer rank is not there, do nothing but go on */
+ if ( peer >= alldata.numRanks ) {
+
+ round= round << 1;
+ continue;
+ }
+
+ /* send to smaller peer, receive from larger one */
+ uint32_t sizes[10];
+ char* buffer;
+
+ if ( alldata.myRank < peer ) {
+
+ MPI_Status status;
+
+ MPI_Recv( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD,
+ &status );
+
+ // DEBUG
+ //cout << " round " << round << " recv " << peer << "--> " <<
+ //my_rank << " with " <<
+ //sizes[0] << " bytes, " <<
+ //sizes[1] << ", " <<
+ //sizes[2] << ", " <<
+ //sizes[3] << ", " <<
+ //sizes[4] << "" << endl << flush;
+
+ buffer= prepare_worker_data( alldata, sizes );
+
+ VerbosePrint( alldata, 2, false,
+ "round %u / %u: receiving %u bytes from rank %u\n",
+ round_no, num_rounds, sizes[0], peer );
+
+ MPI_Recv( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD,
+ &status );
+
+ unpack_worker_data( alldata, sizes );
+
+ } else {
+
+ buffer= pack_worker_data( alldata, sizes );
+
+ // DEBUG
+ //cout << " round " << round << " send " << my_rank <<
+ //" --> " << peer << " with " <<
+ //sizes[0] << " bytes, " <<
+ //sizes[1] << ", " <<
+ //sizes[2] << ", " <<
+ //sizes[3] << ", " <<
+ //sizes[4] << "" << endl << flush;
+
+ VerbosePrint( alldata, 2, false,
+ "round %u / %u: sending %u bytes to rank %u\n",
+ round_no, num_rounds, sizes[0], peer );
+
+ MPI_Send( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD );
+
+ MPI_Send( buffer, sizes[0], MPI_PACKED, peer, 5,
+ MPI_COMM_WORLD );
+
+            /* every worker has to send off its data at most once;
+               after that, break from the collective reduction operation */
+ break;
+ }
round= round << 1;
- continue;
+
}
- /* send to smaller peer, receive from larger one */
- uint32_t sizes[10];
- char* buffer;
-
- if ( my_rank < peer ) {
-
- MPI_Status status;
-
- MPI_Recv( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD, &status );
-
- // DEBUG
- //cout << " round " << round << " recv " << peer << "--> "<< my_rank << " with " <<
- //sizes[0] << " bytes, " <<
- //sizes[1] << ", " <<
- //sizes[2] << ", " <<
- //sizes[3] << ", " <<
- //sizes[4] << "" << endl << flush;
-
- buffer= prepare_worker_data( alldata, sizes );
-
- MPI_Recv( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD, &status );
-
- unpack_worker_data( alldata, sizes );
-
- } else {
-
- buffer= pack_worker_data( alldata, sizes );
-
- // DEBUG
- //cout << " round " << round << " send " << my_rank << " --> " << peer << " with " <<
- //sizes[0] << " bytes, " <<
- //sizes[1] << ", " <<
- //sizes[2] << ", " <<
- //sizes[3] << ", " <<
- //sizes[4] << "" << endl << flush;
-
- MPI_Send( sizes, 10, MPI_UNSIGNED, peer, 4, MPI_COMM_WORLD );
-
- MPI_Send( buffer, sizes[0], MPI_PACKED, peer, 5, MPI_COMM_WORLD );
-
- /* every work has to send off its data at most once,
- after that, break from the collective reduction operation */
- break;
- }
-
- round= round << 1;
+ alldata.freePackBuffer();
}
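
The loop above is a binomial-tree ("hypercube") reduction: in every round each rank pairs with peer = rank ^ round, the smaller rank of a pair receives and merges, and the larger one sends its data exactly once and drops out, so rank 0 holds everything after about log2(numRanks) rounds. The following self-contained sketch shows the same communication pattern reduced to a single unsigned value per rank; it illustrates the scheme only and is not the patched code, which ships MPI_PACKED buffers via pack_worker_data()/unpack_worker_data().

    #include <mpi.h>
    #include <cstdio>

    int main( int argc, char** argv )
    {
        MPI_Init( &argc, &argv );

        int rank, size;
        MPI_Comm_rank( MPI_COMM_WORLD, &rank );
        MPI_Comm_size( MPI_COMM_WORLD, &size );

        unsigned my_rank= (unsigned)rank, num_ranks= (unsigned)size;
        unsigned value= my_rank + 1; /* stand-in for per-rank profile data */

        for ( unsigned round= 1; round < num_ranks; round<<= 1 )
        {
            unsigned peer= my_rank ^ round;

            /* if the peer rank is not there, do nothing but go on */
            if ( peer >= num_ranks ) continue;

            if ( my_rank < peer )
            {
                /* receive from the larger peer and merge */
                unsigned recvd;
                MPI_Status status;
                MPI_Recv( &recvd, 1, MPI_UNSIGNED, (int)peer, 0,
                          MPI_COMM_WORLD, &status );
                value+= recvd;
            }
            else
            {
                /* send to the smaller peer, then drop out: every worker
                   sends its data at most once */
                MPI_Send( &value, 1, MPI_UNSIGNED, (int)peer, 0,
                          MPI_COMM_WORLD );
                break;
            }
        }

        if ( 0 == my_rank )
            printf( "reduced sum: %u\n", value ); /* n*(n+1)/2 */

        MPI_Finalize();
        return 0;
    }
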
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.h
index 067f41c06b..86b0e9e402 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.h
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/reduce_data.h
@@ -6,11 +6,12 @@
#ifndef REDUCE_DATA_H
#define REDUCE_DATA_H
+
#include "datastructs.h"
/* reduce the data to the master process */
-bool reduceData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata );
+bool ReduceData( AllData& alldata );
#endif /* REDUCE_DATA_H */
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.cpp b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.cpp
index 65ae976060..ea42e5d3bb 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.cpp
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.cpp
@@ -8,13 +8,10 @@ using namespace std;
#include <cassert>
#include <iostream>
-#include "mpi.h"
-
#include "summarize_data.h"
-static void get_clustering( uint32_t my_rank, uint32_t num_ranks,
- AllData& alldata ) {
+static void get_clustering( AllData& alldata ) {
uint32_t r_processes= alldata.allProcesses.size();
uint32_t r_clusters= Clustering::MAX_CLUSTERS;
@@ -44,8 +41,7 @@ static void get_clustering( uint32_t my_rank, uint32_t num_ranks,
}
-static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
- AllData& alldata ) {
+static void share_clustering( AllData& alldata ) {
MPI_Barrier( MPI_COMM_WORLD );
@@ -53,7 +49,7 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
int buffer_size= 0;
int buffer_pos= 0;
- if ( my_rank == 0 ) {
+ if ( 0 == alldata.myRank ) {
/* get size needed to send clustering information to workers */
@@ -86,7 +82,7 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
/* pack clustering information to buffer */
- if ( my_rank == 0 ) {
+ if ( 0 == alldata.myRank ) {
/* alldata.clustering.clustersToProcesses.size() */
uint64_t clust_proc_map_size=
@@ -128,7 +124,7 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
/* unpack clustering information from buffer */
- if ( my_rank != 0 ) {
+ if ( 0 != alldata.myRank ) {
/* alldata.clustering.clustersToProcesses.size() */
uint64_t clust_proc_map_size;
@@ -168,29 +164,29 @@ static void share_clustering( uint32_t my_rank, uint32_t num_ranks,
}
-bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
+bool SummarizeData( AllData& alldata ) {
- bool ret= true;
+ bool error= false;
/* rank 0 gets clustering information */
- if ( my_rank == 0 ) {
+ if ( 0 == alldata.myRank ) {
- get_clustering( my_rank, num_ranks, alldata );
+ get_clustering( alldata );
}
/* share clustering information to workers */
- if ( num_ranks > 1 ) {
+ if ( 1 < alldata.numRanks ) {
- share_clustering( my_rank, num_ranks, alldata );
+ share_clustering( alldata );
}
/* macro to set min, max to sum before summarizing */
# define MINMAX2SUM(v) \
- if( (v).cnt != 0 ) { \
+ if( 0 != (v).cnt ) { \
(v).cnt = 1; \
(v).min= (v).max= (v).sum; \
} else { \
@@ -243,9 +239,9 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
if ( alldata.clustering.enabled ) {
cluster_a= alldata.clustering.process2cluster( it->first.a );
- assert( cluster_a != 0 );
+ assert( 0 != cluster_a );
cluster_b= alldata.clustering.process2cluster( it->first.b );
- assert( cluster_b != 0 );
+ assert( 0 != cluster_b );
}
@@ -273,7 +269,7 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
if ( alldata.clustering.enabled ) {
cluster= alldata.clustering.process2cluster( it->first );
- assert( cluster != 0 );
+ assert( 0 != cluster );
}
@@ -302,7 +298,7 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
if ( alldata.clustering.enabled ) {
cluster= alldata.clustering.process2cluster( it->first.b );
- assert( cluster != 0 );
+ assert( 0 != cluster );
}
@@ -319,5 +315,5 @@ bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata ) {
alldata.collectiveMapPerRank.clear();
}
- return ret;
+ return !error;
}
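
share_clustering() follows the classic MPI size/pack/broadcast/unpack scheme: rank 0 computes the packed size of the cluster-to-process map, packs it into an MPI_PACKED buffer, and the workers unpack the identical bytes on arrival. The transport itself is outside this hunk; the sketch below assumes an MPI_Bcast of the size followed by one of the buffer, and ships a plain array of uint64_t values instead of the map:

    #include <mpi.h>
    #include <cstdio>
    #include <vector>
    #include <stdint.h>

    int main( int argc, char** argv )
    {
        MPI_Init( &argc, &argv );

        int my_rank;
        MPI_Comm_rank( MPI_COMM_WORLD, &my_rank );

        std::vector<uint64_t> values;
        if ( 0 == my_rank ) { values.push_back( 7 ); values.push_back( 42 ); }

        /* rank 0 computes the buffer size needed; uint64_t is shipped as
           MPI_LONG_LONG_INT (same width, sign unused here) */
        int buffer_size= 0;
        if ( 0 == my_rank )
        {
            int s1, s2;
            MPI_Pack_size( 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD, &s1 );
            MPI_Pack_size( (int)values.size(), MPI_LONG_LONG_INT,
                           MPI_COMM_WORLD, &s2 );
            buffer_size= s1 + s2;
        }

        MPI_Bcast( &buffer_size, 1, MPI_INT, 0, MPI_COMM_WORLD );
        std::vector<char> buffer( buffer_size );
        int buffer_pos= 0;

        /* pack clustering information to buffer */
        if ( 0 == my_rank )
        {
            uint64_t n= values.size();
            MPI_Pack( &n, 1, MPI_LONG_LONG_INT, &buffer[0], buffer_size,
                      &buffer_pos, MPI_COMM_WORLD );
            MPI_Pack( &values[0], (int)n, MPI_LONG_LONG_INT, &buffer[0],
                      buffer_size, &buffer_pos, MPI_COMM_WORLD );
        }

        MPI_Bcast( &buffer[0], buffer_size, MPI_PACKED, 0, MPI_COMM_WORLD );

        /* unpack clustering information from buffer */
        if ( 0 != my_rank )
        {
            uint64_t n;
            MPI_Unpack( &buffer[0], buffer_size, &buffer_pos, &n, 1,
                        MPI_LONG_LONG_INT, MPI_COMM_WORLD );
            values.resize( n );
            MPI_Unpack( &buffer[0], buffer_size, &buffer_pos, &values[0],
                        (int)n, MPI_LONG_LONG_INT, MPI_COMM_WORLD );
            printf( "rank %d unpacked %llu values\n", my_rank,
                    (unsigned long long)n );
        }

        MPI_Finalize();
        return 0;
    }
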
diff --git a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.h b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.h
index 0eb6598744..4e1b927387 100644
--- a/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.h
+++ b/ompi/contrib/vt/vt/extlib/otf/tools/otfprofile-mpi/summarize_data.h
@@ -6,11 +6,12 @@
#ifndef SUMMARIZE_DATA_H
#define SUMMARIZE_DATA_H
+
#include "datastructs.h"
/* summarize the data for all trace processes on the current worker */
-bool summarizeData( uint32_t my_rank, uint32_t num_ranks, AllData& alldata );
+bool SummarizeData( AllData& alldata );
#endif /* SUMMARIZE_DATA_H */
diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify.cc b/ompi/contrib/vt/vt/tools/vtunify/vt_unify.cc
index 6499a95660..2aa5ae7db4 100644
--- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify.cc
+++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify.cc
@@ -812,10 +812,12 @@ parseCommandLine( int argc, char ** argv )
Params.droprecvs = true;
}
#endif // VT_UNIFY_HOOKS_MSGMATCH
+#if defined(HAVE_ZLIB) && HAVE_ZLIB
else if( strcmp( argv[i], "--nocompress" ) == 0 )
{
Params.docompress = false;
}
+#endif // HAVE_ZLIB
else if( strcmp( argv[i], "-k" ) == 0
|| strcmp( argv[i], "--keeplocal" ) == 0 )
{
@@ -1055,8 +1057,10 @@ showUsage()
<< " -v, --verbose Increase output verbosity." << std::endl
<< " (can be used more than once)" << std::endl
<< std::endl
+#if defined(HAVE_ZLIB) && HAVE_ZLIB
<< " --nocompress Don't compress output trace files." << std::endl
<< std::endl
+#endif // HAVE_ZLIB
#ifdef VT_UNIFY_HOOKS_MSGMATCH
<< " --nomsgmatch Don't match messages." << std::endl
<< std::endl
diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify.h b/ompi/contrib/vt/vt/tools/vtunify/vt_unify.h
index c60ff5d534..698cb9956a 100644
--- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify.h
+++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify.h
@@ -63,10 +63,19 @@
struct ParamsS
{
ParamsS()
- : verbose_level( 0 ), docompress( true ), doclean( true ),
+ : verbose_level( 0 ), docompress( false ), doclean( true ),
showusage( false ), showversion( false ), showprogress( false ),
- bequiet( false ), domsgmatch( true ), droprecvs( false ),
- prof_sort_flags( 0x22 ) {}
+ bequiet( false ), domsgmatch( false ), droprecvs( false ),
+ prof_sort_flags( 0x22 )
+ {
+#if defined(HAVE_ZLIB) && HAVE_ZLIB
+ docompress = true;
+#endif // HAVE_ZLIB
+
+#ifdef VT_UNIFY_HOOKS_MSGMATCH
+ domsgmatch = true;
+#endif // VT_UNIFY_HOOKS_MSGMATCH
+ }
std::string in_file_prefix; // input trace file prefix
std::string out_file_prefix; // output trace file prefix
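
The constructor change inverts the defaults: compression and message matching now start disabled and are switched on only when the corresponding support was detected at configure time, so a zlib-less build can never default to a feature it does not have. A minimal sketch of this compile-time-default idiom, using a hypothetical HAVE_FEATURE_X macro in place of HAVE_ZLIB / VT_UNIFY_HOOKS_MSGMATCH:

    #include <iostream>

    /* normally provided by configure, e.g. via AC_DEFINE: */
    /* #define HAVE_FEATURE_X 1 */

    struct OptionsS
    {
        bool use_feature_x;

        OptionsS()
            : use_feature_x( false ) /* safe default: off */
        {
    #if defined(HAVE_FEATURE_X) && HAVE_FEATURE_X
            /* enable only when support was compiled in */
            use_feature_x = true;
    #endif /* HAVE_FEATURE_X */
        }
    };

    int main()
    {
        OptionsS opts;
        std::cout << "feature x: "
                  << ( opts.use_feature_x ? "enabled" : "disabled" )
                  << std::endl;
        return 0;
    }
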
diff --git a/ompi/contrib/vt/vt/tools/vtwrapper/vt_wrapper.cc b/ompi/contrib/vt/vt/tools/vtwrapper/vt_wrapper.cc
index 9211dc850e..a3a5f7e769 100644
--- a/ompi/contrib/vt/vt/tools/vtwrapper/vt_wrapper.cc
+++ b/ompi/contrib/vt/vt/tools/vtwrapper/vt_wrapper.cc
@@ -687,12 +687,28 @@ Wrapper::parseCommandLine( int argc, char ** argv )
|| arg.compare( "-fopenmp" ) == 0
|| arg.compare( "-Popenmp" ) == 0
|| arg.compare( "-xopenmp" ) == 0
- || arg.compare( "-mp" ) == 0
- || arg.compare( "-qsmp=omp" ) == 0 )
+ || arg.compare( "-mp" ) == 0 )
{
m_pConfig->setUsesThreads( true );
m_pConfig->setUsesOpenMP( true );
}
+ else if( arg.length() > 6 && arg.compare( 0, 6, "-qsmp=" ) == 0 )
+ {
+ char carg[128];
+ strncpy( carg, arg.substr(6).c_str(), sizeof( carg ) - 1 );
+ carg[sizeof(carg) - 1] = '\0';
+
+ /* strtok() may return NULL immediately (e.g. for "-qsmp=::"),
+ so test the token before using it */
+ char * token = strtok( carg, ":" );
+ while( token )
+ {
+ if( strcmp( token, "omp" ) == 0 )
+ {
+ m_pConfig->setUsesThreads( true );
+ m_pConfig->setUsesOpenMP( true );
+ break;
+ }
+ token = strtok( 0, ":" );
+ }
+ }
//
// nvcc's pthread/openmp flag
//
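
The new branch handles IBM XL's -qsmp flag, whose value is a colon-separated suboption list (e.g. -qsmp=noauto:omp): it copies the part after "-qsmp=" into a writable buffer and scans the tokens for "omp". A standalone sketch of that scan, with the hypothetical helper name qsmp_requests_omp():

    #include <cstring>
    #include <cstdio>
    #include <string>

    static bool qsmp_requests_omp( const std::string& arg )
    {
        if ( arg.length() <= 6 || arg.compare( 0, 6, "-qsmp=" ) != 0 )
            return false;

        /* strtok() modifies its input, so work on a bounded copy */
        char carg[128];
        strncpy( carg, arg.substr( 6 ).c_str(), sizeof( carg ) - 1 );
        carg[sizeof( carg ) - 1] = '\0';

        /* the first strtok() may already return NULL (e.g. "-qsmp=::") */
        for ( char* token = strtok( carg, ":" ); token != 0;
              token = strtok( 0, ":" ) )
        {
            if ( strcmp( token, "omp" ) == 0 )
                return true;
        }

        return false;
    }

    int main()
    {
        printf( "%d\n", qsmp_requests_omp( "-qsmp=noauto:omp" ) ); /* 1 */
        printf( "%d\n", qsmp_requests_omp( "-qsmp=auto" ) );       /* 0 */
        printf( "%d\n", qsmp_requests_omp( "-qsmp=::" ) );         /* 0 */
        return 0;
    }
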
diff --git a/ompi/contrib/vt/vt/vtlib/vt_env.c b/ompi/contrib/vt/vt/vtlib/vt_env.c
index 715ce70ee7..b7e5d88cf7 100644
--- a/ompi/contrib/vt/vt/vtlib/vt_env.c
+++ b/ompi/contrib/vt/vt/vtlib/vt_env.c
@@ -1222,6 +1222,7 @@ int vt_env_max_threads()
int vt_env_compression()
{
+#if defined(HAVE_ZLIB) && HAVE_ZLIB
static int compression = -1;
char* tmp;
@@ -1238,6 +1239,9 @@ int vt_env_compression()
}
}
return compression;
+#else /* HAVE_ZLIB */
+ return 0;
+#endif /* HAVE_ZLIB */
}
int vt_env_java_native()
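
vt_env_compression() keeps the parsed result in a function-local static so the environment is read only once, and collapses to a constant 0 when zlib support is missing, which keeps every caller honest without extra #ifdefs at the call sites. A sketch of that pattern with a made-up variable name and default; the real accessor lives in vt_env.c:

    #include <cstdlib>

    static int demo_env_compression( void )
    {
    #if defined(HAVE_ZLIB) && HAVE_ZLIB
        static int compression = -1; /* -1: environment not read yet */

        if ( compression == -1 )
        {
            /* DEMO_COMPRESSION is a hypothetical name for illustration */
            const char* tmp = getenv( "DEMO_COMPRESSION" );
            compression = ( tmp != NULL && tmp[0] != '\0' )
                          ? atoi( tmp ) : 1;
        }

        return compression;
    #else /* HAVE_ZLIB */
        return 0; /* without zlib, compression can never be on */
    #endif /* HAVE_ZLIB */
    }
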
diff --git a/ompi/contrib/vt/vt/vtlib/vt_trc.c b/ompi/contrib/vt/vt/vtlib/vt_trc.c
index 66bd4cb2d8..57bf8c9b7b 100644
--- a/ompi/contrib/vt/vt/vtlib/vt_trc.c
+++ b/ompi/contrib/vt/vt/vtlib/vt_trc.c
@@ -796,7 +796,7 @@ static void unify_traces(void)
/* compose unify arguments */
- argv = (char**)calloc(10 + vt_env_verbose(), sizeof(char*));
+ argv = (char**)calloc(10 + vt_env_verbose() + 1, sizeof(char*));
if (argv == NULL) vt_error();
argv[0] = NULL;
@@ -816,12 +816,14 @@ static void unify_traces(void)
}
argc++;
+#if defined(HAVE_ZLIB) && HAVE_ZLIB
if (!vt_env_compression()) argv[argc++] = strdup("--nocompress");
+#endif /* HAVE_ZLIB */
if (!vt_env_do_clean()) argv[argc++] = strdup("-k");
if (vt_env_verbose() == 0) argv[argc++] = strdup("-q");
else if (vt_env_verbose() >= 2)
{
- for (i=1;i