diff --git a/ompi/contrib/vt/vt/CONFIG-EXAMPLES b/ompi/contrib/vt/vt/CONFIG-EXAMPLES index aff448742a..f64a04e24d 100644 --- a/ompi/contrib/vt/vt/CONFIG-EXAMPLES +++ b/ompi/contrib/vt/vt/CONFIG-EXAMPLES @@ -17,3 +17,7 @@ +++ NEC SX-8 (High Performance Computing Center Stuttgart) ./configure CC=sxcc CXX=sxc++ F77=sxf90 FC=sxf90 AR=sxar MPICC=sxmpicc CXX_FOR_BUILD=c++ RANLIB="sxar st" OMPFLAG=-Popenmp --build=ia64-unknown-linux-gnu --host=sx8-nec-superux15.1 --with-otf-lib=-lotf + ++++ Sun Fire E6900 (RWTH Aachen) +./configure CC=cc CXX=CC F77=f90 F90=f90 CFLAGS="-xtarget=ultra4 -fast -xarch=v9a" CXXFLAGS="-xtarget=ultra4 -fast -xarch=v9a" FCFLAGS="-xtarget=ultra4 -fast -xarch=v9a" FFLAGS="-xtarget=ultra4 -fast -xarch=v9a" --with-mpi-lib=-lmpi_mt --with-pmpi-lib=-lmpi_mt --enable-fmpi-lib + diff --git a/ompi/contrib/vt/vt/ChangeLog b/ompi/contrib/vt/vt/ChangeLog index ce5cf6839a..608f514b65 100644 --- a/ompi/contrib/vt/vt/ChangeLog +++ b/ompi/contrib/vt/vt/ChangeLog @@ -198,8 +198,9 @@ maximum number of buffer flushes was reached - fixed a bug in the compiler wrappers: added check for SUN's OpenMP compiler flag (-xopenmp) - - cleanup call stack when maximum number of flushes was reached - fixed a bug for determining the timer-resolution for ITC + - added process group definition for MPI communicator MPI_COMM_SELF + - cleanup call stack when maximum number of flushes was reached - added check if we can link '-lz' in order to pass either '-lotf -lz' or just '-lotf' as OTF link option - renamed configure's option '--with[out]-mpi-io' to diff --git a/ompi/contrib/vt/vt/acinclude.m4 b/ompi/contrib/vt/vt/acinclude.m4 index dfc88a328e..561d0ef520 100644 --- a/ompi/contrib/vt/vt/acinclude.m4 +++ b/ompi/contrib/vt/vt/acinclude.m4 @@ -307,11 +307,12 @@ AC_DEFUN([ACVT_CONF_SUBTITLE], AC_DEFUN([ACVT_CONF_EXPAND_VARS], [ - $2=$1 - var=`echo $$2 | sed s/^.*\$\{// | sed s/\}.*$//` - while test x"$var" != x"$$2"; do - $2=`echo $$2 | sed s:\$\{$var\}:${!var}:g` - var=`echo $$2 | sed 
s/^.*\$\{// | sed s/\}.*$//` + var=$1 + + while : + do + $2=`eval echo $var` + AS_IF([test "x$$2" = "x$var"], [break], [var=$$2]) done ]) diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.cc b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.cc index 34f479181c..a67c06aef3 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.cc @@ -134,6 +134,7 @@ GlobDefsCmp( Definitions::DefRec_Base_struct * a, // ... sort to this order: // Nodes // MPI_COMM_WORLD + // MPI_COMM_SELFs // remaining MPI communicators // OpenMP Thread Teams // Rest @@ -165,17 +166,31 @@ GlobDefsCmp( Definitions::DefRec_Base_struct * a, { return false; } - // p1 == TYPE_MPI_COMM && p2 != TYPE_MPI_COMM + // p1 == TYPE_MPI_COMM_SELF && p2 != TYPE_MPI_COMM_SELF else if( - p1->type == Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM - && p2->type != Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM ) + p1->type == Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_SELF + && p2->type != Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_SELF ) { return true; } - // p1 != TYPE_MPI_COMM && p2 == TYPE_MPI_COMM + // p1 != TYPE_MPI_COMM_SELF && p2 == TYPE_MPI_COMM_SELF else if( - p1->type != Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM - && p2->type == Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM ) + p1->type != Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_SELF + && p2->type == Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_SELF ) + { + return false; + } + // p1 == TYPE_MPI_COMM_USER && p2 != TYPE_MPI_COMM_USER + else if( + p1->type == Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_USER + && p2->type != Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_USER ) + { + return true; + } + // p1 != TYPE_MPI_COMM_USER && p2 == TYPE_MPI_COMM_USER + else if( + p1->type != Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_USER + && p2->type ==
Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_USER ) { return false; } @@ -195,13 +210,8 @@ GlobDefsCmp( Definitions::DefRec_Base_struct * a, } else { - // sort by names if not equal; otherwise sort by token - // - int cmprc = p1->name.compare( p2->name ); - if( cmprc == 0 ) - return p1->deftoken < p2->deftoken; - else - return cmprc < 0 ? true : false; + // sort by token, if process group types are equal + return p1->deftoken < p2->deftoken; } } // both record types are Definitions::DEF_REC_TYPE__DefinitionComment ? ... @@ -510,6 +520,7 @@ Definitions::createGlobal( const std::vector * bool error = false; uint32_t omp_comm_idx = 0; + uint32_t mpi_comm_self_idx = 0; for( uint32_t i = 0; i < p_vecLocDefs->size(); i++ ) { @@ -593,9 +604,9 @@ Definitions::createGlobal( const std::vector * addProc2NodeGroup( p_loc_def_entry->name.substr(9), p_loc_def_entry->members[0] ); } - // MPI communicator (except MPI_COMM_WORLD)? + // MPI communicator (except MPI_COMM_WORLD and MPI_COMM_SELF) else if( p_loc_def_entry->type == - DefRec_DefProcessGroup_struct::TYPE_MPI_COMM ) + DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_USER ) { addMPIComm( p_loc_def_entry->loccpuid, p_loc_def_entry->deftoken, @@ -625,12 +636,28 @@ Definitions::createGlobal( const std::vector * p_loc_def_entry->members ); char new_name[256]; - if( p_loc_def_entry->name.compare( "__OMP_TEAM__" ) == 0 ) + if( p_loc_def_entry->type == + DefRec_DefProcessGroup_struct::TYPE_OMP_TEAM ) + { snprintf( new_name, sizeof( new_name ), "OMP Thread Team %d", omp_comm_idx++ ); + } + else if( p_loc_def_entry->type == + DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_WORLD ) + { + strcpy( new_name, "MPI_COMM_WORLD" ); + } + else if( p_loc_def_entry->type == + DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_SELF ) + { + snprintf( new_name, sizeof( new_name ), + "MPI_COMM_SELF %d", mpi_comm_self_idx++ ); + } else + { strncpy( new_name, p_loc_def_entry->name.c_str(), sizeof( new_name ) ); + } // add new definition to vector of 
global definitions p_vecGlobDefs->push_back( new DefRec_DefProcessGroup_struct( @@ -1598,7 +1625,7 @@ Definitions::addMPIComms2Global( std::vector * p_vecGlobDefs->push_back( new DefRec_DefProcessGroup_struct( 0, global_token, - DefRec_DefProcessGroup_struct::TYPE_MPI_COMM, + DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_USER, comm_name, vec_members ) ); diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.h b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.h index b4c73a9459..694121f203 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.h +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs.h @@ -31,8 +31,8 @@ public: // definition record types // - typedef enum { DEF_REC_TYPE__DefinitionComment, - DEF_REC_TYPE__DefCreator, + typedef enum { DEF_REC_TYPE__DefCreator, + DEF_REC_TYPE__DefinitionComment, DEF_REC_TYPE__DefTimerResolution, DEF_REC_TYPE__DefProcess, DEF_REC_TYPE__DefProcessGroup, @@ -126,8 +126,8 @@ public: // struct DefRec_DefProcessGroup_struct : DefRec_Base_struct { - typedef enum { TYPE_NODE, TYPE_MPI_COMM_WORLD, - TYPE_MPI_COMM, TYPE_OMP_TEAM, TYPE_OTHER } + typedef enum { TYPE_NODE, TYPE_MPI_COMM_WORLD, TYPE_MPI_COMM_SELF, + TYPE_MPI_COMM_USER, TYPE_OMP_TEAM, TYPE_OTHER } ProcessGroupTypeT; DefRec_DefProcessGroup_struct() diff --git a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_hdlr.cc b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_hdlr.cc index 56c43915fa..63d16e8c57 100644 --- a/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_hdlr.cc +++ b/ompi/contrib/vt/vt/tools/vtunify/vt_unify_defs_hdlr.cc @@ -80,10 +80,12 @@ Handle_DefProcessGroup( std::vector* if( strncmp( name, "__NODE__", 8 ) == 0 ) type = Definitions::DefRec_DefProcessGroup_struct::TYPE_NODE; - else if( strcmp( name, "__MPI_COMM__" ) == 0 ) - type = Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM; - else if( strcmp( name, "MPI_COMM_WORLD" ) == 0 ) + else if( strcmp( name, "__MPI_COMM_USER__" ) == 0 ) + type = 
Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_USER; + else if( strcmp( name, "__MPI_COMM_WORLD__" ) == 0 ) type = Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_WORLD; + else if( strcmp( name, "__MPI_COMM_SELF__" ) == 0 ) + type = Definitions::DefRec_DefProcessGroup_struct::TYPE_MPI_COMM_SELF; else if( strcmp( name, "__OMP_TEAM__" ) == 0 ) type = Definitions::DefRec_DefProcessGroup_struct::TYPE_OMP_TEAM; else diff --git a/ompi/contrib/vt/vt/vtlib/vt_mpicom.c b/ompi/contrib/vt/vt/vtlib/vt_mpicom.c index 03917fb010..d28db21deb 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_mpicom.c +++ b/ompi/contrib/vt/vt/vtlib/vt_mpicom.c @@ -65,7 +65,7 @@ struct VTComm uint32_t cid; }; -static int currcid = 1; +static int currcid = 2; /* 0/1 reserved for MPI_COMM_WORLD/MPI_COMM_SELF */ static int last_comm = 0; static int* ranks; static struct VTComm comms[VT_MAX_COMM]; diff --git a/ompi/contrib/vt/vt/vtlib/vt_mpiwrap.c b/ompi/contrib/vt/vt/vtlib/vt_mpiwrap.c index 5a9d46f757..248038398e 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_mpiwrap.c +++ b/ompi/contrib/vt/vt/vtlib/vt_mpiwrap.c @@ -25,6 +25,7 @@ #include #include +#include #include static MPI_Status *my_status_array = 0; @@ -75,8 +76,9 @@ int vt_mpi_trace_is_on = 1; int MPI_Init( int *argc, char ***argv ) { - int returnVal, numprocs, i; + int returnVal, numprocs, me, i; unsigned char* grpv; + uint32_t grpc; uint64_t time; /* shall I trace MPI events? */ @@ -105,12 +107,21 @@ int MPI_Init( int *argc, char ***argv ) vt_mpi_init(); PMPI_Comm_size(MPI_COMM_WORLD, &numprocs); + PMPI_Comm_rank(MPI_COMM_WORLD, &me); + + grpc = numprocs / 8 + (numprocs % 8 ? 1 : 0); /* define communicator for MPI_COMM_WORLD */ - grpv = (unsigned char*)calloc(numprocs / 8 + (numprocs % 8 ? 1 : 0), sizeof(unsigned char)); + grpv = (unsigned char*)calloc(grpc, sizeof(unsigned char)); for (i = 0; i < numprocs; i++) grpv[i / 8] |= (1 << (i % 8)); - vt_def_mpi_comm(0, numprocs / 8 + (numprocs % 8 ? 
1 : 0), grpv); + vt_def_mpi_comm(0, grpc, grpv); + + memset(grpv, 0, grpc); + + /* define communicator for MPI_COMM_SELF */ + grpv[me / 8] |= (1 << (me % 8)); + vt_def_mpi_comm(1, grpc, grpv); free(grpv); @@ -130,12 +141,21 @@ int MPI_Init( int *argc, char ***argv ) vt_mpi_init(); PMPI_Comm_size(MPI_COMM_WORLD, &numprocs); + PMPI_Comm_rank(MPI_COMM_WORLD, &me); + + grpc = numprocs / 8 + (numprocs % 8 ? 1 : 0); /* define communicator for MPI_COMM_WORLD */ - grpv = (unsigned char*)calloc(numprocs / 8 + (numprocs % 8 ? 1 : 0), sizeof(unsigned char)); + grpv = (unsigned char*)calloc(grpc, sizeof(unsigned char)); for (i = 0; i < numprocs; i++) grpv[i / 8] |= (1 << (i % 8)); - vt_def_mpi_comm(0, numprocs / 8 + (numprocs % 8 ? 1 : 0), grpv); + vt_def_mpi_comm(0, grpc, grpv); + + memset(grpv, 0, grpc); + + /* define communicator for MPI_COMM_SELF */ + grpv[me / 8] |= (1 << (me % 8)); + vt_def_mpi_comm(1, grpc, grpv); free(grpv); diff --git a/ompi/contrib/vt/vt/vtlib/vt_otf_trc.c b/ompi/contrib/vt/vt/vtlib/vt_otf_trc.c index 1f8c3bb88d..bf1e0aefa8 100644 --- a/ompi/contrib/vt/vt/vtlib/vt_otf_trc.c +++ b/ompi/contrib/vt/vt/vtlib/vt_otf_trc.c @@ -1152,13 +1152,14 @@ uint32_t vt_def_counter(const char* cname, } void vt_def_mpi_comm(uint32_t cid, - uint32_t grpc, - uint8_t grpv[]) + uint32_t grpc, + uint8_t grpv[]) { int i; uint32_t cgrpc; uint32_t* cgrpv; + char cname[20]; vt_check_thrd_id(VT_MY_THREAD); @@ -1178,10 +1179,15 @@ void vt_def_mpi_comm(uint32_t cid, if(grpv[i] & 0x80) cgrpv[cgrpc++] = (i * 8) + 8; } + if(cid == 0) + strcpy(cname, "__MPI_COMM_WORLD__"); + else if(cid == 1) + strcpy(cname, "__MPI_COMM_SELF__"); + else + strcpy(cname, "__MPI_COMM_USER__"); + VTGen_write_DEF_PROCESS_GROUP(VTTHRD_GEN(thrdv[VT_MY_THREAD]), - cid+1, - cid == 0 ? "MPI_COMM_WORLD" : "__MPI_COMM__", - cgrpc, cgrpv); + cid+1, cname, cgrpc, cgrpv); } /*