- fixed a bug (potential segfault) in the MPI wrapper functions MPI_Gatherv and MPI_Scatterv, caused by illegal access to parameters that are insignificant on non-root ranks (see the sketch after this list)
	- vtdyn:
		- stop instrumenting if an error occurs while finalizing the instrumentation set
	- vtunify-mpi:
		- added option '--stats' to unify only summarized information, no events
		- reduced memory usage on rank 0: immediately send token translation tables to the corresponding worker ranks when they are complete
		- send the "finished-flag" together with the last set of definitions read to rank 0 instead of sending an extra message
	- OPARI:
		- fixed detection of DO loop beginnings; previously, a variable containing "do" in its name was mistakenly detected as a DO loop
		- fixed processing of Fortran line-continuation appearing after a complete OpenMP directive
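
In MPI_Gatherv/MPI_Scatterv, the count, displacement, and datatype arguments of the root-side buffer are only significant on the root rank, so the wrapper must not read them anywhere else. Below is a minimal sketch of the guarded bookkeeping pattern the fix applies (the helper name gatherv_recv_bytes is illustrative, not VampirTrace's actual wrapper code):

/* Bytes the gather delivers at the root; 0 elsewhere.  recvcounts and
 * recvtype are only dereferenced where they are significant. */
#include <mpi.h>

static int gatherv_recv_bytes(const int* recvcounts, MPI_Datatype recvtype,
                              int root, MPI_Comm comm)
{
  int me = 0, nranks = 0, typesize = 0, total = 0, i;

  PMPI_Comm_rank(comm, &me);

  if (me == root)  /* only the root may touch recvcounts/recvtype */
  {
    PMPI_Comm_size(comm, &nranks);
    PMPI_Type_size(recvtype, &typesize);
    for (i = 0; i < nranks; i++)
      total += recvcounts[i];
  }

  return total * typesize;  /* 0 on non-root ranks */
}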

This commit was SVN r25584.
This commit is contained in:
Matthias Jurenz 2011-12-07 12:26:38 +00:00
parent 3e7ab1212a
commit 05549e889b
17 changed files with 440 additions and 356 deletions

View file

@ -1,5 +1,15 @@
5.12openmpi
- updated version of internal OTF to 1.10openmpi
5.12.1openmpi
- fixed a bug in the MPI wrapper functions MPI_Gatherv and MPI_Scatterv
which occurred due to illegal access to insignificant parameters on
non-root ranks
- vtunify-mpi:
- added option '--stats' to unify only summarized information,
no events
- reduced memory usage
- OPARI: (see tools/opari/ChangeLog:20,21)
5.12
- updated version of internal OTF to 1.10coelacanth
(see extlib/otf/ChangeLog)
- added support for CUDA runtime tracing via CUPTI callbacks
- added support for process group counters

View file

@ -1 +1 @@
5.12openmpi
5.12.1openmpi

View file

@ -66,7 +66,7 @@ BODY { font-family: sans-serif; }
<P>
<P>
<B><BIG CLASS="XHUGE">VampirTrace 5.12&nbsp;User Manual</BIG></B>
<B><BIG CLASS="XHUGE">VampirTrace 5.12.1&nbsp;User Manual</BIG></B>
<BR>
<BR>
<BR>
@ -4148,6 +4148,8 @@ options:
-q, --quiet Enable quiet mode.
(only emergency output)
--stats Unify only summarized information (*.stats), no events
--nocompress Don't compress output trace files.
--nomsgmatch Don't match messages.

Binary data
ompi/contrib/vt/vt/doc/UserManual.pdf

Binary file not shown.

View file

@ -1,3 +1,17 @@
21. Fixed processing of Fortran line-continuation
appearing after a complete OpenMP directive
For example
!$omp parallel
!$omp do private(a,b,c, &
!$omp d,e,f)
will not cause an "unbalanced pragma/directive nesting" error anymore.
20. Fixed Fortran parsing
for detecting DO loop beginnings
19. Revised 18.
to get back support for comma-separated OpenMP clauses.

View file

@ -1,3 +1,17 @@
21. Fixed processing of Fortran line-continuation
appearing after a complete OpenMP directive
For example
!$omp parallel
!$omp do private(a,b,c, &
!$omp d,e,f)
will not cause an "unbalanced pragma/directive nesting" error anymore.
20. Fixed Fortran parsing
for detecting DO loop beginnings
19. Revised 18.
to get back support for comma-separated OpenMP clauses.

View file

@ -206,7 +206,7 @@ namespace {
pragma->find_name();
pragma->pline=save_pline; // reset parse position
pragma->ppos=save_ppos;
if(pragma->name.find("do")!=string::npos) {
if(pragma->name.find("do ")!=string::npos) {
linetype=PRAGMA_LOOPSTART;
if(pragma->name == "enddo") linetype=PRAGMA_LOOPEND;
else if(pragma->name == "paralleldo") linetype=PRAGMA_PARLOOPSTART;
@ -416,10 +416,16 @@ void process_fortran(istream& is, const char* infile, ostream& os,
// continuation directive line
currPragma->lines.push_back(lowline);
} else {
if ( currPragma ) {
delete currPragma;
}
// new directive
if ( currPragma ) {
// if necessary process last complete directive
typeOfLastLine = check_pragma(currPragma);
test_and_insert_ompenddo(os, typeOfLastLine, waitforOMPEndDo,
infile, currPragma->lineno, pragma_indent,
pomp, addSharedDecl);
process_pragma(currPragma, os);
currPragma = 0;
}
currPragma
= new OMPragmaF(infile, lineno, pstart+5+pomp, lowline, pomp,
addSharedDecl);
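
ChangeLog entry 20 and the find("do ") change above address a substring pitfall: matching the bare string "do" also hits identifiers such as "domain" inside a clause, so the keyword must be followed by a space. A simplified sketch of the idea (the helper looks_like_do_loop is illustrative, not OPARI's actual parser):

// Lowercased directive text is treated as a DO-loop start only if the
// keyword "do" is followed by a space, not merely contained in a name.
#include <iostream>
#include <string>

static bool looks_like_do_loop(const std::string& directive)
{
  return directive.find("do ") != std::string::npos;
}

int main()
{
  std::cout << looks_like_do_loop("parallel private(domain) ") << '\n'; // 0
  std::cout << looks_like_do_loop("do private(a,b,c) ") << '\n';        // 1
  return 0;
}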

View file

@ -353,13 +353,14 @@ MutatorC::run()
// finalize insertion set
//
if( !error && !m_appAddrSpace->finalizeInsertionSet( true, 0 ) )
if( !m_appAddrSpace->finalizeInsertionSet( true, 0 ) )
{
std::cerr << ExeName << ": [" << ExePid << "]: "
<< "Error: Could not finalize instrumentation set for "
<< "function '" << inst_funcs[i].name << "'. Aborting."
<< std::endl;
error = true;
break;
}
}
inst_funcs.clear();

View file

@ -113,6 +113,9 @@ std::vector<uint32_t> MyStreamIds;
// map stream id <-> processing MPI-rank
std::map<uint32_t, VT_MPI_INT> StreamId2Rank;
// map MPI-rank <-> stream ids
std::map<VT_MPI_INT, std::set<uint32_t> > Rank2StreamIds;
#endif // VT_MPI
int
@ -201,10 +204,6 @@ VTUNIFY_MAIN( int argc, char ** argv )
#ifdef VT_MPI
if( NumRanks > 1 )
{
// share token translations to all ranks
if( (error = !theTokenFactory->share()) )
break;
// share user communication ids to all ranks
if( (error = !theUserCom->share()) )
break;
@ -212,7 +211,7 @@ VTUNIFY_MAIN( int argc, char ** argv )
#endif // VT_MPI
// unify events
if( (error = !theEvents->run()) )
if( !Params.onlystats && (error = !theEvents->run()) )
break;
// unify statistics
@ -756,6 +755,9 @@ getUnifyControls()
// set stream id/rank mapping
StreamId2Rank[uctl->streamid] = rank;
// add stream id to processing rank
Rank2StreamIds[rank].insert( uctl->streamid );
// get rank for the next stream id
//
if( i < UnifyCtls.size() - 1 && UnifyCtls[i+1]->pstreamid == 0 )
@ -892,6 +894,10 @@ parseCommandLine( int argc, char ** argv )
Params.showprogress = false;
Params.verbose_level = 0;
}
else if( strcmp( argv[i], "--stats" ) == 0 )
{
Params.onlystats = true;
}
else if( strcmp( argv[i], "-v" ) == 0
|| strcmp( argv[i], "--verbose" ) == 0 )
{
@ -1044,7 +1050,7 @@ cleanUp()
break;
// rename temporary event output files
if( (error = !theEvents->cleanUp()) )
if( !Params.onlystats && (error = !theEvents->cleanUp()) )
break;
// rename temporary statistic output files
@ -1140,6 +1146,8 @@ showUsage()
<< " -q, --quiet Enable quiet mode." << std::endl
<< " (only emergency output)" << std::endl
<< std::endl
<< " --stats Unify only summarized information (*.stats), no events" << std::endl
<< std::endl
#if defined(HAVE_ZLIB) && HAVE_ZLIB
<< " --nocompress Don't compress output trace files." << std::endl
<< std::endl
@ -1174,8 +1182,8 @@ shareParams()
//
char **filenames;
char flags[8];
VT_MPI_INT blockcounts[4] = { 3*1024, 1, 1, 8 };
char flags[9];
VT_MPI_INT blockcounts[4] = { 3*1024, 1, 1, 9 };
MPI_Aint displ[4];
MPI_Datatype oldtypes[4] =
{ MPI_CHAR, MPI_UNSIGNED_SHORT, MPI_INT,
@ -1210,8 +1218,9 @@ shareParams()
flags[3] = (char)Params.showversion;
flags[4] = (char)Params.showprogress;
flags[5] = (char)Params.bequiet;
flags[6] = (char)Params.domsgmatch;
flags[7] = (char)Params.droprecvs;
flags[6] = (char)Params.onlystats;
flags[7] = (char)Params.domsgmatch;
flags[8] = (char)Params.droprecvs;
}
// share unify parameters
@ -1230,8 +1239,9 @@ shareParams()
Params.showversion = (flags[3] == 1);
Params.showprogress = (flags[4] == 1);
Params.bequiet = (flags[5] == 1);
Params.domsgmatch = (flags[6] == 1);
Params.droprecvs = (flags[7] == 1);
Params.onlystats = (flags[6] == 1);
Params.domsgmatch = (flags[7] == 1);
Params.droprecvs = (flags[8] == 1);
}
delete [] filenames[0];

View file

@ -27,6 +27,7 @@
#include "vt_inttypes.h"
#include <map>
#include <set>
#include <string>
#include <vector>
@ -65,8 +66,8 @@ struct ParamsS
ParamsS()
: verbose_level( 0 ), docompress( false ), doclean( true ),
showusage( false ), showversion( false ), showprogress( false ),
bequiet( false ), domsgmatch( false ), droprecvs( false ),
prof_sort_flags( 0x22 ), createthumb( false )
bequiet( false ), onlystats( false ), domsgmatch( false ),
droprecvs( false ), prof_sort_flags( 0x22 ), createthumb( false )
{
#if defined(HAVE_ZLIB) && HAVE_ZLIB
docompress = true;
@ -90,6 +91,7 @@ struct ParamsS
bool showversion; // flag: show VampirTrace version?
bool showprogress; // flag: show progress?
bool bequiet; // flag: print no messages?
bool onlystats; // flag: unify only summarized information?
// HooksMsgMatchC's parameters
//
@ -212,6 +214,9 @@ extern std::vector<uint32_t> MyStreamIds;
// map stream id <-> processing MPI-rank
extern std::map<uint32_t, VT_MPI_INT> StreamId2Rank;
// map MPI-rank <-> stream ids
extern std::map<VT_MPI_INT, std::set<uint32_t> > Rank2StreamIds;
#endif // VT_MPI
#endif // _VT_UNIFY_H_

View file

@ -396,8 +396,8 @@ DefinitionsC::readLocal()
if( (error = !readLocal( MyStreamIds[i], loc_defs )) )
break;
// abort loop, if next stream isn't a child
if( i < MyStreamIds.size() - 1 &&
// continue reading if the next stream is a child
if( i == MyStreamIds.size() - 1 ||
StreamId2UnifyCtl[MyStreamIds[i+1]]->pstreamid == 0 )
break;
}
@ -408,21 +408,23 @@ DefinitionsC::readLocal()
defs_read = loc_defs.size() - defs_read;
// continue, if nothing is read
if( ( i >= MyStreamIds.size() - 1 && loc_defs.empty() ) ||
( i < MyStreamIds.size() - 1 && defs_read == 0 ) )
if( i < MyStreamIds.size() - 1 && defs_read == 0 )
continue;
// pre-sort subset of local definitions
//
if( defs_read > 0 )
{
// pre-sort subset of local definitions
//
// get begin iterator of subset
//
LargeVectorC<DefRec_BaseS*>::iterator sort_begin_it = loc_defs.begin();
if( loc_defs.size() != defs_read )
sort_begin_it += ( loc_defs.size() - defs_read - 1 );
// get begin iterator of subset
//
LargeVectorC<DefRec_BaseS*>::iterator sort_begin_it = loc_defs.begin();
if( loc_defs.size() != defs_read )
sort_begin_it += ( loc_defs.size() - defs_read - 1 );
// pre-sort
std::sort( sort_begin_it, loc_defs.end(), DefRec_LocCmp );
// pre-sort
std::sort( sort_begin_it, loc_defs.end(), DefRec_LocCmp );
}
MASTER
{
@ -476,17 +478,15 @@ DefinitionsC::readLocal()
// loc_defs
//
for( uint32_t j = 0; j < loc_defs.size(); j++ )
{
// definition type (loc_defs[j]->dtype)
CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) );
buffer_size += size;
// loc_defs[j]
buffer_size += loc_defs[j]->getPackSize();
}
// finished flag
//
CALL_MPI( MPI_Pack_size( 1, MPI_CHAR, MPI_COMM_WORLD, &size ) );
buffer_size += size;
// continue reading, if minimum buffer size isn't reached
if( i < MyStreamIds.size() && buffer_size < min_msg_size )
if( i < MyStreamIds.size() - 1 && buffer_size < min_msg_size )
continue;
// allocate memory for the send buffer
@ -500,6 +500,7 @@ DefinitionsC::readLocal()
buffer_pos = 0;
// loc_defs.size()
//
uint32_t loc_defs_size = loc_defs.size();
CALL_MPI( MPI_Pack( &loc_defs_size, 1, MPI_UNSIGNED, buffer,
buffer_size, &buffer_pos, MPI_COMM_WORLD ) );
@ -507,15 +508,13 @@ DefinitionsC::readLocal()
// loc_defs
//
for( uint32_t j = 0; j < loc_defs.size(); j++ )
{
// definition type (loc_defs[j]->dtype)
CALL_MPI( MPI_Pack( &(loc_defs[j]->dtype), 1, MPI_UNSIGNED,
buffer, buffer_size, &buffer_pos,
MPI_COMM_WORLD ) );
// loc_defs[j]
loc_defs[j]->pack( buffer, buffer_size, buffer_pos );
}
// finished flag
//
char finished = ( i == MyStreamIds.size() - 1 );
CALL_MPI( MPI_Pack( &finished, 1, MPI_CHAR, buffer, buffer_size,
&buffer_pos, MPI_COMM_WORLD ) );
// send buffer to rank 0
//
@ -539,25 +538,30 @@ DefinitionsC::readLocal()
}
#ifdef VT_MPI
SyncError( &error );
if( !error && NumRanks > 1 )
// all ranks are finished reading local definitions at this point
//
if( NumRanks > 1 && !SyncError( &error ) )
{
MASTER
{
// receive local definitions from all participating ranks
//
// number of ranks finished
VT_MPI_INT finished_ranks_num = 1; // 1=me
// get number of finished ranks
//
VT_MPI_INT finished_ranks_num = 1; // =rank 0
for( VT_MPI_INT i = 1; i < NumRanks; i++ )
{
if( Rank2StreamIds[i].empty() ) // rank i has nothing to do?
finished_ranks_num++;
}
// repeat until all ranks are finished reading local definitions
//
while( finished_ranks_num < NumRanks )
{
// source rank finished?
bool finished = false;
char * buffer;
VT_MPI_INT buffer_size;
VT_MPI_INT buffer_pos;
@ -579,6 +583,9 @@ DefinitionsC::readLocal()
buffer = new char[buffer_size];
assert( buffer );
PVPrint( 3, " Receiving local definitions from rank %d\n",
rank );
// receive buffer
CALL_MPI( MPI_Recv( buffer, buffer_size, MPI_PACKED, rank, msg_tag,
MPI_COMM_WORLD, &status ) );
@ -589,28 +596,23 @@ DefinitionsC::readLocal()
buffer_pos = 0;
// loc_defs.size()
//
uint32_t loc_defs_size;
CALL_MPI( MPI_Unpack( buffer, buffer_size, &buffer_pos,
&loc_defs_size, 1, MPI_UNSIGNED,
MPI_COMM_WORLD ) );
// is source rank finished?
if( loc_defs_size == 0 )
{
finished = true;
finished_ranks_num++;
}
else
{
PVPrint( 3, " Receiving local definitions from rank %d\n",
rank );
}
// loc_defs
//
for( uint32_t i = 0; i < loc_defs_size; i++ )
{
// definition type
// (don't increment current buffer position;
// def. type will be unpacked again by DefRec_*S::unpack())
//
DefRecTypeT def_type;
CALL_MPI( MPI_Unpack( buffer, buffer_size, &buffer_pos,
VT_MPI_INT tmp_buffer_pos = buffer_pos;
CALL_MPI( MPI_Unpack( buffer, buffer_size, &tmp_buffer_pos,
&def_type, 1, MPI_UNSIGNED,
MPI_COMM_WORLD ) );
@ -725,77 +727,65 @@ DefinitionsC::readLocal()
loc_defs.push_back( new_loc_def );
}
// finished flag
//
char finished;
CALL_MPI( MPI_Unpack( buffer, buffer_size, &buffer_pos, &finished,
1, MPI_CHAR, MPI_COMM_WORLD ) );
// free memory of receive buffer
delete [] buffer;
if( !finished )
{
// add local to global definitions
error = !processLocal( loc_defs );
// add local to global definitions
if( (error = !processLocal( loc_defs )) )
break;
// free vector of local definitions
//
for( uint32_t i = 0; i < loc_defs.size(); i++ )
delete loc_defs[i];
loc_defs.clear();
// free vector of local definitions
//
for( uint32_t i = 0; i < loc_defs.size(); i++ )
delete loc_defs[i];
loc_defs.clear();
// is source rank finished reading local definitions?
if( finished )
{
// increment number of finished ranks
finished_ranks_num++;
// send token translations to finished rank
if( (error =
!theTokenFactory->distTranslations( rank,
finished_ranks_num == NumRanks )) )
break;
}
}
}
else // SLAVE
{
// send a notification to rank 0 that my rank is finished reading
// local definitions
// (empty vector of local definitions)
if( !MyStreamIds.empty() )
{
char * buffer;
VT_MPI_INT buffer_size;
VT_MPI_INT buffer_pos = 0;
// complete all sends and remove request handles and send buffers
// from list
while( send_buffers.size() > 0 )
{
// get the first request handle and send buffer from list
//
MPI_Request & request = send_buffers.front().first;
char *& buffer = send_buffers.front().second;
// get size needed for the send buffer
CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD,
&buffer_size ) );
// wait until send is completed
//
MPI_Status status;
CALL_MPI( MPI_Wait( &request, &status ) );
// allocate memory for the send buffer
//
buffer = new char[buffer_size];
assert( buffer );
// free memory of send buffer
delete [] buffer;
// remove request handle and send buffer from list
send_buffers.pop_front();
}
// pack send buffer
//
uint32_t finished = 0;
CALL_MPI( MPI_Pack( &finished, 1, MPI_UNSIGNED, buffer, buffer_size,
&buffer_pos, MPI_COMM_WORLD ) );
// send buffer to rank 0
//
MPI_Request request;
CALL_MPI( MPI_Isend( buffer, buffer_size, MPI_PACKED, 0, msg_tag,
MPI_COMM_WORLD, &request ) );
// add request handle and send buffer to list
send_buffers.push_back( std::make_pair( request, buffer ) );
}
// complete all sends and remove request handles and send buffers
// from list
while( send_buffers.size() > 0 )
{
// get the first request handle and send buffer from list
//
MPI_Request & request = send_buffers.front().first;
char *& buffer = send_buffers.front().second;
// wait until send is completed
//
MPI_Status status;
CALL_MPI( MPI_Wait( &request, &status ) );
// free memory of send buffer
delete [] buffer;
// remove request handle and send buffer from list
send_buffers.pop_front();
// receive token translations from rank 0
error = !theTokenFactory->distTranslations();
}
}
}

View file

@ -78,95 +78,181 @@ TokenFactoryC::getScope( const DefRecTypeT & type ) const
#ifdef VT_MPI
bool
TokenFactoryC::share()
TokenFactoryC::distTranslations( const VT_MPI_INT & destRank,
const bool wait )
{
bool error = false;
assert( NumRanks > 1 );
assert( !m_def2scope.empty() );
// block until all ranks have reached this point
CALL_MPI( MPI_Barrier( MPI_COMM_WORLD ) );
// message tag to use for p2p communication
const VT_MPI_INT msg_tag = 200;
VPrint( 1, "Sharing token translation tables\n" );
char * buffer;
VT_MPI_INT buffer_pos;
VT_MPI_INT buffer_size;
MPI_Status status;
MASTER
{
assert( destRank != 0 );
// send token translation tables to given destination rank
//
PVPrint( 3, " Sending token translation tables to rank %d\n", destRank );
// request handle for non-blocking send
static MPI_Request request = MPI_REQUEST_NULL;
// send buffer
static char * buffer = 0;
// get stream ids associated with given destination rank
const std::set<uint32_t> & stream_ids = Rank2StreamIds[destRank];
// convert stream ids to master process ids
// (=keys of token translation tables)
//
std::set<uint32_t> mprocess_ids;
for( std::set<uint32_t>::const_iterator stream_it = stream_ids.begin();
stream_it != stream_ids.end(); stream_it++ )
mprocess_ids.insert( *stream_it & VT_TRACEID_BITMASK );
// get size needed for the send buffer
//
VT_MPI_INT size;
buffer_size = 0;
for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator it =
m_def2scope.begin(); it != m_def2scope.end(); it++ )
for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator scope_it =
m_def2scope.begin(); scope_it != m_def2scope.end(); scope_it++ )
{
// get scope
TokenFactoryScopeC<DefRec_BaseS> * scope =
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( it->second );
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( scope_it->second );
// get size of token translation map
buffer_size += scope->getPackSize();
// get size needed to pack the number of translation tables into
// the send buffer
//
CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) );
buffer_size += size;
// get size needed to pack the token translation tables into the
// send buffer
//
for( std::set<uint32_t>::const_iterator proc_it = mprocess_ids.begin();
proc_it != mprocess_ids.end(); proc_it++ )
buffer_size += scope->getPackSize( *proc_it );
}
}
// broadcast buffer size
CALL_MPI( MPI_Bcast( &buffer_size, 1, MPI_INT, 0, MPI_COMM_WORLD ) );
// wait until previous send is completed and free memory of the
// send buffer
//
if( request != MPI_REQUEST_NULL )
{
assert( buffer );
// allocate memory for the send/receive buffer
//
buffer = new char[buffer_size];
assert( buffer );
CALL_MPI( MPI_Wait( &request, &status ) );
delete [] buffer;
}
// allocate memory for the send buffer
//
buffer = new char[buffer_size];
assert( buffer );
MASTER
{
// pack send buffer
//
buffer_pos = 0;
for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator it =
m_def2scope.begin(); it != m_def2scope.end(); it++ )
for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator scope_it =
m_def2scope.begin(); scope_it != m_def2scope.end(); scope_it++ )
{
// get scope
TokenFactoryScopeC<DefRec_BaseS> * scope =
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( it->second );
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( scope_it->second );
// pack token translation map to buffer
scope->pack( buffer, buffer_size, buffer_pos );
// pack number of token translation tables into the send buffer
//
uint32_t mprocess_size = mprocess_ids.size();
CALL_MPI( MPI_Pack( &mprocess_size, 1, MPI_UNSIGNED, buffer,
buffer_size, &buffer_pos, MPI_COMM_WORLD ) );
// pack token translation tables into the send buffer
//
for( std::set<uint32_t>::const_iterator proc_it = mprocess_ids.begin();
proc_it != mprocess_ids.end(); proc_it++ )
scope->pack( *proc_it, buffer, buffer_size, buffer_pos );
}
// send buffer
CALL_MPI( MPI_Isend( buffer, buffer_size, MPI_PACKED, destRank, msg_tag,
MPI_COMM_WORLD, &request ) );
// if it's the last send, wait until completion and free memory of the
// send buffer
//
if( wait )
{
CALL_MPI( MPI_Wait( &request, &status ) );
delete [] buffer;
}
}
// broadcast buffer
CALL_MPI( MPI_Bcast( buffer, buffer_size, MPI_PACKED, 0, MPI_COMM_WORLD ) );
SLAVE
else // SLAVE
{
// receive token translation tables from rank 0
//
PVPrint( 3, " Receiving token translation tables from rank 0\n" );
// receive buffer
char * buffer;
// test for a message from rank 0
CALL_MPI( MPI_Probe( 0, msg_tag, MPI_COMM_WORLD, &status ) );
// get size needed for the receive buffer
CALL_MPI( MPI_Get_count( &status, MPI_PACKED, &buffer_size ) );
// allocate memory for the receive buffer
//
buffer = new char[buffer_size];
assert( buffer );
// receive buffer
CALL_MPI( MPI_Recv( buffer, buffer_size, MPI_PACKED, 0, msg_tag,
MPI_COMM_WORLD, &status ) );
// unpack receive buffer
//
buffer_pos = 0;
for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator it =
m_def2scope.begin(); it != m_def2scope.end(); it++ )
for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator scope_it =
m_def2scope.begin(); scope_it != m_def2scope.end(); scope_it++ )
{
// get scope
TokenFactoryScopeC<DefRec_BaseS> * scope =
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( it->second );
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( scope_it->second );
// unpack token translation map from buffer
scope->unpack( buffer, buffer_size, buffer_pos );
// unpack the number of token translation tables from the
// receive buffer
uint32_t mprocess_size;
CALL_MPI( MPI_Unpack( buffer, buffer_size, &buffer_pos, &mprocess_size, 1,
MPI_UNSIGNED, MPI_COMM_WORLD ) );
// unpack token translation tables from the receive buffer
//
for( uint32_t i = 0; i < mprocess_size; i++ )
scope->unpack( buffer, buffer_size, buffer_pos );
}
// free memory of the receive buffer
delete [] buffer;
}
// free memory of send/receive buffer
delete [] buffer;
// SyncError( &error );
return !error;
}

View file

@ -39,8 +39,11 @@ public:
TokenFactoryScopeI * getScope( const DefRecTypeT & type ) const;
#ifdef VT_MPI
// share token translations to all ranks
bool share();
// distribute token translation tables
bool distTranslations( const VT_MPI_INT & destRank = 0,
const bool wait = false );
#endif // VT_MPI
private:

View file

@ -45,21 +45,23 @@ public:
// translate local to global token
virtual uint32_t translate( const uint32_t & process,
const uint32_t & localToken,
const bool & showError = true ) const = 0;
const bool showError = true ) const = 0;
// get next unused global token
virtual uint32_t getNextToken() = 0;
#ifdef VT_MPI
// get size needed to pack token translation map
virtual VT_MPI_INT getPackSize() = 0;
// get size needed to pack token translation tables of certain process into
// a buffer
virtual VT_MPI_INT getPackSize( const uint32_t & process ) = 0;
// pack token translations into a buffer
virtual void pack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos ) = 0;
// pack token translation tables of certain process into a buffer
virtual void pack( const uint32_t & process, char *& buffer,
const VT_MPI_INT & bufferSize, VT_MPI_INT & bufferPos,
const bool clear = true ) = 0;
// unpack token translations from a buffer
// unpack token translation tables from a buffer
virtual void unpack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos ) = 0;
@ -91,30 +93,32 @@ public:
// translate local to global token
inline uint32_t translate( const uint32_t & process,
const uint32_t & localToken,
const bool & showError = true ) const;
const bool showError = true ) const;
// get next unused global token
inline uint32_t getNextToken();
#ifdef VT_MPI
// get size needed to pack token translation map
VT_MPI_INT getPackSize();
// get size needed to pack token translation tables of certain process into
// a buffer
VT_MPI_INT getPackSize( const uint32_t & process );
// pack token translations into a buffer
void pack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos );
// pack token translation tables of certain process into a buffer
void pack( const uint32_t & process, char *& buffer,
const VT_MPI_INT & bufferSize, VT_MPI_INT & bufferPos,
const bool clear = true );
// unpack token translations from a buffer
// unpack token translation tables from a buffer
void unpack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos );
VT_MPI_INT & bufferPos );
#endif // VT_MPI
private:
// map process id <-> local/global token
std::map<uint32_t, std::map<uint32_t, uint32_t> > m_mapLocGlobToken;
// map process id <-> map local/global token
std::map<uint32_t, std::map<uint32_t, uint32_t> > m_proc2TokenMap;
// pointer to target global definitions
std::set<T> * m_globDefs;

View file

@ -79,13 +79,13 @@ TokenFactoryScopeC<T>::setTranslation( const uint32_t & process,
uint32_t mprocess = process & VT_TRACEID_BITMASK;
// set token translation
m_mapLocGlobToken[mprocess][localToken] = globalToken;
m_proc2TokenMap[mprocess][localToken] = globalToken;
}
template <class T>
uint32_t
TokenFactoryScopeC<T>::translate( const uint32_t & process,
const uint32_t & localToken, const bool & showError ) const
const uint32_t & localToken, const bool showError ) const
{
uint32_t global_token = 0;
@ -94,10 +94,10 @@ TokenFactoryScopeC<T>::translate( const uint32_t & process,
// search token mappings of process
std::map<uint32_t, std::map<uint32_t, uint32_t> >::const_iterator
proc_it = m_mapLocGlobToken.find( mprocess );
proc_it = m_proc2TokenMap.find( mprocess );
// found?
if( proc_it != m_mapLocGlobToken.end() )
if( proc_it != m_proc2TokenMap.end() )
{
// search token mapping by local token
std::map<uint32_t, uint32_t>::const_iterator map_it =
@ -131,37 +131,28 @@ TokenFactoryScopeC<T>::getNextToken()
template <class T>
VT_MPI_INT
TokenFactoryScopeC<T>::getPackSize()
TokenFactoryScopeC<T>::getPackSize( const uint32_t & process )
{
VT_MPI_INT buffer_size;
VT_MPI_INT buffer_size = 0;
VT_MPI_INT size;
// m_mapLocGlobToken.size()
CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &buffer_size ) );
// m_mapLocGlobToken
// process + m_proc2TokenMap[process].size()
//
if( m_mapLocGlobToken.size() > 0 )
CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) );
buffer_size += size;
// get token translation table of process
std::map<uint32_t, std::map<uint32_t, uint32_t> >::const_iterator
token_map_it = m_proc2TokenMap.find( process );
// m_proc2TokenMap[process]
//
if( token_map_it != m_proc2TokenMap.end() &&
!token_map_it->second.empty() )
{
VT_MPI_INT size;
std::map<uint32_t, std::map<uint32_t, uint32_t> >::const_iterator proc_it;
for( proc_it = m_mapLocGlobToken.begin();
proc_it != m_mapLocGlobToken.end(); proc_it++ )
{
// m_mapLocGlobToken[].first + m_mapLocGlobToken[].second.size()
//
CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) );
buffer_size += size;
if( proc_it->second.size() > 0 )
{
// m_mapLocGlobToken[].second
//
CALL_MPI( MPI_Pack_size( (VT_MPI_INT)proc_it->second.size() * 2,
MPI_UNSIGNED, MPI_COMM_WORLD, &size ) );
buffer_size += size;
}
}
CALL_MPI( MPI_Pack_size( token_map_it->second.size() * 2, MPI_UNSIGNED,
MPI_COMM_WORLD, &size ) );
buffer_size += size;
}
return buffer_size;
@ -169,64 +160,43 @@ TokenFactoryScopeC<T>::getPackSize()
template <class T>
void
TokenFactoryScopeC<T>::pack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos )
TokenFactoryScopeC<T>::pack( const uint32_t & process,
char *& buffer, const VT_MPI_INT & bufferSize, VT_MPI_INT & bufferPos,
const bool clear )
{
// m_mapLocGlobToken.size()
// process
CALL_MPI( MPI_Pack( const_cast<uint32_t*>( &process ), 1, MPI_UNSIGNED,
buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) );
// get token translation table of process
std::map<uint32_t, std::map<uint32_t, uint32_t> >::iterator token_map_it =
m_proc2TokenMap.find( process );
// m_proc2TokenMap[process].size()
//
uint32_t proc_map_size = m_mapLocGlobToken.size();
CALL_MPI( MPI_Pack( &proc_map_size, 1, MPI_UNSIGNED, buffer, bufferSize,
uint32_t token_map_size =
( token_map_it != m_proc2TokenMap.end() ) ?
token_map_it->second.size() : 0;
CALL_MPI( MPI_Pack( &token_map_size, 1, MPI_UNSIGNED, buffer, bufferSize,
&bufferPos, MPI_COMM_WORLD ) );
// m_mapLocGlobToken
// m_proc2TokenMap[process]
//
if( proc_map_size > 0 )
if( token_map_it != m_proc2TokenMap.end() )
{
std::map<uint32_t, std::map<uint32_t, uint32_t> >::const_iterator proc_it;
for( proc_it = m_mapLocGlobToken.begin();
proc_it != m_mapLocGlobToken.end(); proc_it++ )
for( std::map<uint32_t, uint32_t>::const_iterator token_pair_it =
token_map_it->second.begin();
token_pair_it != token_map_it->second.end(); token_pair_it++ )
{
// m_mapLocGlobToken[].first
//
uint32_t proc = proc_it->first;
CALL_MPI( MPI_Pack( &proc, 1, MPI_UNSIGNED, buffer, bufferSize,
&bufferPos, MPI_COMM_WORLD ) );
// m_mapLocGlobToken[].second.size()
//
uint32_t token_map_size = proc_it->second.size();
CALL_MPI( MPI_Pack( &token_map_size, 1, MPI_UNSIGNED, buffer,
uint32_t token_pair[2] =
{ token_pair_it->first, token_pair_it->second };
CALL_MPI( MPI_Pack( token_pair, 2, MPI_UNSIGNED, buffer,
bufferSize, &bufferPos, MPI_COMM_WORLD ) );
// m_mapLocGlobToken[].second
//
if( token_map_size > 0 )
{
uint32_t * token_map_firsts = new uint32_t[token_map_size];
uint32_t * token_map_seconds = new uint32_t[token_map_size];
std::map<uint32_t, uint32_t>::const_iterator tk_it;
uint32_t i;
for( tk_it = proc_it->second.begin(), i = 0;
tk_it != proc_it->second.end(), i < token_map_size;
tk_it++, i++ )
{
token_map_firsts[i] = tk_it->first;
token_map_seconds[i] = tk_it->second;
}
CALL_MPI( MPI_Pack( token_map_firsts, (VT_MPI_INT)token_map_size,
MPI_UNSIGNED, buffer, bufferSize, &bufferPos,
MPI_COMM_WORLD ) );
CALL_MPI( MPI_Pack( token_map_seconds, (VT_MPI_INT)token_map_size,
MPI_UNSIGNED, buffer, bufferSize, &bufferPos,
MPI_COMM_WORLD ) );
delete [] token_map_firsts;
delete [] token_map_seconds;
}
}
// clear token translation table of certain process id
if( clear )
m_proc2TokenMap.erase( token_map_it );
}
}
@ -235,52 +205,30 @@ void
TokenFactoryScopeC<T>::unpack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos )
{
// m_mapLocGlobToken.size()
// process
//
uint32_t proc_map_size;
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &proc_map_size, 1,
uint32_t process;
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &process, 1,
MPI_UNSIGNED, MPI_COMM_WORLD ) );
// m_mapLocGlobToken
// m_proc2TokenMap[process].size()
//
if( proc_map_size > 0 )
uint32_t token_map_size;
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &token_map_size, 1,
MPI_UNSIGNED, MPI_COMM_WORLD ) );
// m_proc2TokenMap[process]
//
if( token_map_size > 0 )
{
for( uint32_t i = 0; i < proc_map_size; i++ )
for( uint32_t i = 0; i < token_map_size; i++ )
{
// m_mapLocGlobToken[].first
//
uint32_t proc;
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &proc, 1,
uint32_t token_pair[2];
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, token_pair, 2,
MPI_UNSIGNED, MPI_COMM_WORLD ) );
// m_mapLocGlobToken[].second.size()
//
uint32_t token_map_size;
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &token_map_size,
1, MPI_UNSIGNED, MPI_COMM_WORLD ) );
// m_mapLocGlobToken[].second
//
if( token_map_size > 0 )
{
uint32_t * token_map_firsts = new uint32_t[token_map_size];
uint32_t * token_map_seconds = new uint32_t[token_map_size];
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos,
token_map_firsts, (VT_MPI_INT)token_map_size,
MPI_UNSIGNED, MPI_COMM_WORLD ) );
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos,
token_map_seconds, (VT_MPI_INT)token_map_size,
MPI_UNSIGNED, MPI_COMM_WORLD ) );
// set token translations for process
//
for( uint32_t j = 0; j < token_map_size; j++ )
setTranslation( proc, token_map_firsts[j], token_map_seconds[j] );
delete [] token_map_firsts;
delete [] token_map_seconds;
}
// set token translation
m_proc2TokenMap[process][token_pair[0]] = token_pair[1];
}
}
}
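
The restructured pack/unpack above ships one token translation table per master process id, laid out as the process id, the number of (local, global) token pairs, and then the pairs themselves. A hedged sketch of that layout using plain MPI_Pack (the helper name pack_token_table and the bare std::map are illustrative, not the actual TokenFactoryScopeC interface):

#include <mpi.h>
#include <map>
#include <stdint.h>

// Pack one process's token translation table into an MPI_PACKED buffer:
// [ process id | table size | (local, global) | (local, global) | ... ]
static void pack_token_table(uint32_t process,
                             const std::map<uint32_t, uint32_t>& table,
                             char* buffer, int buffer_size, int& buffer_pos)
{
  uint32_t size = (uint32_t)table.size();

  MPI_Pack(&process, 1, MPI_UNSIGNED, buffer, buffer_size, &buffer_pos,
           MPI_COMM_WORLD);
  MPI_Pack(&size, 1, MPI_UNSIGNED, buffer, buffer_size, &buffer_pos,
           MPI_COMM_WORLD);

  for (std::map<uint32_t, uint32_t>::const_iterator it = table.begin();
       it != table.end(); ++it)
  {
    uint32_t pair[2] = { it->first, it->second };
    MPI_Pack(pair, 2, MPI_UNSIGNED, buffer, buffer_size, &buffer_pos,
             MPI_COMM_WORLD);
  }
}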

View file

@ -1968,9 +1968,8 @@ VT_MPI_INT MPI_Waitall( VT_MPI_INT count,
time = vt_pform_wtime();
was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_WAITALL]);
if (array_of_statuses == MPI_STATUSES_IGNORE) {
if (array_of_statuses == MPI_STATUSES_IGNORE)
array_of_statuses = vt_get_status_array(count);
}
vt_save_request_array(requests, count);
CALL_PMPI_3(MPI_Waitall, count, requests, array_of_statuses,
@ -2062,9 +2061,8 @@ VT_MPI_INT MPI_Waitsome( VT_MPI_INT incount,
time = vt_pform_wtime();
was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_WAITSOME]);
if (array_of_statuses == MPI_STATUSES_IGNORE) {
if (array_of_statuses == MPI_STATUSES_IGNORE)
array_of_statuses = vt_get_status_array(incount);
}
vt_save_request_array(array_of_requests, incount);
CALL_PMPI_5(MPI_Waitsome, incount, array_of_requests, outcount,
@ -2202,9 +2200,8 @@ VT_MPI_INT MPI_Testall( VT_MPI_INT count,
time = vt_pform_wtime();
was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_TESTALL]);
if (array_of_statuses == MPI_STATUSES_IGNORE) {
if (array_of_statuses == MPI_STATUSES_IGNORE)
array_of_statuses = vt_get_status_array(count);
}
vt_save_request_array(array_of_requests, count);
CALL_PMPI_4(MPI_Testall, count, array_of_requests, flag,
@ -2256,9 +2253,8 @@ VT_MPI_INT MPI_Testsome( VT_MPI_INT incount,
time = vt_pform_wtime();
was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_TESTSOME]);
if (array_of_statuses == MPI_STATUSES_IGNORE) {
if (array_of_statuses == MPI_STATUSES_IGNORE)
array_of_statuses = vt_get_status_array(incount);
}
vt_save_request_array(array_of_requests, incount);
CALL_PMPI_5(MPI_Testsome, incount, array_of_requests, outcount,
@ -2911,12 +2907,15 @@ VT_MPI_INT MPI_Gather( void* sendbuf,
PMPI_Type_size(sendtype, &ssz);
PMPI_Comm_rank(comm, &me);
if ( me == root ) {
PMPI_Comm_size(comm, &N);
PMPI_Type_size(recvtype, &rsz);
} else {
N = rsz = 0;
}
if ( me == root )
{
PMPI_Comm_size(comm, &N);
PMPI_Type_size(recvtype, &rsz);
}
else
{
N = rsz = 0;
}
vt_mpi_collbegin(VT_CURRENT_THREAD, &time,
vt_mpi_regid[VT__MPI_GATHER], matchid,
@ -3035,10 +3034,15 @@ VT_MPI_INT MPI_Gatherv( void* sendbuf,
{
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTTHRD_MY_VTTHRD);
PMPI_Comm_size(comm, &N);
PMPI_Comm_rank(comm, &me);
recvcount = 0;
for(i = 0; i<N; i++) recvcount += recvcounts[i];
recvcount = recvsz = 0;
if (me == root)
{
PMPI_Comm_size(comm, &N);
PMPI_Type_size(recvtype, &recvsz);
for(i = 0; i<N; i++) recvcount += recvcounts[i];
}
#if defined(HAVE_DECL_MPI_IN_PLACE) && HAVE_DECL_MPI_IN_PLACE
if (sendbuf == MPI_IN_PLACE)
@ -3048,19 +3052,7 @@ VT_MPI_INT MPI_Gatherv( void* sendbuf,
}
#endif /* HAVE_DECL_MPI_IN_PLACE */
PMPI_Type_size(recvtype, &recvsz);
PMPI_Type_size(sendtype, &sendsz);
PMPI_Comm_rank(comm, &me);
recvsz = 0;
if ( me == root )
{
PMPI_Type_size(recvtype, &recvsz);
}
else
{
recvcount = 0;
}
vt_mpi_collbegin(VT_CURRENT_THREAD, &time,
vt_mpi_regid[VT__MPI_GATHERV], matchid,
@ -3441,12 +3433,15 @@ VT_MPI_INT MPI_Scatter( void* sendbuf,
PMPI_Type_size(recvtype, &recvsz);
PMPI_Comm_rank(comm, &me);
if ( me == root ) {
PMPI_Comm_size(comm, &N);
PMPI_Type_size(sendtype, &sendsz);
} else {
N = sendsz = 0;
}
if ( me == root )
{
PMPI_Comm_size(comm, &N);
PMPI_Type_size(sendtype, &sendsz);
}
else
{
N = sendsz = 0;
}
vt_mpi_collbegin(VT_CURRENT_THREAD, &time,
vt_mpi_regid[VT__MPI_SCATTER], matchid,
@ -3505,10 +3500,15 @@ VT_MPI_INT MPI_Scatterv( void* sendbuf,
{
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTTHRD_MY_VTTHRD);
PMPI_Comm_size(comm, &N);
PMPI_Comm_rank(comm, &me);
sendcount = 0;
for(i = 0; i<N; i++) sendcount += sendcounts[i];
sendcount = sendsz = 0;
if (me == root)
{
PMPI_Comm_size(comm, &N);
PMPI_Type_size(sendtype, &sendsz);
for(i = 0; i<N; i++) sendcount += sendcounts[i];
}
#if defined(HAVE_DECL_MPI_IN_PLACE) && HAVE_DECL_MPI_IN_PLACE
if (recvbuf == MPI_IN_PLACE)
@ -3518,17 +3518,7 @@ VT_MPI_INT MPI_Scatterv( void* sendbuf,
}
#endif /* HAVE_DECL_MPI_IN_PLACE */
sendsz = 0;
PMPI_Type_size(recvtype, &recvsz);
PMPI_Comm_rank(comm, &me);
if ( me == root )
{
PMPI_Type_size(sendtype, &sendsz);
}
else
{
sendcount = 0;
}
vt_mpi_collbegin(VT_CURRENT_THREAD, &time,
vt_mpi_regid[VT__MPI_SCATTERV], matchid,

View file

@ -839,6 +839,7 @@ static void unify_traces(void)
}
argc++;
if ((vt_env_mode() & VT_MODE_TRACE) == 0) argv[argc++] = strdup("--stats");
#if defined(HAVE_ZLIB) && HAVE_ZLIB
if (!vt_env_compression()) argv[argc++] = strdup("--nocompress");
#endif /* HAVE_ZLIB */