Changes to VT:
- fixed a bug (potential segfault) in the MPI wrapper functions MPI_Gatherv
  and MPI_Scatterv which occurred due to illegal access to insignificant
  parameters on non-root ranks

- vtdyn:
  - stop instrumenting if an error occurred during finalizing the
    instrumentation set

- vtunify-mpi:
  - added option '--stats' to unify only summarized information, no events
  - reduced memory usage on rank 0: immediately send token translation tables
    to the corresponding worker ranks when they are complete
  - send the "finished" flag together with the last set of definitions read
    to rank 0 instead of sending an extra message

- OPARI:
  - fixed detection of DO loop beginnings; if a variable contains "do" in its
    name, it was detected as a DO loop :-(
  - fixed processing of Fortran line-continuation appearing after a complete
    OpenMP directive

This commit was SVN r25584.
Parent: 3e7ab1212a
Commit: 05549e889b
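For context (hypothetical example code, not part of the patch): a minimal C sketch of legal MPI_Gatherv usage in which the receive-side arguments are significant only at the root. Per the MPI standard, non-root ranks may pass NULL for recvbuf, recvcounts and displs, so a tracing wrapper that reads recvcounts[] or recvtype on every rank, as the old MPI_Gatherv/MPI_Scatterv wrappers did, can dereference invalid pointers; the diff below sums recvcounts only when me == root.

/* Hypothetical example (not from VampirTrace): legal MPI_Gatherv usage where
 * the receive-side arguments are significant only at the root.  Non-root
 * ranks pass NULL here; a wrapper that sums recvcounts[] on every rank would
 * read through these NULL pointers. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
  int rank, size, i, sendval;
  int *recvbuf = NULL, *recvcounts = NULL, *displs = NULL;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  sendval = rank;
  if (rank == 0)
  {
    /* recvbuf, recvcounts, displs and recvtype matter only at the root */
    recvbuf    = (int*)malloc(size * sizeof(int));
    recvcounts = (int*)malloc(size * sizeof(int));
    displs     = (int*)malloc(size * sizeof(int));
    for (i = 0; i < size; i++) { recvcounts[i] = 1; displs[i] = i; }
  }

  MPI_Gatherv(&sendval, 1, MPI_INT,
              recvbuf, recvcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);

  if (rank == 0)
  {
    for (i = 0; i < size; i++) printf("%d ", recvbuf[i]);
    printf("\n");
    free(recvbuf); free(recvcounts); free(displs);
  }

  MPI_Finalize();
  return 0;
}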

@@ -1,5 +1,15 @@
5.12openmpi
- updated version of internal OTF to 1.10openmpi
5.12.1openmpi
- fixed a bug in the MPI wrapper functions MPI_Gatherv and MPI_Scatterv
which occurred due to illegal access to insignificant parameters on
non-root ranks
- vtunify-mpi:
- added option '--stats' to unify only summarized information,
no events
- reduced memory usage
- OPARI: (see tools/opari/ChangeLog:20,21)

5.12
- updated version of internal OTF to 1.10coelacanth
(see extlib/otf/ChangeLog)
- added support for CUDA runtime tracing via CUPTI callbacks
- added support for process group counters

@@ -1 +1 @@
5.12openmpi
5.12.1openmpi

@@ -66,7 +66,7 @@ BODY { font-family: sans-serif; }
<P>

<P>
<B><BIG CLASS="XHUGE">VampirTrace 5.12 User Manual</BIG></B>
<B><BIG CLASS="XHUGE">VampirTrace 5.12.1 User Manual</BIG></B>
<BR>
<BR>
<BR>

@@ -4148,6 +4148,8 @@ options:
-q, --quiet Enable quiet mode.
(only emergency output)

--stats Unify only summarized information (*.stats), no events

--nocompress Don't compress output trace files.

--nomsgmatch Don't match messages.

Binary data: ompi/contrib/vt/vt/doc/UserManual.pdf (binary file not shown)

@@ -1,3 +1,17 @@
21. Fixed processing of Fortran line-continuation
appearing after a complete OpenMP directive

For example

!$omp parallel
!$omp do private(a,b,c, &
!$omp d,e,f)

will not cause an "unbalanced pragma/directive nesting" error anymore.

20. Fixed Fortran parsing
for detecting DO loop beginnings

19. Revised 18.
to get back support for comma-separated OpenMP clauses.

@@ -206,7 +206,7 @@ namespace {
pragma->find_name();
pragma->pline=save_pline; // reset parse position
pragma->ppos=save_ppos;
if(pragma->name.find("do")!=string::npos) {
if(pragma->name.find("do ")!=string::npos) {
linetype=PRAGMA_LOOPSTART;
if(pragma->name == "enddo") linetype=PRAGMA_LOOPEND;
else if(pragma->name == "paralleldo") linetype=PRAGMA_PARLOOPSTART;

@@ -416,10 +416,16 @@ void process_fortran(istream& is, const char* infile, ostream& os,
// continuation directive line
currPragma->lines.push_back(lowline);
} else {
if ( currPragma ) {
delete currPragma;
}
// new directive
if ( currPragma ) {
// if necessary process last complete directive
typeOfLastLine = check_pragma(currPragma);
test_and_insert_ompenddo(os, typeOfLastLine, waitforOMPEndDo,
infile, currPragma->lineno, pragma_indent,
pomp, addSharedDecl);
process_pragma(currPragma, os);
currPragma = 0;
}
currPragma
= new OMPragmaF(infile, lineno, pstart+5+pomp, lowline, pomp,
addSharedDecl);

@@ -353,13 +353,14 @@ MutatorC::run()

// finalize insertion set
//
if( !error && !m_appAddrSpace->finalizeInsertionSet( true, 0 ) )
if( !m_appAddrSpace->finalizeInsertionSet( true, 0 ) )
{
std::cerr << ExeName << ": [" << ExePid << "]: "
<< "Error: Could not finalize instrumentation set for "
<< "function '" << inst_funcs[i].name << "'. Aborting."
<< std::endl;
error = true;
break;
}
}
inst_funcs.clear();

@@ -113,6 +113,9 @@ std::vector<uint32_t> MyStreamIds;

// map stream id <-> processing MPI-rank
std::map<uint32_t, VT_MPI_INT> StreamId2Rank;

// map MPI-rank <-> stream ids
std::map<VT_MPI_INT, std::set<uint32_t> > Rank2StreamIds;
#endif // VT_MPI

int

@@ -201,10 +204,6 @@ VTUNIFY_MAIN( int argc, char ** argv )
#ifdef VT_MPI
if( NumRanks > 1 )
{
// share token translations to all ranks
if( (error = !theTokenFactory->share()) )
break;

// share user communication ids to all ranks
if( (error = !theUserCom->share()) )
break;

@@ -212,7 +211,7 @@ VTUNIFY_MAIN( int argc, char ** argv )
#endif // VT_MPI

// unify events
if( (error = !theEvents->run()) )
if( !Params.onlystats && (error = !theEvents->run()) )
break;

// unify statistics

@@ -756,6 +755,9 @@ getUnifyControls()
// set stream id/rank mapping
StreamId2Rank[uctl->streamid] = rank;

// add stream id to processing rank
Rank2StreamIds[rank].insert( uctl->streamid );

// get rank for the next stream id
//
if( i < UnifyCtls.size() - 1 && UnifyCtls[i+1]->pstreamid == 0 )

@@ -892,6 +894,10 @@ parseCommandLine( int argc, char ** argv )
Params.showprogress = false;
Params.verbose_level = 0;
}
else if( strcmp( argv[i], "--stats" ) == 0 )
{
Params.onlystats = true;
}
else if( strcmp( argv[i], "-v" ) == 0
|| strcmp( argv[i], "--verbose" ) == 0 )
{

@@ -1044,7 +1050,7 @@ cleanUp()
break;

// rename temporary event output files
if( (error = !theEvents->cleanUp()) )
if( !Params.onlystats && (error = !theEvents->cleanUp()) )
break;

// rename temporary statistic output files

@@ -1140,6 +1146,8 @@ showUsage()
<< " -q, --quiet Enable quiet mode." << std::endl
<< " (only emergency output)" << std::endl
<< std::endl
<< " --stats Unify only summarized information (*.stats), no events" << std::endl
<< std::endl
#if defined(HAVE_ZLIB) && HAVE_ZLIB
<< " --nocompress Don't compress output trace files." << std::endl
<< std::endl

@@ -1174,8 +1182,8 @@ shareParams()
//

char **filenames;
char flags[8];
VT_MPI_INT blockcounts[4] = { 3*1024, 1, 1, 8 };
char flags[9];
VT_MPI_INT blockcounts[4] = { 3*1024, 1, 1, 9 };
MPI_Aint displ[4];
MPI_Datatype oldtypes[4] =
{ MPI_CHAR, MPI_UNSIGNED_SHORT, MPI_INT,

@@ -1210,8 +1218,9 @@ shareParams()
flags[3] = (char)Params.showversion;
flags[4] = (char)Params.showprogress;
flags[5] = (char)Params.bequiet;
flags[6] = (char)Params.domsgmatch;
flags[7] = (char)Params.droprecvs;
flags[6] = (char)Params.onlystats;
flags[7] = (char)Params.domsgmatch;
flags[8] = (char)Params.droprecvs;
}

// share unify parameters

@@ -1230,8 +1239,9 @@ shareParams()
Params.showversion = (flags[3] == 1);
Params.showprogress = (flags[4] == 1);
Params.bequiet = (flags[5] == 1);
Params.domsgmatch = (flags[6] == 1);
Params.droprecvs = (flags[7] == 1);
Params.onlystats = (flags[6] == 1);
Params.domsgmatch = (flags[7] == 1);
Params.droprecvs = (flags[8] == 1);
}

delete [] filenames[0];

@@ -27,6 +27,7 @@
#include "vt_inttypes.h"

#include <map>
#include <set>
#include <string>
#include <vector>

@@ -65,8 +66,8 @@ struct ParamsS
ParamsS()
: verbose_level( 0 ), docompress( false ), doclean( true ),
showusage( false ), showversion( false ), showprogress( false ),
bequiet( false ), domsgmatch( false ), droprecvs( false ),
prof_sort_flags( 0x22 ), createthumb( false )
bequiet( false ), onlystats( false ), domsgmatch( false ),
droprecvs( false ), prof_sort_flags( 0x22 ), createthumb( false )
{
#if defined(HAVE_ZLIB) && HAVE_ZLIB
docompress = true;

@@ -90,6 +91,7 @@ struct ParamsS
bool showversion; // flag: show VampirTrace version?
bool showprogress; // flag: show progress?
bool bequiet; // flag: print no messages?
bool onlystats; // flag: unify only summarized information?

// HooksMsgMatchC's parameters
//

@@ -212,6 +214,9 @@ extern std::vector<uint32_t> MyStreamIds;

// map stream id <-> processing MPI-rank
extern std::map<uint32_t, VT_MPI_INT> StreamId2Rank;

// map MPI-rank <-> stream ids
extern std::map<VT_MPI_INT, std::set<uint32_t> > Rank2StreamIds;
#endif // VT_MPI

#endif // _VT_UNIFY_H_

@@ -396,8 +396,8 @@ DefinitionsC::readLocal()
if( (error = !readLocal( MyStreamIds[i], loc_defs )) )
break;

// abort loop, if next stream isn't a child
if( i < MyStreamIds.size() - 1 &&
// continue reading if the next stream is a child
if( i == MyStreamIds.size() - 1 ||
StreamId2UnifyCtl[MyStreamIds[i+1]]->pstreamid == 0 )
break;
}

@@ -408,21 +408,23 @@ DefinitionsC::readLocal()
defs_read = loc_defs.size() - defs_read;

// continue, if nothing is read
if( ( i >= MyStreamIds.size() - 1 && loc_defs.empty() ) ||
( i < MyStreamIds.size() - 1 && defs_read == 0 ) )
if( i < MyStreamIds.size() - 1 && defs_read == 0 )
continue;

// pre-sort subset of local definitions
//
if( defs_read > 0 )
{
// pre-sort subset of local definitions
//

// get begin iterator of subset
//
LargeVectorC<DefRec_BaseS*>::iterator sort_begin_it = loc_defs.begin();
if( loc_defs.size() != defs_read )
sort_begin_it += ( loc_defs.size() - defs_read - 1 );
// get begin iterator of subset
//
LargeVectorC<DefRec_BaseS*>::iterator sort_begin_it = loc_defs.begin();
if( loc_defs.size() != defs_read )
sort_begin_it += ( loc_defs.size() - defs_read - 1 );

// pre-sort
std::sort( sort_begin_it, loc_defs.end(), DefRec_LocCmp );
// pre-sort
std::sort( sort_begin_it, loc_defs.end(), DefRec_LocCmp );
}

MASTER
{

@@ -476,17 +478,15 @@ DefinitionsC::readLocal()
// loc_defs
//
for( uint32_t j = 0; j < loc_defs.size(); j++ )
{
// definition type (loc_defs[j]->dtype)
CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) );
buffer_size += size;

// loc_defs[j]
buffer_size += loc_defs[j]->getPackSize();
}

// finished flag
//
CALL_MPI( MPI_Pack_size( 1, MPI_CHAR, MPI_COMM_WORLD, &size ) );
buffer_size += size;

// continue reading, if minimum buffer size isn't reached
if( i < MyStreamIds.size() && buffer_size < min_msg_size )
if( i < MyStreamIds.size() - 1 && buffer_size < min_msg_size )
continue;

// allocate memory for the send buffer

@@ -500,6 +500,7 @@ DefinitionsC::readLocal()
buffer_pos = 0;

// loc_defs.size()
//
uint32_t loc_defs_size = loc_defs.size();
CALL_MPI( MPI_Pack( &loc_defs_size, 1, MPI_UNSIGNED, buffer,
buffer_size, &buffer_pos, MPI_COMM_WORLD ) );

@@ -507,15 +508,13 @@ DefinitionsC::readLocal()
// loc_defs
//
for( uint32_t j = 0; j < loc_defs.size(); j++ )
{
// definition type (loc_defs[j]->dtype)
CALL_MPI( MPI_Pack( &(loc_defs[j]->dtype), 1, MPI_UNSIGNED,
buffer, buffer_size, &buffer_pos,
MPI_COMM_WORLD ) );

// loc_defs[j]
loc_defs[j]->pack( buffer, buffer_size, buffer_pos );
}

// finished flag
//
char finished = ( i == MyStreamIds.size() - 1 );
CALL_MPI( MPI_Pack( &finished, 1, MPI_CHAR, buffer, buffer_size,
&buffer_pos, MPI_COMM_WORLD ) );

// send buffer to rank 0
//

@@ -539,25 +538,30 @@ DefinitionsC::readLocal()
}

#ifdef VT_MPI
SyncError( &error );

if( !error && NumRanks > 1 )
// all ranks are finished reading local definitions at this point
//

if( NumRanks > 1 && !SyncError( &error ) )
{
MASTER
{
// receive local definitions from all participating ranks
//

// number of ranks finished
VT_MPI_INT finished_ranks_num = 1; // 1=me
// get number of finished ranks
//
VT_MPI_INT finished_ranks_num = 1; // =rank 0
for( VT_MPI_INT i = 1; i < NumRanks; i++ )
{
if( Rank2StreamIds[i].empty() ) // rank i has nothing to do?
finished_ranks_num++;
}

// repeat until all ranks are finished reading local definitions
//
while( finished_ranks_num < NumRanks )
{
// source rank finished?
bool finished = false;

char * buffer;
VT_MPI_INT buffer_size;
VT_MPI_INT buffer_pos;

@@ -579,6 +583,9 @@ DefinitionsC::readLocal()
buffer = new char[buffer_size];
assert( buffer );

PVPrint( 3, " Receiving local definitions from rank %d\n",
rank );

// receive buffer
CALL_MPI( MPI_Recv( buffer, buffer_size, MPI_PACKED, rank, msg_tag,
MPI_COMM_WORLD, &status ) );

@@ -589,28 +596,23 @@ DefinitionsC::readLocal()
buffer_pos = 0;

// loc_defs.size()
//
uint32_t loc_defs_size;
CALL_MPI( MPI_Unpack( buffer, buffer_size, &buffer_pos,
&loc_defs_size, 1, MPI_UNSIGNED,
MPI_COMM_WORLD ) );

// is source rank finished?
if( loc_defs_size == 0 )
{
finished = true;
finished_ranks_num++;
}
else
{
PVPrint( 3, " Receiving local definitions from rank %d\n",
rank );
}

// loc_defs
//
for( uint32_t i = 0; i < loc_defs_size; i++ )
{
// definition type
// (don't increment current buffer position;
// def. type will be unpacked again by DefRec_*S::unpack())
//
DefRecTypeT def_type;
CALL_MPI( MPI_Unpack( buffer, buffer_size, &buffer_pos,
VT_MPI_INT tmp_buffer_pos = buffer_pos;
CALL_MPI( MPI_Unpack( buffer, buffer_size, &tmp_buffer_pos,
&def_type, 1, MPI_UNSIGNED,
MPI_COMM_WORLD ) );

@@ -725,77 +727,65 @@ DefinitionsC::readLocal()
loc_defs.push_back( new_loc_def );
}

// finished flag
//
char finished;
CALL_MPI( MPI_Unpack( buffer, buffer_size, &buffer_pos, &finished,
1, MPI_CHAR, MPI_COMM_WORLD ) );

// free memory of receive buffer
delete [] buffer;

if( !finished )
{
// add local to global definitions
error = !processLocal( loc_defs );
// add local to global definitions
if( (error = !processLocal( loc_defs )) )
break;

// free vector of local definitions
//
for( uint32_t i = 0; i < loc_defs.size(); i++ )
delete loc_defs[i];
loc_defs.clear();
// free vector of local definitions
//
for( uint32_t i = 0; i < loc_defs.size(); i++ )
delete loc_defs[i];
loc_defs.clear();

// is source rank finished reading local definitions?
if( finished )
{
// increment number of finished ranks
finished_ranks_num++;

// send token translations to finished rank
if( (error =
!theTokenFactory->distTranslations( rank,
finished_ranks_num == NumRanks )) )
break;
}
}
}
else // SLAVE
{
// send a notification to rank 0 that my rank is finished reading
// local definitions
// (empty vector of local definitions)
if( !MyStreamIds.empty() )
{
char * buffer;
VT_MPI_INT buffer_size;
VT_MPI_INT buffer_pos = 0;
// complete all sends and remove request handles and send buffers
// from list
while( send_buffers.size() > 0 )
{
// get the first request handle and send buffer from list
//
MPI_Request & request = send_buffers.front().first;
char *& buffer = send_buffers.front().second;

// get size needed for the send buffer
CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD,
&buffer_size ) );
// wait until send is completed
//
MPI_Status status;
CALL_MPI( MPI_Wait( &request, &status ) );

// allocate memory for the send buffer
//
buffer = new char[buffer_size];
assert( buffer );
// free memory of send buffer
delete [] buffer;
// remove request handle and send buffer from list
send_buffers.pop_front();
}

// pack send buffer
//

uint32_t finished = 0;
CALL_MPI( MPI_Pack( &finished, 1, MPI_UNSIGNED, buffer, buffer_size,
&buffer_pos, MPI_COMM_WORLD ) );

// send buffer to rank 0
//

MPI_Request request;
CALL_MPI( MPI_Isend( buffer, buffer_size, MPI_PACKED, 0, msg_tag,
MPI_COMM_WORLD, &request ) );

// add request handle and send buffer to list
send_buffers.push_back( std::make_pair( request, buffer ) );
}

// complete all sends and remove request handles and send buffers
// from list
while( send_buffers.size() > 0 )
{
// get the first request handle and send buffer from list
//
MPI_Request & request = send_buffers.front().first;
char *& buffer = send_buffers.front().second;

// wait until send is completed
//
MPI_Status status;
CALL_MPI( MPI_Wait( &request, &status ) );

// free memory of send buffer
delete [] buffer;
// remove request handle and send buffer from list
send_buffers.pop_front();
// receive token translations from rank 0
error = !theTokenFactory->distTranslations();
}
}
}

@@ -78,95 +78,181 @@ TokenFactoryC::getScope( const DefRecTypeT & type ) const
#ifdef VT_MPI

bool
TokenFactoryC::share()
TokenFactoryC::distTranslations( const VT_MPI_INT & destRank,
const bool wait )
{
bool error = false;

assert( NumRanks > 1 );
assert( !m_def2scope.empty() );

// block until all ranks have reached this point
CALL_MPI( MPI_Barrier( MPI_COMM_WORLD ) );
// message tag to use for p2p communication
const VT_MPI_INT msg_tag = 200;

VPrint( 1, "Sharing token translation tables\n" );

char * buffer;
VT_MPI_INT buffer_pos;
VT_MPI_INT buffer_size;
MPI_Status status;

MASTER
{
assert( destRank != 0 );

// send token translation tables to given destination rank
//

PVPrint( 3, " Sending token translation tables to rank %d\n", destRank );

// request handle for non-blocking send
static MPI_Request request = MPI_REQUEST_NULL;

// send buffer
static char * buffer = 0;

// get stream ids associated with given destination rank
const std::set<uint32_t> & stream_ids = Rank2StreamIds[destRank];

// convert stream ids to master process ids
// (=keys of token translation tables)
//
std::set<uint32_t> mprocess_ids;
for( std::set<uint32_t>::const_iterator stream_it = stream_ids.begin();
stream_it != stream_ids.end(); stream_it++ )
mprocess_ids.insert( *stream_it & VT_TRACEID_BITMASK );

// get size needed for the send buffer
//

VT_MPI_INT size;

buffer_size = 0;

for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator it =
m_def2scope.begin(); it != m_def2scope.end(); it++ )
for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator scope_it =
m_def2scope.begin(); scope_it != m_def2scope.end(); scope_it++ )
{
// get scope
TokenFactoryScopeC<DefRec_BaseS> * scope =
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( it->second );
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( scope_it->second );

// get size of token translation map
buffer_size += scope->getPackSize();
// get size needed to pack the number of translation tables into
// the send buffer
//
CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) );
buffer_size += size;

// get size needed to pack the token translation tables into the
// send buffer
//
for( std::set<uint32_t>::const_iterator proc_it = mprocess_ids.begin();
proc_it != mprocess_ids.end(); proc_it++ )
buffer_size += scope->getPackSize( *proc_it );
}
}

// broadcast buffer size
CALL_MPI( MPI_Bcast( &buffer_size, 1, MPI_INT, 0, MPI_COMM_WORLD ) );
// wait until previous send is completed and free memory of the
// send buffer
//
if( request != MPI_REQUEST_NULL )
{
assert( buffer );

// allocate memory for the send/receive buffer
//
buffer = new char[buffer_size];
assert( buffer );
CALL_MPI( MPI_Wait( &request, &status ) );
delete [] buffer;
}

// allocate memory for the send buffer
//
buffer = new char[buffer_size];
assert( buffer );

MASTER
{
// pack send buffer
//

buffer_pos = 0;

for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator it =
m_def2scope.begin(); it != m_def2scope.end(); it++ )
for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator scope_it =
m_def2scope.begin(); scope_it != m_def2scope.end(); scope_it++ )
{
// get scope
TokenFactoryScopeC<DefRec_BaseS> * scope =
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( it->second );
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( scope_it->second );

// pack token translation map to buffer
scope->pack( buffer, buffer_size, buffer_pos );
// pack number of token translation tables into the send buffer
//
uint32_t mprocess_size = mprocess_ids.size();
CALL_MPI( MPI_Pack( &mprocess_size, 1, MPI_UNSIGNED, buffer,
buffer_size, &buffer_pos, MPI_COMM_WORLD ) );

// pack token translation tables into the send buffer
//
for( std::set<uint32_t>::const_iterator proc_it = mprocess_ids.begin();
proc_it != mprocess_ids.end(); proc_it++ )
scope->pack( *proc_it, buffer, buffer_size, buffer_pos );
}

// send buffer
CALL_MPI( MPI_Isend( buffer, buffer_size, MPI_PACKED, destRank, msg_tag,
MPI_COMM_WORLD, &request ) );

// if it's the last send, wait until completion and free memory of the
// send buffer
//
if( wait )
{
CALL_MPI( MPI_Wait( &request, &status ) );
delete [] buffer;
}
}

// broadcast buffer
CALL_MPI( MPI_Bcast( buffer, buffer_size, MPI_PACKED, 0, MPI_COMM_WORLD ) );

SLAVE
else // SLAVE
{
// receive token translation tables from rank 0
//

PVPrint( 3, " Receiving token translation tables from rank 0\n" );

// receive buffer
char * buffer;

// test for a message from rank 0
CALL_MPI( MPI_Probe( 0, msg_tag, MPI_COMM_WORLD, &status ) );

// get size needed for the receive buffer
CALL_MPI( MPI_Get_count( &status, MPI_PACKED, &buffer_size ) );

// allocate memory for the receive buffer
//
buffer = new char[buffer_size];
assert( buffer );

// receive buffer
CALL_MPI( MPI_Recv( buffer, buffer_size, MPI_PACKED, 0, msg_tag,
MPI_COMM_WORLD, &status ) );

// unpack receive buffer
//

buffer_pos = 0;

for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator it =
m_def2scope.begin(); it != m_def2scope.end(); it++ )
for( std::map<DefRecTypeT, TokenFactoryScopeI*>::const_iterator scope_it =
m_def2scope.begin(); scope_it != m_def2scope.end(); scope_it++ )
{
// get scope
TokenFactoryScopeC<DefRec_BaseS> * scope =
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( it->second );
static_cast<TokenFactoryScopeC<DefRec_BaseS>*>( scope_it->second );

// unpack token translation map from buffer
scope->unpack( buffer, buffer_size, buffer_pos );
// unpack the number of token translation tables from the
// receive buffer
uint32_t mprocess_size;
CALL_MPI( MPI_Unpack( buffer, buffer_size, &buffer_pos, &mprocess_size, 1,
MPI_UNSIGNED, MPI_COMM_WORLD ) );

// unpack token translation tables from the receive buffer
//
for( uint32_t i = 0; i < mprocess_size; i++ )
scope->unpack( buffer, buffer_size, buffer_pos );
}

// free memory of the receive buffer
delete [] buffer;
}

// free memory of send/receive buffer
delete [] buffer;

// SyncError( &error );

return !error;
}

@@ -39,8 +39,11 @@ public:
TokenFactoryScopeI * getScope( const DefRecTypeT & type ) const;

#ifdef VT_MPI
// share token translations to all ranks
bool share();

// distribute token translation tables
bool distTranslations( const VT_MPI_INT & destRank = 0,
const bool wait = false );

#endif // VT_MPI

private:

@@ -45,21 +45,23 @@ public:
// translate local to global token
virtual uint32_t translate( const uint32_t & process,
const uint32_t & localToken,
const bool & showError = true ) const = 0;
const bool showError = true ) const = 0;

// get next unused global token
virtual uint32_t getNextToken() = 0;

#ifdef VT_MPI

// get size needed to pack token translation map
virtual VT_MPI_INT getPackSize() = 0;
// get size needed to pack token translation tables of certain process into
// a buffer
virtual VT_MPI_INT getPackSize( const uint32_t & process ) = 0;

// pack token translations into a buffer
virtual void pack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos ) = 0;
// pack token translation tables of certain process into a buffer
virtual void pack( const uint32_t & process, char *& buffer,
const VT_MPI_INT & bufferSize, VT_MPI_INT & bufferPos,
const bool clear = true ) = 0;

// unpack token translations from a buffer
// unpack token translation tables from a buffer
virtual void unpack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos ) = 0;

@@ -91,30 +93,32 @@ public:
// translate local to global token
inline uint32_t translate( const uint32_t & process,
const uint32_t & localToken,
const bool & showError = true ) const;
const bool showError = true ) const;

// get next unused global token
inline uint32_t getNextToken();

#ifdef VT_MPI

// get size needed to pack token translation map
VT_MPI_INT getPackSize();
// get size needed to pack token translation tables of certain process into
// a buffer
VT_MPI_INT getPackSize( const uint32_t & process );

// pack token translations into a buffer
void pack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos );
// pack token translation tables of certain process into a buffer
void pack( const uint32_t & process, char *& buffer,
const VT_MPI_INT & bufferSize, VT_MPI_INT & bufferPos,
const bool clear = true );

// unpack token translations from a buffer
// unpack token translation tables from a buffer
void unpack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos );
VT_MPI_INT & bufferPos );

#endif // VT_MPI

private:

// map process id <-> local/global token
std::map<uint32_t, std::map<uint32_t, uint32_t> > m_mapLocGlobToken;
// map process id <-> map local/global token
std::map<uint32_t, std::map<uint32_t, uint32_t> > m_proc2TokenMap;

// pointer to target global definitions
std::set<T> * m_globDefs;

@@ -79,13 +79,13 @@ TokenFactoryScopeC<T>::setTranslation( const uint32_t & process,
uint32_t mprocess = process & VT_TRACEID_BITMASK;

// set token translation
m_mapLocGlobToken[mprocess][localToken] = globalToken;
m_proc2TokenMap[mprocess][localToken] = globalToken;
}

template <class T>
uint32_t
TokenFactoryScopeC<T>::translate( const uint32_t & process,
const uint32_t & localToken, const bool & showError ) const
const uint32_t & localToken, const bool showError ) const
{
uint32_t global_token = 0;

@@ -94,10 +94,10 @@ TokenFactoryScopeC<T>::translate( const uint32_t & process,

// search token mappings of process
std::map<uint32_t, std::map<uint32_t, uint32_t> >::const_iterator
proc_it = m_mapLocGlobToken.find( mprocess );
proc_it = m_proc2TokenMap.find( mprocess );

// found?
if( proc_it != m_mapLocGlobToken.end() )
if( proc_it != m_proc2TokenMap.end() )
{
// search token mapping by local token
std::map<uint32_t, uint32_t>::const_iterator map_it =

@@ -131,37 +131,28 @@ TokenFactoryScopeC<T>::getNextToken()

template <class T>
VT_MPI_INT
TokenFactoryScopeC<T>::getPackSize()
TokenFactoryScopeC<T>::getPackSize( const uint32_t & process )
{
VT_MPI_INT buffer_size;
VT_MPI_INT buffer_size = 0;
VT_MPI_INT size;

// m_mapLocGlobToken.size()
CALL_MPI( MPI_Pack_size( 1, MPI_UNSIGNED, MPI_COMM_WORLD, &buffer_size ) );

// m_mapLocGlobToken
// process + m_proc2TokenMap[process].size()
//
if( m_mapLocGlobToken.size() > 0 )
CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) );
buffer_size += size;

// get token translation table of process
std::map<uint32_t, std::map<uint32_t, uint32_t> >::const_iterator
token_map_it = m_proc2TokenMap.find( process );

// m_proc2TokenMap[process]
//
if( token_map_it != m_proc2TokenMap.end() &&
!token_map_it->second.empty() )
{
VT_MPI_INT size;

std::map<uint32_t, std::map<uint32_t, uint32_t> >::const_iterator proc_it;
for( proc_it = m_mapLocGlobToken.begin();
proc_it != m_mapLocGlobToken.end(); proc_it++ )
{
// m_mapLocGlobToken[].first + m_mapLocGlobToken[].second.size()
//
CALL_MPI( MPI_Pack_size( 2, MPI_UNSIGNED, MPI_COMM_WORLD, &size ) );
buffer_size += size;

if( proc_it->second.size() > 0 )
{
// m_mapLocGlobToken[].second
//
CALL_MPI( MPI_Pack_size( (VT_MPI_INT)proc_it->second.size() * 2,
MPI_UNSIGNED, MPI_COMM_WORLD, &size ) );
buffer_size += size;
}
}
CALL_MPI( MPI_Pack_size( token_map_it->second.size() * 2, MPI_UNSIGNED,
MPI_COMM_WORLD, &size ) );
buffer_size += size;
}

return buffer_size;

@@ -169,64 +160,43 @@ TokenFactoryScopeC<T>::getPackSize()

template <class T>
void
TokenFactoryScopeC<T>::pack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos )
TokenFactoryScopeC<T>::pack( const uint32_t & process,
char *& buffer, const VT_MPI_INT & bufferSize, VT_MPI_INT & bufferPos,
const bool clear )
{
// m_mapLocGlobToken.size()
// process
CALL_MPI( MPI_Pack( const_cast<uint32_t*>( &process ), 1, MPI_UNSIGNED,
buffer, bufferSize, &bufferPos, MPI_COMM_WORLD ) );

// get token translation table of process
std::map<uint32_t, std::map<uint32_t, uint32_t> >::iterator token_map_it =
m_proc2TokenMap.find( process );

// m_proc2TokenMap[process].size()
//
uint32_t proc_map_size = m_mapLocGlobToken.size();
CALL_MPI( MPI_Pack( &proc_map_size, 1, MPI_UNSIGNED, buffer, bufferSize,
uint32_t token_map_size =
( token_map_it != m_proc2TokenMap.end() ) ?
token_map_it->second.size() : 0;
CALL_MPI( MPI_Pack( &token_map_size, 1, MPI_UNSIGNED, buffer, bufferSize,
&bufferPos, MPI_COMM_WORLD ) );

// m_mapLocGlobToken
// m_proc2TokenMap[process]
//
if( proc_map_size > 0 )
if( token_map_it != m_proc2TokenMap.end() )
{
std::map<uint32_t, std::map<uint32_t, uint32_t> >::const_iterator proc_it;
for( proc_it = m_mapLocGlobToken.begin();
proc_it != m_mapLocGlobToken.end(); proc_it++ )
for( std::map<uint32_t, uint32_t>::const_iterator token_pair_it =
token_map_it->second.begin();
token_pair_it != token_map_it->second.end(); token_pair_it++ )
{
// m_mapLocGlobToken[].first
//
uint32_t proc = proc_it->first;
CALL_MPI( MPI_Pack( &proc, 1, MPI_UNSIGNED, buffer, bufferSize,
&bufferPos, MPI_COMM_WORLD ) );

// m_mapLocGlobToken[].second.size()
//
uint32_t token_map_size = proc_it->second.size();
CALL_MPI( MPI_Pack( &token_map_size, 1, MPI_UNSIGNED, buffer,
uint32_t token_pair[2] =
{ token_pair_it->first, token_pair_it->second };
CALL_MPI( MPI_Pack( token_pair, 2, MPI_UNSIGNED, buffer,
bufferSize, &bufferPos, MPI_COMM_WORLD ) );

// m_mapLocGlobToken[].second
//
if( token_map_size > 0 )
{
uint32_t * token_map_firsts = new uint32_t[token_map_size];
uint32_t * token_map_seconds = new uint32_t[token_map_size];

std::map<uint32_t, uint32_t>::const_iterator tk_it;
uint32_t i;

for( tk_it = proc_it->second.begin(), i = 0;
tk_it != proc_it->second.end(), i < token_map_size;
tk_it++, i++ )
{
token_map_firsts[i] = tk_it->first;
token_map_seconds[i] = tk_it->second;
}

CALL_MPI( MPI_Pack( token_map_firsts, (VT_MPI_INT)token_map_size,
MPI_UNSIGNED, buffer, bufferSize, &bufferPos,
MPI_COMM_WORLD ) );
CALL_MPI( MPI_Pack( token_map_seconds, (VT_MPI_INT)token_map_size,
MPI_UNSIGNED, buffer, bufferSize, &bufferPos,
MPI_COMM_WORLD ) );

delete [] token_map_firsts;
delete [] token_map_seconds;
}
}

// clear token translation table of certain process id
if( clear )
m_proc2TokenMap.erase( token_map_it );
}
}

@@ -235,52 +205,30 @@ void
TokenFactoryScopeC<T>::unpack( char *& buffer, const VT_MPI_INT & bufferSize,
VT_MPI_INT & bufferPos )
{
// m_mapLocGlobToken.size()
// process
//
uint32_t proc_map_size;
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &proc_map_size, 1,
uint32_t process;
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &process, 1,
MPI_UNSIGNED, MPI_COMM_WORLD ) );

// m_mapLocGlobToken
// m_proc2TokenMap[process].size()
//
if( proc_map_size > 0 )
uint32_t token_map_size;
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &token_map_size, 1,
MPI_UNSIGNED, MPI_COMM_WORLD ) );

// m_proc2TokenMap[process]
//
if( token_map_size > 0 )
{
for( uint32_t i = 0; i < proc_map_size; i++ )
for( uint32_t i = 0; i < token_map_size; i++ )
{
// m_mapLocGlobToken[].first
//
uint32_t proc;
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &proc, 1,
uint32_t token_pair[2];
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, token_pair, 2,
MPI_UNSIGNED, MPI_COMM_WORLD ) );

// m_mapLocGlobToken[].second.size()
//
uint32_t token_map_size;
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos, &token_map_size,
1, MPI_UNSIGNED, MPI_COMM_WORLD ) );

// m_mapLocGlobToken[].second
//
if( token_map_size > 0 )
{
uint32_t * token_map_firsts = new uint32_t[token_map_size];
uint32_t * token_map_seconds = new uint32_t[token_map_size];

CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos,
token_map_firsts, (VT_MPI_INT)token_map_size,
MPI_UNSIGNED, MPI_COMM_WORLD ) );
CALL_MPI( MPI_Unpack( buffer, bufferSize, &bufferPos,
token_map_seconds, (VT_MPI_INT)token_map_size,
MPI_UNSIGNED, MPI_COMM_WORLD ) );

// set token translations for process
//
for( uint32_t j = 0; j < token_map_size; j++ )
setTranslation( proc, token_map_firsts[j], token_map_seconds[j] );

delete [] token_map_firsts;
delete [] token_map_seconds;
}
// set token translation
m_proc2TokenMap[process][token_pair[0]] = token_pair[1];
}
}
}

@@ -1968,9 +1968,8 @@ VT_MPI_INT MPI_Waitall( VT_MPI_INT count,
time = vt_pform_wtime();
was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_WAITALL]);

if (array_of_statuses == MPI_STATUSES_IGNORE) {
if (array_of_statuses == MPI_STATUSES_IGNORE)
array_of_statuses = vt_get_status_array(count);
}
vt_save_request_array(requests, count);

CALL_PMPI_3(MPI_Waitall, count, requests, array_of_statuses,

@@ -2062,9 +2061,8 @@ VT_MPI_INT MPI_Waitsome( VT_MPI_INT incount,
time = vt_pform_wtime();
was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_WAITSOME]);

if (array_of_statuses == MPI_STATUSES_IGNORE) {
if (array_of_statuses == MPI_STATUSES_IGNORE)
array_of_statuses = vt_get_status_array(incount);
}
vt_save_request_array(array_of_requests, incount);

CALL_PMPI_5(MPI_Waitsome, incount, array_of_requests, outcount,

@@ -2202,9 +2200,8 @@ VT_MPI_INT MPI_Testall( VT_MPI_INT count,
time = vt_pform_wtime();
was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_TESTALL]);

if (array_of_statuses == MPI_STATUSES_IGNORE) {
if (array_of_statuses == MPI_STATUSES_IGNORE)
array_of_statuses = vt_get_status_array(count);
}
vt_save_request_array(array_of_requests, count);

CALL_PMPI_4(MPI_Testall, count, array_of_requests, flag,

@@ -2256,9 +2253,8 @@ VT_MPI_INT MPI_Testsome( VT_MPI_INT incount,
time = vt_pform_wtime();
was_recorded = vt_enter(VT_CURRENT_THREAD, &time, vt_mpi_regid[VT__MPI_TESTSOME]);

if (array_of_statuses == MPI_STATUSES_IGNORE) {
if (array_of_statuses == MPI_STATUSES_IGNORE)
array_of_statuses = vt_get_status_array(incount);
}
vt_save_request_array(array_of_requests, incount);

CALL_PMPI_5(MPI_Testsome, incount, array_of_requests, outcount,

@@ -2911,12 +2907,15 @@ VT_MPI_INT MPI_Gather( void* sendbuf,

PMPI_Type_size(sendtype, &ssz);
PMPI_Comm_rank(comm, &me);
if ( me == root ) {
PMPI_Comm_size(comm, &N);
PMPI_Type_size(recvtype, &rsz);
} else {
N = rsz = 0;
}
if ( me == root )
{
PMPI_Comm_size(comm, &N);
PMPI_Type_size(recvtype, &rsz);
}
else
{
N = rsz = 0;
}

vt_mpi_collbegin(VT_CURRENT_THREAD, &time,
vt_mpi_regid[VT__MPI_GATHER], matchid,

@@ -3035,10 +3034,15 @@ VT_MPI_INT MPI_Gatherv( void* sendbuf,
{
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTTHRD_MY_VTTHRD);

PMPI_Comm_size(comm, &N);
PMPI_Comm_rank(comm, &me);

recvcount = 0;
for(i = 0; i<N; i++) recvcount += recvcounts[i];
recvcount = recvsz = 0;
if (me == root)
{
PMPI_Comm_size(comm, &N);
PMPI_Type_size(recvtype, &recvsz);
for(i = 0; i<N; i++) recvcount += recvcounts[i];
}

#if defined(HAVE_DECL_MPI_IN_PLACE) && HAVE_DECL_MPI_IN_PLACE
if (sendbuf == MPI_IN_PLACE)

@@ -3048,19 +3052,7 @@ VT_MPI_INT MPI_Gatherv( void* sendbuf,
}
#endif /* HAVE_DECL_MPI_IN_PLACE */

PMPI_Type_size(recvtype, &recvsz);
PMPI_Type_size(sendtype, &sendsz);
PMPI_Comm_rank(comm, &me);

recvsz = 0;
if ( me == root )
{
PMPI_Type_size(recvtype, &recvsz);
}
else
{
recvcount = 0;
}

vt_mpi_collbegin(VT_CURRENT_THREAD, &time,
vt_mpi_regid[VT__MPI_GATHERV], matchid,

@@ -3441,12 +3433,15 @@ VT_MPI_INT MPI_Scatter( void* sendbuf,

PMPI_Type_size(recvtype, &recvsz);
PMPI_Comm_rank(comm, &me);
if ( me == root ) {
PMPI_Comm_size(comm, &N);
PMPI_Type_size(sendtype, &sendsz);
} else {
N = sendsz = 0;
}
if ( me == root )
{
PMPI_Comm_size(comm, &N);
PMPI_Type_size(sendtype, &sendsz);
}
else
{
N = sendsz = 0;
}

vt_mpi_collbegin(VT_CURRENT_THREAD, &time,
vt_mpi_regid[VT__MPI_SCATTER], matchid,

@@ -3505,10 +3500,15 @@ VT_MPI_INT MPI_Scatterv( void* sendbuf,
{
matchid = VTTHRD_MPICOLLOP_NEXT_MATCHINGID(VTTHRD_MY_VTTHRD);

PMPI_Comm_size(comm, &N);
PMPI_Comm_rank(comm, &me);

sendcount = 0;
for(i = 0; i<N; i++) sendcount += sendcounts[i];
sendcount = sendsz = 0;
if (me == root)
{
PMPI_Comm_size(comm, &N);
PMPI_Type_size(sendtype, &sendsz);
for(i = 0; i<N; i++) sendcount += sendcounts[i];
}

#if defined(HAVE_DECL_MPI_IN_PLACE) && HAVE_DECL_MPI_IN_PLACE
if (recvbuf == MPI_IN_PLACE)

@@ -3518,17 +3518,7 @@ VT_MPI_INT MPI_Scatterv( void* sendbuf,
}
#endif /* HAVE_DECL_MPI_IN_PLACE */

sendsz = 0;
PMPI_Type_size(recvtype, &recvsz);
PMPI_Comm_rank(comm, &me);
if ( me == root )
{
PMPI_Type_size(sendtype, &sendsz);
}
else
{
sendcount = 0;
}

vt_mpi_collbegin(VT_CURRENT_THREAD, &time,
vt_mpi_regid[VT__MPI_SCATTERV], matchid,

@@ -839,6 +839,7 @@ static void unify_traces(void)
}
argc++;

if ((vt_env_mode() & VT_MODE_TRACE) == 0) argv[argc++] = strdup("--stats");
#if defined(HAVE_ZLIB) && HAVE_ZLIB
if (!vt_env_compression()) argv[argc++] = strdup("--nocompress");
#endif /* HAVE_ZLIB */