Small improvements on the test.
Rework the to_self test to be able to be used as a benchmark. Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
75a53976a3
Коммит
0a24f0374e
@ -357,7 +357,7 @@ opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor,
|
||||
*/
|
||||
if( OPAL_LIKELY(0 == count) ) {
|
||||
pStack[1].type = pElems->elem.common.type;
|
||||
pStack[1].count = pElems->elem.count;
|
||||
pStack[1].count = pElems->elem.blocklen;
|
||||
} else {
|
||||
pStack[1].type = OPAL_DATATYPE_UINT1;
|
||||
pStack[1].count = pData->size - count;
|
||||
|
@ -167,15 +167,18 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
|
||||
if( ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ==
|
||||
(current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)) &&
|
||||
(current->disp == (last.disp + (ptrdiff_t)last.count * last.extent)) &&
|
||||
((last.count == 1) || (current->count == 1) || (last.extent == current->extent)) ) {
|
||||
((current->count == 1) || (last.extent == current->extent)) ) {
|
||||
last.count += current->count;
|
||||
if( last.count == 1 ) {
|
||||
last.extent = current->extent;
|
||||
} /* otherwise keep the last.extent */
|
||||
/* find the lowest common denominator type */
|
||||
if( last.common.type != current->common.type ) {
|
||||
last.common.type = OPAL_DATATYPE_UINT1;
|
||||
last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size;
|
||||
last.common.type = OPAL_DATATYPE_UINT1;
|
||||
}
|
||||
/* maximize the contiguous pieces */
|
||||
if( last.extent == (ptrdiff_t)(last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ) {
|
||||
last.blocklen *= last.count;
|
||||
last.count = 1;
|
||||
last.extent = last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size;
|
||||
}
|
||||
continue; /* next data */
|
||||
}
|
||||
|
@ -33,9 +33,6 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
|
||||
uint32_t *iovec_count,
|
||||
int increment)
|
||||
{
|
||||
|
||||
|
||||
|
||||
opal_convertor_t *convertor;
|
||||
size_t remaining_length = 0;
|
||||
uint32_t i;
|
||||
@ -43,7 +40,6 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
|
||||
struct iovec *temp_iov=NULL;
|
||||
size_t temp_data;
|
||||
|
||||
|
||||
convertor = opal_convertor_create( opal_local_arch, 0 );
|
||||
|
||||
if (OMPI_SUCCESS != opal_convertor_prepare_for_send (convertor,
|
||||
@ -55,9 +51,9 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
|
||||
}
|
||||
|
||||
if ( 0 == datatype->super.size ) {
|
||||
*iovec_count = 0;
|
||||
*iov = NULL;
|
||||
return OMPI_SUCCESS;
|
||||
*iovec_count = 0;
|
||||
*iov = NULL;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
remaining_length = count * datatype->super.size;
|
||||
@ -69,10 +65,8 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
while (0 == opal_convertor_raw(convertor,
|
||||
temp_iov,
|
||||
&temp_count,
|
||||
&temp_data)) {
|
||||
while (0 == opal_convertor_raw(convertor, temp_iov,
|
||||
&temp_count, &temp_data)) {
|
||||
*iovec_count = *iovec_count + temp_count;
|
||||
*iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec));
|
||||
if (NULL == *iov) {
|
||||
@ -80,7 +74,7 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
|
||||
free(temp_iov);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for (i=0 ; i<temp_count ; i++) {
|
||||
for (i = 0 ; i < temp_count ; i++) {
|
||||
(*iov)[i+(*iovec_count-temp_count)].iov_base = temp_iov[i].iov_base;
|
||||
(*iov)[i+(*iovec_count-temp_count)].iov_len = temp_iov[i].iov_len;
|
||||
}
|
||||
@ -90,12 +84,12 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
|
||||
}
|
||||
*iovec_count = *iovec_count + temp_count;
|
||||
if ( temp_count > 0 ) {
|
||||
*iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec));
|
||||
if (NULL == *iov) {
|
||||
opal_output(1, "OUT OF MEMORY\n");
|
||||
*iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec));
|
||||
if (NULL == *iov) {
|
||||
opal_output(1, "OUT OF MEMORY\n");
|
||||
free(temp_iov);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
}
|
||||
for (i=0 ; i<temp_count ; i++) {
|
||||
(*iov)[i+(*iovec_count-temp_count)].iov_base = temp_iov[i].iov_base;
|
||||
@ -342,7 +336,6 @@ int main (int argc, char *argv[]) {
|
||||
struct iovec * iov_1 = NULL;
|
||||
mca_common_ompio_decode_datatype ( datatype, 1, &iov_1, &iovec_count_1, 1);
|
||||
|
||||
|
||||
assert(iovec_count_300 == iovec_count_10);
|
||||
assert(iovec_count_300 == iovec_count_1);
|
||||
// assert(iov[100].iov_base == iov2[100].iov_base);
|
||||
|
@ -159,8 +159,7 @@ static int local_copy_ddt_count( opal_datatype_t const * const pdt, int count )
|
||||
osrc = (char*)malloc( malloced_size );
|
||||
|
||||
{
|
||||
for( size_t i = 0; i < malloced_size; i++ )
|
||||
osrc[i] = i % 128 + 32;
|
||||
for( size_t i = 0; i < malloced_size; i++ ) osrc[i] = i % 128 + 32;
|
||||
memcpy(odst, osrc, malloced_size);
|
||||
}
|
||||
pdst = odst - lb;
|
||||
|
@ -445,7 +445,7 @@ static int32_t opal_datatype_create_vector( int count, int bLength, int stride,
|
||||
}
|
||||
|
||||
pData = opal_datatype_create( oldType->desc.used + 2 );
|
||||
if( (bLength == stride) || (1 >= count) ) { /* the elements are contiguous */
|
||||
if( (bLength == stride) || (1 == count) ) { /* the elements are contiguous */
|
||||
opal_datatype_add( pData, oldType, count * bLength, 0, extent );
|
||||
} else {
|
||||
if( 1 == bLength ) {
|
||||
@ -476,7 +476,7 @@ static int32_t opal_datatype_create_hvector( int count, int bLength, ptrdiff_t s
|
||||
}
|
||||
|
||||
pTempData = opal_datatype_create( oldType->desc.used + 2 );
|
||||
if( ((extent * bLength) == stride) || (1 >= count) ) { /* contiguous */
|
||||
if( ((extent * bLength) == stride) || (1 == count) ) { /* contiguous */
|
||||
pData = pTempData;
|
||||
opal_datatype_add( pData, oldType, count * bLength, 0, extent );
|
||||
} else {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -15,8 +15,9 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
|
||||
#if OPEN_MPI && 0
|
||||
#if 0 && OPEN_MPI
|
||||
extern void ompi_datatype_dump( MPI_Datatype ddt );
|
||||
#define MPI_DDT_DUMP(ddt) ompi_datatype_dump( (ddt) )
|
||||
#else
|
||||
@ -178,23 +179,145 @@ create_indexed_gap_optimized_ddt( void )
|
||||
return dt3;
|
||||
}
|
||||
|
||||
static void print_result( int length, int cycles, double time )
|
||||
{
|
||||
double bandwidth, clock_prec;
|
||||
|
||||
/********************************************************************
|
||||
*******************************************************************/
|
||||
|
||||
/*
 * Bits of the test-selection mask.
 *
 * The low bits choose which datatype layouts main() builds and benchmarks.
 */
#define DO_CONTIG                       (1 << 0)
#define DO_CONSTANT_GAP                 (1 << 1)
#define DO_INDEXED_GAP                  (1 << 2)
#define DO_OPTIMIZED_INDEXED_GAP        (1 << 3)
#define DO_STRUCT_CONSTANT_GAP_RESIZED  (1 << 4)

/* The high bits choose which operations do_test_for_ddt() times. */
#define DO_PACK                         (1 << 24)
#define DO_UNPACK                       (1 << 25)
#define DO_ISEND_RECV                   (1 << 26)
#define DO_ISEND_IRECV                  (1 << 27)
#define DO_IRECV_SEND                   (1 << 28)
#define DO_IRECV_ISEND                  (1 << 29)

/* Bounds on the benchmark buffer length (passed to malloc, so in bytes). */
#define MIN_LENGTH  1024
#define MAX_LENGTH  (1024*1024)

/* Operations executed back-to-back inside one timed trial. */
static int cycles  = 100;
/* Timed trials collected per message size. */
static int trials  = 20;
/* Untimed rounds run before the timed trials (used by pack/unpack). */
static int warmups = 2;
|
||||
|
||||
/* qsort() comparator ordering doubles in ascending order. */
static int print_result_cmp_double( const void* lhs, const void* rhs )
{
    const double a = *(const double*)lhs;
    const double b = *(const double*)rhs;

    return (a > b) - (a < b);
}

/**
 * Print one benchmark result line on stdout.
 *
 * The samples in timers are copied and sorted in ascending order, and the
 * statistics are computed over the middle window
 * [trials - 3*trials/4, trials - trials/4) of the sorted copy, so the
 * fastest and slowest samples do not influence the reported numbers.
 *
 * @param length  message length used for the bandwidth computation; the
 *                result is divided by 1024*1024 and labelled MB/s, so this
 *                is treated as a byte count
 * @param trials  number of entries in timers
 * @param timers  per-operation timings; the array itself is not modified
 *
 * Nothing is printed when timers is NULL or trials is smaller than 1.
 */
static void print_result( int length, int trials, double* timers )
{
    double bandwidth, clock_prec;
    double min_time, max_time, average, std_dev = 0.0;
    int t, quartile_start, quartile_end, samples;

    /* A non-positive size would make the VLA below undefined behaviour. */
    if( (NULL == timers) || (trials < 1) ) {
        return;
    }

    double ordered[trials];

    for( t = 0; t < trials; t++ ) {
        ordered[t] = timers[t];
    }
    qsort( ordered, (size_t)trials, sizeof(double), print_result_cmp_double );

    quartile_start = trials - (3 * trials) / 4;
    quartile_end   = trials - (1 * trials) / 4;
    if( quartile_start >= quartile_end ) {
        /* Too few samples for a middle window (trials == 1): use them all
         * instead of reading past the array and dividing by zero. */
        quartile_start = 0;
        quartile_end   = trials;
    }
    samples = quartile_end - quartile_start;

    clock_prec = MPI_Wtick();

    /* The window is sorted, so its extremes sit at both ends. */
    min_time = ordered[quartile_start];
    max_time = ordered[quartile_end - 1];

    average = ordered[quartile_start];
    for( t = quartile_start + 1; t < quartile_end; t++ ) {
        average += ordered[t];
    }
    average /= samples;

    for( t = quartile_start; t < quartile_end; t++ ) {
        std_dev += (ordered[t] - average) * (ordered[t] - average);
    }
    std_dev = sqrt( std_dev / samples );

    /* clock_prec appears in both numerator and denominator; the expression
     * is kept in its original form so the printed value is unchanged. */
    bandwidth = (length * clock_prec) / (1024.0 * 1024.0) / (average * clock_prec);
    printf( "%8d\t%15g\t%10.4f MB/s [min %10g max %10g std %2.2f%%]\n", length, average, bandwidth,
            min_time, max_time, (100.0 * std_dev) / average );
}
|
||||
|
||||
static int pack( int cycles,
|
||||
MPI_Datatype sdt, int scount, void* sbuf,
|
||||
void* packed_buf )
|
||||
{
|
||||
int position, myself, c, t, outsize;
|
||||
double timers[trials];
|
||||
|
||||
MPI_Type_size( sdt, &outsize );
|
||||
outsize *= scount;
|
||||
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &myself );
|
||||
|
||||
for( t = 0; t < warmups; t++ ) {
|
||||
for( c = 0; c < cycles; c++ ) {
|
||||
position = 0;
|
||||
MPI_Pack(sbuf, scount, sdt, packed_buf, outsize, &position, MPI_COMM_WORLD);
|
||||
}
|
||||
}
|
||||
|
||||
for( t = 0; t < trials; t++ ) {
|
||||
timers[t] = MPI_Wtime();
|
||||
for( c = 0; c < cycles; c++ ) {
|
||||
position = 0;
|
||||
MPI_Pack(sbuf, scount, sdt, packed_buf, outsize, &position, MPI_COMM_WORLD);
|
||||
}
|
||||
timers[t] = (MPI_Wtime() - timers[t]) / cycles;
|
||||
}
|
||||
print_result( outsize, trials, timers );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int unpack( int cycles,
|
||||
void* packed_buf,
|
||||
MPI_Datatype rdt, int rcount, void* rbuf )
|
||||
{
|
||||
int position, myself, c, t, insize;
|
||||
double timers[trials];
|
||||
|
||||
MPI_Type_size( rdt, &insize );
|
||||
insize *= rcount;
|
||||
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &myself );
|
||||
|
||||
for( t = 0; t < warmups; t++ ) {
|
||||
for( c = 0; c < cycles; c++ ) {
|
||||
position = 0;
|
||||
MPI_Unpack(packed_buf, insize, &position, rbuf, rcount, rdt, MPI_COMM_WORLD);
|
||||
}
|
||||
}
|
||||
|
||||
for( t = 0; t < trials; t++ ) {
|
||||
timers[t] = MPI_Wtime();
|
||||
for( c = 0; c < cycles; c++ ) {
|
||||
position = 0;
|
||||
MPI_Unpack(packed_buf, insize, &position, rbuf, rcount, rdt, MPI_COMM_WORLD);
|
||||
}
|
||||
timers[t] = (MPI_Wtime() - timers[t]) / cycles;
|
||||
}
|
||||
print_result( insize, trials, timers );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int isend_recv( int cycles,
|
||||
MPI_Datatype sdt, int scount, void* sbuf,
|
||||
MPI_Datatype rdt, int rcount, void* rbuf )
|
||||
{
|
||||
int myself, tag = 0, i, slength, rlength;
|
||||
int myself, tag = 0, c, t, slength, rlength;
|
||||
MPI_Status status;
|
||||
MPI_Request req;
|
||||
double tstart, tend;
|
||||
double timers[trials];
|
||||
|
||||
MPI_Type_size( sdt, &slength );
|
||||
slength *= scount;
|
||||
@ -203,21 +326,16 @@ static int isend_recv( int cycles,
|
||||
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &myself );
|
||||
|
||||
tstart = MPI_Wtime();
|
||||
for( i = 0; i < cycles; i++ ) {
|
||||
#ifndef FAST
|
||||
MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req );
|
||||
MPI_Recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status );
|
||||
MPI_Wait( &req, &status );
|
||||
/*MPI_Request_free( &req );*/
|
||||
#else
|
||||
ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req );
|
||||
ftmpi_mpi_recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status );
|
||||
ftmpi_request_free( &req );
|
||||
#endif
|
||||
for( t = 0; t < trials; t++ ) {
|
||||
timers[t] = MPI_Wtime();
|
||||
for( c = 0; c < cycles; c++ ) {
|
||||
MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req );
|
||||
MPI_Recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status );
|
||||
MPI_Wait( &req, &status );
|
||||
}
|
||||
timers[t] = (MPI_Wtime() - timers[t]) / cycles;
|
||||
}
|
||||
tend = MPI_Wtime();
|
||||
print_result( rlength, cycles, tend - tstart );
|
||||
print_result( rlength, trials, timers );
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -225,10 +343,10 @@ static int irecv_send( int cycles,
|
||||
MPI_Datatype sdt, int scount, void* sbuf,
|
||||
MPI_Datatype rdt, int rcount, void* rbuf )
|
||||
{
|
||||
int myself, tag = 0, i, slength, rlength;
|
||||
int myself, tag = 0, c, t, slength, rlength;
|
||||
MPI_Request req;
|
||||
MPI_Status status;
|
||||
double tstart, tend;
|
||||
double timers[trials];
|
||||
|
||||
MPI_Type_size( sdt, &slength );
|
||||
slength *= scount;
|
||||
@ -237,21 +355,16 @@ static int irecv_send( int cycles,
|
||||
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &myself );
|
||||
|
||||
tstart = MPI_Wtime();
|
||||
for( i = 0; i < cycles; i++ ) {
|
||||
#ifndef FAST
|
||||
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req );
|
||||
MPI_Send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD );
|
||||
MPI_Wait( &req, &status );
|
||||
/*MPI_Request_free( &req );*/
|
||||
#else
|
||||
ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req );
|
||||
ftmpi_mpi_send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD );
|
||||
ftmpi_request_free( &req );
|
||||
#endif
|
||||
for( t = 0; t < trials; t++ ) {
|
||||
timers[t] = MPI_Wtime();
|
||||
for( c = 0; c < cycles; c++ ) {
|
||||
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req );
|
||||
MPI_Send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD );
|
||||
MPI_Wait( &req, &status );
|
||||
}
|
||||
timers[t] = (MPI_Wtime() - timers[t]) / cycles;
|
||||
}
|
||||
tend = MPI_Wtime();
|
||||
print_result( rlength, cycles, tend - tstart );
|
||||
print_result( rlength, trials, timers );
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -259,10 +372,10 @@ static int isend_irecv_wait( int cycles,
|
||||
MPI_Datatype sdt, int scount, void* sbuf,
|
||||
MPI_Datatype rdt, int rcount, void* rbuf )
|
||||
{
|
||||
int myself, tag = 0, i, slength, rlength;
|
||||
MPI_Request sreq, rreq;
|
||||
MPI_Status status;
|
||||
double tstart, tend;
|
||||
int myself, tag = 0, c, t, slength, rlength;
|
||||
MPI_Request requests[2];
|
||||
MPI_Status statuses[2];
|
||||
double timers[trials];
|
||||
|
||||
MPI_Type_size( sdt, &slength );
|
||||
slength *= scount;
|
||||
@ -271,25 +384,16 @@ static int isend_irecv_wait( int cycles,
|
||||
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &myself );
|
||||
|
||||
tstart = MPI_Wtime();
|
||||
for( i = 0; i < cycles; i++ ) {
|
||||
#ifndef FAST
|
||||
MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
|
||||
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
|
||||
MPI_Wait( &sreq, &status );
|
||||
MPI_Wait( &rreq, &status );
|
||||
/*MPI_Request_free( &sreq );*/
|
||||
/*MPI_Request_free( &rreq );*/
|
||||
#else
|
||||
ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
|
||||
ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
|
||||
ftmpi_wait( &sreq, &status );
|
||||
ftmpi_request_free( &sreq );
|
||||
ftmpi_request_free( &rreq );
|
||||
#endif
|
||||
for( t = 0; t < trials; t++ ) {
|
||||
timers[t] = MPI_Wtime();
|
||||
for( c = 0; c < cycles; c++ ) {
|
||||
MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &requests[0] );
|
||||
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &requests[1] );
|
||||
MPI_Waitall( 2, requests, statuses );
|
||||
}
|
||||
timers[t] = (MPI_Wtime() - timers[t]) / cycles;
|
||||
}
|
||||
tend = MPI_Wtime();
|
||||
print_result( rlength, cycles, tend - tstart );
|
||||
print_result( rlength, trials, timers );
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -297,10 +401,10 @@ static int irecv_isend_wait( int cycles,
|
||||
MPI_Datatype sdt, int scount, void* sbuf,
|
||||
MPI_Datatype rdt, int rcount, void* rbuf )
|
||||
{
|
||||
int myself, tag = 0, i, slength, rlength;
|
||||
MPI_Request sreq, rreq;
|
||||
MPI_Status status;
|
||||
double tstart, tend;
|
||||
int myself, tag = 0, c, t, slength, rlength;
|
||||
MPI_Request requests[2];
|
||||
MPI_Status statuses[2];
|
||||
double timers[trials];
|
||||
|
||||
MPI_Type_size( sdt, &slength );
|
||||
slength *= scount;
|
||||
@ -309,74 +413,82 @@ static int irecv_isend_wait( int cycles,
|
||||
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &myself );
|
||||
|
||||
tstart = MPI_Wtime();
|
||||
for( i = 0; i < cycles; i++ ) {
|
||||
#ifndef FAST
|
||||
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
|
||||
MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
|
||||
MPI_Wait( &sreq, &status );
|
||||
MPI_Wait( &rreq, &status );
|
||||
/*MPI_Request_free( &sreq );*/
|
||||
/*MPI_Request_free( &rreq );*/
|
||||
#else
|
||||
ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
|
||||
ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
|
||||
ftmpi_wait( &sreq, &status );
|
||||
ftmpi_request_free( &sreq );
|
||||
ftmpi_request_free( &rreq );
|
||||
#endif
|
||||
for( t = 0; t < trials; t++ ) {
|
||||
timers[t] = MPI_Wtime();
|
||||
for( c = 0; c < cycles; c++ ) {
|
||||
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &requests[0] );
|
||||
MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &requests[1] );
|
||||
MPI_Waitall( 2, requests, statuses );
|
||||
}
|
||||
timers[t] = (MPI_Wtime() - timers[t]) / cycles;
|
||||
}
|
||||
tend = MPI_Wtime();
|
||||
print_result( rlength, cycles, tend - tstart );
|
||||
print_result( rlength, trials, timers);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int do_test_for_ddt( MPI_Datatype sddt, MPI_Datatype rddt, int length )
|
||||
/**
 * Run the benchmarks selected in doop for one send/receive datatype pair.
 *
 * Two scratch buffers of length bytes are allocated, and every selected
 * operation is run for element counts 1, 2, 4, ... while the count does not
 * exceed length / extent(sddt).
 *
 * @param doop    bit mask of DO_PACK, DO_UNPACK, DO_ISEND_RECV,
 *                DO_ISEND_IRECV, DO_IRECV_SEND and DO_IRECV_ISEND; other
 *                bits are ignored here
 * @param sddt    datatype used on the send / pack side
 * @param rddt    datatype used on the receive / unpack side
 * @param length  size in bytes of each scratch buffer
 * @return 0 on success, -1 when sddt has a zero extent or a scratch buffer
 *         cannot be allocated
 */
static int do_test_for_ddt( int doop, MPI_Datatype sddt, MPI_Datatype rddt, int length )
{
    MPI_Aint lb, extent, max_count;
    char *sbuf, *rbuf;
    int i;

    MPI_Type_get_extent( sddt, &lb, &extent );
    if( 0 == extent ) {
        /* length / extent below would divide by zero */
        return -1;
    }
    max_count = length / extent;

    sbuf = (char*)malloc( length );
    rbuf = (char*)malloc( length );
    if( (NULL == sbuf) || (NULL == rbuf) ) {
        free( sbuf );
        free( rbuf );
        return -1;
    }

    if( doop & DO_PACK ) {
        printf("# Pack (max length %d)\n", length);
        for( i = 1; i <= max_count; i *= 2 ) {
            pack( cycles, sddt, i, sbuf, rbuf );
        }
    }

    if( doop & DO_UNPACK ) {
        printf("# Unpack (length %d)\n", length);
        for( i = 1; i <= max_count; i *= 2 ) {
            unpack( cycles, sbuf, rddt, i, rbuf );
        }
    }

    if( doop & DO_ISEND_RECV ) {
        printf( "# Isend recv (length %d)\n", length );
        for( i = 1; i <= max_count; i *= 2 ) {
            isend_recv( cycles, sddt, i, sbuf, rddt, i, rbuf );
        }
    }

    if( doop & DO_ISEND_IRECV ) {
        printf( "# Isend Irecv Wait (length %d)\n", length );
        for( i = 1; i <= max_count; i *= 2 ) {
            isend_irecv_wait( cycles, sddt, i, sbuf, rddt, i, rbuf );
        }
    }

    if( doop & DO_IRECV_SEND ) {
        printf( "# Irecv send (length %d)\n", length );
        for( i = 1; i <= max_count; i *= 2 ) {
            irecv_send( cycles, sddt, i, sbuf, rddt, i, rbuf );
        }
    }

    /* This section has its own flag; it used to be gated on DO_IRECV_SEND,
     * which made DO_IRECV_ISEND a no-op. */
    if( doop & DO_IRECV_ISEND ) {
        printf( "# Irecv Isend Wait (length %d)\n", length );
        for( i = 1; i <= max_count; i *= 2 ) {
            irecv_isend_wait( cycles, sddt, i, sbuf, rddt, i, rbuf );
        }
    }

    free( sbuf );
    free( rbuf );
    return 0;
}
|
||||
|
||||
#define DO_CONTIG 0x01
|
||||
#define DO_CONSTANT_GAP 0x02
|
||||
#define DO_INDEXED_GAP 0x04
|
||||
#define DO_OPTIMIZED_INDEXED_GAP 0x08
|
||||
#define DO_STRUCT_CONSTANT_GAP_RESIZED 0x10
|
||||
|
||||
#define MIN_LENGTH 1024
|
||||
#define MAX_LENGTH (1024*1024)
|
||||
|
||||
int main( int argc, char* argv[] )
|
||||
{
|
||||
int run_tests = 0xffffffff; /* do all tests by default */
|
||||
int length, rank, size;
|
||||
int run_tests = 0xffff; /* do all datatype tests by default */
|
||||
int rank, size;
|
||||
MPI_Datatype ddt;
|
||||
/*int run_tests = DO_CONSTANT_GAP;*/
|
||||
|
||||
run_tests |= DO_PACK | DO_UNPACK;
|
||||
|
||||
MPI_Init (&argc, &argv);
|
||||
|
||||
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
|
||||
@ -389,16 +501,14 @@ int main( int argc, char* argv[] )
|
||||
|
||||
if( run_tests & DO_CONTIG ) {
|
||||
printf( "\ncontiguous datatype\n\n" );
|
||||
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
|
||||
do_test_for_ddt( MPI_INT, MPI_INT, length );
|
||||
do_test_for_ddt( run_tests, MPI_INT, MPI_INT, MAX_LENGTH );
|
||||
}
|
||||
|
||||
if( run_tests & DO_INDEXED_GAP ) {
|
||||
printf( "\nindexed gap\n\n" );
|
||||
ddt = create_indexed_gap_ddt();
|
||||
MPI_DDT_DUMP( ddt );
|
||||
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
|
||||
do_test_for_ddt( ddt, ddt, length );
|
||||
do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH );
|
||||
MPI_Type_free( &ddt );
|
||||
}
|
||||
|
||||
@ -406,8 +516,7 @@ int main( int argc, char* argv[] )
|
||||
printf( "\noptimized indexed gap\n\n" );
|
||||
ddt = create_indexed_gap_optimized_ddt();
|
||||
MPI_DDT_DUMP( ddt );
|
||||
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
|
||||
do_test_for_ddt( ddt, ddt, length );
|
||||
do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH );
|
||||
MPI_Type_free( &ddt );
|
||||
}
|
||||
|
||||
@ -415,8 +524,7 @@ int main( int argc, char* argv[] )
|
||||
printf( "\nconstant indexed gap\n\n" );
|
||||
ddt = create_indexed_constant_gap_ddt( 80, 100, 1 );
|
||||
MPI_DDT_DUMP( ddt );
|
||||
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
|
||||
do_test_for_ddt( ddt, ddt, length );
|
||||
do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH );
|
||||
MPI_Type_free( &ddt );
|
||||
}
|
||||
|
||||
@ -424,8 +532,7 @@ int main( int argc, char* argv[] )
|
||||
printf( "\noptimized constant indexed gap\n\n" );
|
||||
ddt = create_optimized_indexed_constant_gap_ddt( 80, 100, 1 );
|
||||
MPI_DDT_DUMP( ddt );
|
||||
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
|
||||
do_test_for_ddt( ddt, ddt, length );
|
||||
do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH );
|
||||
MPI_Type_free( &ddt );
|
||||
}
|
||||
|
||||
@ -433,8 +540,7 @@ int main( int argc, char* argv[] )
|
||||
printf( "\nstruct constant gap resized\n\n" );
|
||||
ddt = create_struct_constant_gap_resized_ddt( 0 /* unused */, 0 /* unused */, 0 /* unused */ );
|
||||
MPI_DDT_DUMP( ddt );
|
||||
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
|
||||
do_test_for_ddt( ddt, ddt, length );
|
||||
do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH );
|
||||
MPI_Type_free( &ddt );
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2014 The University of Tennessee and The University
|
||||
* Copyright (c) 2014-2019 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
@ -18,7 +18,6 @@
|
||||
#include "opal/runtime/opal.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "opal/datatype/opal_datatype_internal.h"
|
||||
// #include <mpi.h>
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
@ -61,6 +60,18 @@ static void print_bar_pbar(struct foo_t* bar, struct pfoo_t* pbar)
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
/**
 * Dump the state of a convertor on stdout: the current stack position and
 * the converted/local sizes, followed by one line (index, basic type name,
 * count, displacement) for every stack entry from 0 up to and including
 * stack_pos, and a terminating blank line.
 */
static void print_stack(opal_convertor_t* conv)
{
    uint32_t level = 0;

    printf("Stack pos %d [converted %" PRIsize_t "/%" PRIsize_t "]\n",
           conv->stack_pos, conv->bConverted, conv->local_size);

    while( level <= conv->stack_pos ) {
        printf( "[%u] index %d, type %s count %" PRIsize_t " disp %p\n",
                level,
                conv->pStack[level].index,
                opal_datatype_basicDatatypes[conv->pStack[level].type]->name,
                conv->pStack[level].count,
                (void*)conv->pStack[level].disp);
        level++;
    }

    printf("\n");
}
|
||||
|
||||
static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) {
|
||||
int i, j, errors = 0;
|
||||
struct iovec a;
|
||||
@ -104,6 +115,7 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) {
|
||||
max_data = a.iov_len;
|
||||
pos = arr[i][1];
|
||||
opal_convertor_set_position(pConv, &pos);
|
||||
print_stack(pConv);
|
||||
assert(arr[i][1] == pos);
|
||||
opal_convertor_unpack( pConv, &a, &iov_count, &max_data );
|
||||
a.iov_base = (char*)a.iov_base - 1024;
|
||||
@ -118,9 +130,10 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) {
|
||||
bar[j].d[1] != 0.0 ||
|
||||
bar[j].d[2] != pbar[j].d[1]) {
|
||||
if(0 == errors) {
|
||||
fprintf(stderr, "ERROR ! count=%d, position=%d, ptr = %p"
|
||||
(void)opal_datatype_dump(&newtype->super);
|
||||
fprintf(stderr, "ERROR ! position=%d/%d, ptr = %p"
|
||||
" got (%d,%d,%d,%g,%g,%g) expected (%d,%d,%d,%g,%g,%g)\n",
|
||||
N, j, (void*)&bar[j],
|
||||
j, N, (void*)&bar[j],
|
||||
bar[j].i[0],
|
||||
bar[j].i[1],
|
||||
bar[j].i[2],
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user