1
1

Small improvements on the test.

Rework the to_self test to be able to be used as a benchmark.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
George Bosilca 2019-05-18 19:31:24 -04:00
родитель 75a53976a3
Коммит 0a24f0374e
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 09C926752C9F09B1
7 изменённых файлов: 267 добавлений и 153 удалений

Просмотреть файл

@ -357,7 +357,7 @@ opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor,
*/
if( OPAL_LIKELY(0 == count) ) {
pStack[1].type = pElems->elem.common.type;
pStack[1].count = pElems->elem.count;
pStack[1].count = pElems->elem.blocklen;
} else {
pStack[1].type = OPAL_DATATYPE_UINT1;
pStack[1].count = pData->size - count;

Просмотреть файл

@ -167,15 +167,18 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
if( ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ==
(current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)) &&
(current->disp == (last.disp + (ptrdiff_t)last.count * last.extent)) &&
((last.count == 1) || (current->count == 1) || (last.extent == current->extent)) ) {
((current->count == 1) || (last.extent == current->extent)) ) {
last.count += current->count;
if( last.count == 1 ) {
last.extent = current->extent;
} /* otherwise keep the last.extent */
/* find the lowest common denominator type */
if( last.common.type != current->common.type ) {
last.common.type = OPAL_DATATYPE_UINT1;
last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size;
last.common.type = OPAL_DATATYPE_UINT1;
}
/* maximize the contiguous pieces */
if( last.extent == (ptrdiff_t)(last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ) {
last.blocklen *= last.count;
last.count = 1;
last.extent = last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size;
}
continue; /* next data */
}

Просмотреть файл

@ -33,9 +33,6 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
uint32_t *iovec_count,
int increment)
{
opal_convertor_t *convertor;
size_t remaining_length = 0;
uint32_t i;
@ -43,7 +40,6 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
struct iovec *temp_iov=NULL;
size_t temp_data;
convertor = opal_convertor_create( opal_local_arch, 0 );
if (OMPI_SUCCESS != opal_convertor_prepare_for_send (convertor,
@ -55,9 +51,9 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
}
if ( 0 == datatype->super.size ) {
*iovec_count = 0;
*iov = NULL;
return OMPI_SUCCESS;
*iovec_count = 0;
*iov = NULL;
return OMPI_SUCCESS;
}
remaining_length = count * datatype->super.size;
@ -69,10 +65,8 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
return OMPI_ERR_OUT_OF_RESOURCE;
}
while (0 == opal_convertor_raw(convertor,
temp_iov,
&temp_count,
&temp_data)) {
while (0 == opal_convertor_raw(convertor, temp_iov,
&temp_count, &temp_data)) {
*iovec_count = *iovec_count + temp_count;
*iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec));
if (NULL == *iov) {
@ -80,7 +74,7 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
free(temp_iov);
return OMPI_ERR_OUT_OF_RESOURCE;
}
for (i=0 ; i<temp_count ; i++) {
for (i = 0 ; i < temp_count ; i++) {
(*iov)[i+(*iovec_count-temp_count)].iov_base = temp_iov[i].iov_base;
(*iov)[i+(*iovec_count-temp_count)].iov_len = temp_iov[i].iov_len;
}
@ -90,12 +84,12 @@ mca_common_ompio_decode_datatype ( ompi_datatype_t *datatype,
}
*iovec_count = *iovec_count + temp_count;
if ( temp_count > 0 ) {
*iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec));
if (NULL == *iov) {
opal_output(1, "OUT OF MEMORY\n");
*iov = (struct iovec *) realloc (*iov, *iovec_count * sizeof(struct iovec));
if (NULL == *iov) {
opal_output(1, "OUT OF MEMORY\n");
free(temp_iov);
return OMPI_ERR_OUT_OF_RESOURCE;
}
return OMPI_ERR_OUT_OF_RESOURCE;
}
}
for (i=0 ; i<temp_count ; i++) {
(*iov)[i+(*iovec_count-temp_count)].iov_base = temp_iov[i].iov_base;
@ -342,7 +336,6 @@ int main (int argc, char *argv[]) {
struct iovec * iov_1 = NULL;
mca_common_ompio_decode_datatype ( datatype, 1, &iov_1, &iovec_count_1, 1);
assert(iovec_count_300 == iovec_count_10);
assert(iovec_count_300 == iovec_count_1);
// assert(iov[100].iov_base == iov2[100].iov_base);

Просмотреть файл

@ -159,8 +159,7 @@ static int local_copy_ddt_count( opal_datatype_t const * const pdt, int count )
osrc = (char*)malloc( malloced_size );
{
for( size_t i = 0; i < malloced_size; i++ )
osrc[i] = i % 128 + 32;
for( size_t i = 0; i < malloced_size; i++ ) osrc[i] = i % 128 + 32;
memcpy(odst, osrc, malloced_size);
}
pdst = odst - lb;

Просмотреть файл

@ -445,7 +445,7 @@ static int32_t opal_datatype_create_vector( int count, int bLength, int stride,
}
pData = opal_datatype_create( oldType->desc.used + 2 );
if( (bLength == stride) || (1 >= count) ) { /* the elements are contiguous */
if( (bLength == stride) || (1 == count) ) { /* the elements are contiguous */
opal_datatype_add( pData, oldType, count * bLength, 0, extent );
} else {
if( 1 == bLength ) {
@ -476,7 +476,7 @@ static int32_t opal_datatype_create_hvector( int count, int bLength, ptrdiff_t s
}
pTempData = opal_datatype_create( oldType->desc.used + 2 );
if( ((extent * bLength) == stride) || (1 >= count) ) { /* contiguous */
if( ((extent * bLength) == stride) || (1 == count) ) { /* contiguous */
pData = pTempData;
opal_datatype_add( pData, oldType, count * bLength, 0, extent );
} else {

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
@ -15,8 +15,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#if OPEN_MPI && 0
#if 0 && OPEN_MPI
extern void ompi_datatype_dump( MPI_Datatype ddt );
#define MPI_DDT_DUMP(ddt) ompi_datatype_dump( (ddt) )
#else
@ -178,23 +179,145 @@ create_indexed_gap_optimized_ddt( void )
return dt3;
}
static void print_result( int length, int cycles, double time )
{
double bandwidth, clock_prec;
/********************************************************************
*******************************************************************/
#define DO_CONTIG 0x00000001
#define DO_CONSTANT_GAP 0x00000002
#define DO_INDEXED_GAP 0x00000004
#define DO_OPTIMIZED_INDEXED_GAP 0x00000008
#define DO_STRUCT_CONSTANT_GAP_RESIZED 0x00000010
#define DO_PACK 0x01000000
#define DO_UNPACK 0x02000000
#define DO_ISEND_RECV 0x04000000
#define DO_ISEND_IRECV 0x08000000
#define DO_IRECV_SEND 0x10000000
#define DO_IRECV_ISEND 0x20000000
#define MIN_LENGTH 1024
#define MAX_LENGTH (1024*1024)
static int cycles = 100;
static int trials = 20;
static int warmups = 2;
/*
 * Report timing statistics for one benchmark configuration.
 *
 * The per-trial timers are sorted and only the inter-quartile range is
 * kept (drops outliers from the fastest and slowest quartiles); the
 * average, min, max and standard deviation of the retained samples are
 * printed together with the resulting bandwidth.
 *
 * length: number of bytes moved per cycle.
 * trials: number of valid entries in timers[].
 * timers: average time (seconds) per cycle, one entry per trial.
 *
 * Fix: removed two stale statements left over from the previous version
 * of this function that referenced the no-longer-existing `time` and
 * `cycles` parameters (dead diff residue that would not compile).
 */
static void print_result( int length, int trials, double* timers )
{
    double bandwidth, clock_prec, temp;
    double min_time, max_time, average, std_dev = 0.0;
    double ordered[trials];
    int t, pos, quartile_start, quartile_end;

    /* selection sort of the timers into ordered[], ascending */
    for( t = 0; t < trials; ordered[t] = timers[t], t++ );
    for( t = 0; t < trials-1; t++ ) {
        temp = ordered[t];
        pos = t;
        for( int i = t+1; i < trials; i++ ) {
            if( temp > ordered[i] ) {
                temp = ordered[i];
                pos = i;
            }
        }
        if( pos != t ) {
            temp = ordered[t];
            ordered[t] = ordered[pos];
            ordered[pos] = temp;
        }
    }
    /* keep only the middle two quartiles of the sorted samples */
    quartile_start = trials - (3 * trials) / 4;
    quartile_end   = trials - (1 * trials) / 4;
    clock_prec = MPI_Wtick();
    min_time = ordered[quartile_start];
    max_time = ordered[quartile_start];
    average  = ordered[quartile_start];
    for( t = quartile_start + 1; t < quartile_end; t++ ) {
        if( min_time > ordered[t] ) min_time = ordered[t];
        if( max_time < ordered[t] ) max_time = ordered[t];
        average += ordered[t];
    }
    average /= (quartile_end - quartile_start);
    for( t = quartile_start; t < quartile_end; t++ ) {
        std_dev += (ordered[t] - average) * (ordered[t] - average);
    }
    std_dev = sqrt( std_dev/(quartile_end - quartile_start) );
    /* NOTE(review): the clock_prec factors cancel out mathematically;
     * kept to preserve the original formula. */
    bandwidth = (length * clock_prec) / (1024.0 * 1024.0) / (average * clock_prec);
    printf( "%8d\t%15g\t%10.4f MB/s [min %10g max %10g std %2.2f%%]\n", length, average, bandwidth,
            min_time, max_time, (100.0 * std_dev) / average );
}
/*
 * Benchmark MPI_Pack: pack scount elements of sdt from sbuf into
 * packed_buf, running `warmups` untimed rounds followed by `trials`
 * timed rounds of `cycles` iterations each, then print the statistics.
 * Always returns 0.
 */
static int pack( int cycles,
                 MPI_Datatype sdt, int scount, void* sbuf,
                 void* packed_buf )
{
    int position, myself, packed_size;
    double timers[trials];

    MPI_Type_size( sdt, &packed_size );
    packed_size *= scount;
    MPI_Comm_rank( MPI_COMM_WORLD, &myself );

    /* warm-up rounds: same work as the timed rounds, result discarded */
    for( int w = 0; w < warmups; w++ ) {
        for( int iter = 0; iter < cycles; iter++ ) {
            position = 0;
            MPI_Pack(sbuf, scount, sdt, packed_buf, packed_size, &position, MPI_COMM_WORLD);
        }
    }
    /* timed rounds: each trial records the average time of one cycle */
    for( int trial = 0; trial < trials; trial++ ) {
        double start = MPI_Wtime();
        for( int iter = 0; iter < cycles; iter++ ) {
            position = 0;
            MPI_Pack(sbuf, scount, sdt, packed_buf, packed_size, &position, MPI_COMM_WORLD);
        }
        timers[trial] = (MPI_Wtime() - start) / cycles;
    }
    print_result( packed_size, trials, timers );
    return 0;
}
/*
 * Benchmark MPI_Unpack: unpack rcount elements of rdt from packed_buf
 * into rbuf, running `warmups` untimed rounds followed by `trials`
 * timed rounds of `cycles` iterations each, then print the statistics.
 * Always returns 0.
 */
static int unpack( int cycles,
                   void* packed_buf,
                   MPI_Datatype rdt, int rcount, void* rbuf )
{
    int position, myself, packed_size;
    double timers[trials];

    MPI_Type_size( rdt, &packed_size );
    packed_size *= rcount;
    MPI_Comm_rank( MPI_COMM_WORLD, &myself );

    /* warm-up rounds: same work as the timed rounds, result discarded */
    for( int w = 0; w < warmups; w++ ) {
        for( int iter = 0; iter < cycles; iter++ ) {
            position = 0;
            MPI_Unpack(packed_buf, packed_size, &position, rbuf, rcount, rdt, MPI_COMM_WORLD);
        }
    }
    /* timed rounds: each trial records the average time of one cycle */
    for( int trial = 0; trial < trials; trial++ ) {
        double start = MPI_Wtime();
        for( int iter = 0; iter < cycles; iter++ ) {
            position = 0;
            MPI_Unpack(packed_buf, packed_size, &position, rbuf, rcount, rdt, MPI_COMM_WORLD);
        }
        timers[trial] = (MPI_Wtime() - start) / cycles;
    }
    print_result( packed_size, trials, timers );
    return 0;
}
/*
 * Benchmark a self Isend + blocking Recv + Wait round-trip, `trials`
 * timed rounds of `cycles` iterations each. Always returns 0.
 *
 * Fix: removed stale diff residue from the previous revision — the old
 * `tstart`/`tend` timing variables, the `i` loop index, and the dead
 * `#ifndef FAST` branch calling nonexistent ftmpi_* functions.
 */
static int isend_recv( int cycles,
                       MPI_Datatype sdt, int scount, void* sbuf,
                       MPI_Datatype rdt, int rcount, void* rbuf )
{
    int myself, tag = 0, c, t, slength, rlength;
    MPI_Status status;
    MPI_Request req;
    double timers[trials];

    MPI_Type_size( sdt, &slength );
    slength *= scount;
    MPI_Type_size( rdt, &rlength );
    rlength *= rcount;
    MPI_Comm_rank( MPI_COMM_WORLD, &myself );

    for( t = 0; t < trials; t++ ) {
        timers[t] = MPI_Wtime();
        for( c = 0; c < cycles; c++ ) {
            MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req );
            MPI_Recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status );
            MPI_Wait( &req, &status );
        }
        timers[t] = (MPI_Wtime() - timers[t]) / cycles;
    }
    print_result( rlength, trials, timers );
    return 0;
}
@ -225,10 +343,10 @@ static int irecv_send( int cycles,
MPI_Datatype sdt, int scount, void* sbuf,
MPI_Datatype rdt, int rcount, void* rbuf )
{
int myself, tag = 0, i, slength, rlength;
int myself, tag = 0, c, t, slength, rlength;
MPI_Request req;
MPI_Status status;
double tstart, tend;
double timers[trials];
MPI_Type_size( sdt, &slength );
slength *= scount;
@ -237,21 +355,16 @@ static int irecv_send( int cycles,
MPI_Comm_rank( MPI_COMM_WORLD, &myself );
tstart = MPI_Wtime();
for( i = 0; i < cycles; i++ ) {
#ifndef FAST
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req );
MPI_Send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD );
MPI_Wait( &req, &status );
/*MPI_Request_free( &req );*/
#else
ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req );
ftmpi_mpi_send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD );
ftmpi_request_free( &req );
#endif
for( t = 0; t < trials; t++ ) {
timers[t] = MPI_Wtime();
for( c = 0; c < cycles; c++ ) {
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req );
MPI_Send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD );
MPI_Wait( &req, &status );
}
timers[t] = (MPI_Wtime() - timers[t]) / cycles;
}
tend = MPI_Wtime();
print_result( rlength, cycles, tend - tstart );
print_result( rlength, trials, timers );
return 0;
}
@ -259,10 +372,10 @@ static int isend_irecv_wait( int cycles,
MPI_Datatype sdt, int scount, void* sbuf,
MPI_Datatype rdt, int rcount, void* rbuf )
{
int myself, tag = 0, i, slength, rlength;
MPI_Request sreq, rreq;
MPI_Status status;
double tstart, tend;
int myself, tag = 0, c, t, slength, rlength;
MPI_Request requests[2];
MPI_Status statuses[2];
double timers[trials];
MPI_Type_size( sdt, &slength );
slength *= scount;
@ -271,25 +384,16 @@ static int isend_irecv_wait( int cycles,
MPI_Comm_rank( MPI_COMM_WORLD, &myself );
tstart = MPI_Wtime();
for( i = 0; i < cycles; i++ ) {
#ifndef FAST
MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
MPI_Wait( &sreq, &status );
MPI_Wait( &rreq, &status );
/*MPI_Request_free( &sreq );*/
/*MPI_Request_free( &rreq );*/
#else
ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
ftmpi_wait( &sreq, &status );
ftmpi_request_free( &sreq );
ftmpi_request_free( &rreq );
#endif
for( t = 0; t < trials; t++ ) {
timers[t] = MPI_Wtime();
for( c = 0; c < cycles; c++ ) {
MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &requests[0] );
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &requests[1] );
MPI_Waitall( 2, requests, statuses );
}
timers[t] = (MPI_Wtime() - timers[t]) / cycles;
}
tend = MPI_Wtime();
print_result( rlength, cycles, tend - tstart );
print_result( rlength, trials, timers );
return 0;
}
@ -297,10 +401,10 @@ static int irecv_isend_wait( int cycles,
MPI_Datatype sdt, int scount, void* sbuf,
MPI_Datatype rdt, int rcount, void* rbuf )
{
int myself, tag = 0, i, slength, rlength;
MPI_Request sreq, rreq;
MPI_Status status;
double tstart, tend;
int myself, tag = 0, c, t, slength, rlength;
MPI_Request requests[2];
MPI_Status statuses[2];
double timers[trials];
MPI_Type_size( sdt, &slength );
slength *= scount;
@ -309,74 +413,82 @@ static int irecv_isend_wait( int cycles,
MPI_Comm_rank( MPI_COMM_WORLD, &myself );
tstart = MPI_Wtime();
for( i = 0; i < cycles; i++ ) {
#ifndef FAST
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
MPI_Wait( &sreq, &status );
MPI_Wait( &rreq, &status );
/*MPI_Request_free( &sreq );*/
/*MPI_Request_free( &rreq );*/
#else
ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
ftmpi_wait( &sreq, &status );
ftmpi_request_free( &sreq );
ftmpi_request_free( &rreq );
#endif
for( t = 0; t < trials; t++ ) {
timers[t] = MPI_Wtime();
for( c = 0; c < cycles; c++ ) {
MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &requests[0] );
MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &requests[1] );
MPI_Waitall( 2, requests, statuses );
}
timers[t] = (MPI_Wtime() - timers[t]) / cycles;
}
tend = MPI_Wtime();
print_result( rlength, cycles, tend - tstart );
print_result( rlength, trials, timers);
return 0;
}
/*
 * Run the benchmarks selected by the `doop` bitmask (DO_PACK, DO_UNPACK,
 * DO_ISEND_RECV, DO_ISEND_IRECV, DO_IRECV_SEND, DO_IRECV_ISEND) for the
 * given send/receive datatypes, doubling the element count from 1 up to
 * length/extent. Always returns 0 on success, -1 on allocation failure.
 *
 * Fixes:
 *  - removed stale diff residue (the previous signature, duplicate `int i;`
 *    declaration, and the old unguarded benchmark loops);
 *  - the last benchmark gate tested DO_IRECV_SEND twice; the Irecv/Isend/
 *    Waitall case is now correctly gated on DO_IRECV_ISEND;
 *  - the two malloc results are now checked before use.
 */
static int do_test_for_ddt( int doop, MPI_Datatype sddt, MPI_Datatype rddt, int length )
{
    MPI_Aint lb, extent;
    char *sbuf, *rbuf;
    int i;

    MPI_Type_get_extent( sddt, &lb, &extent );
    sbuf = (char*)malloc( length );
    rbuf = (char*)malloc( length );
    if( (NULL == sbuf) || (NULL == rbuf) ) {
        free( sbuf );
        free( rbuf );
        printf( "Out of memory allocating the %d bytes test buffers\n", length );
        return -1;
    }
    if( doop & DO_PACK ) {
        printf("# Pack (max length %d)\n", length);
        for( i = 1; i <= (length/extent); i *= 2 ) {
            pack( cycles, sddt, i, sbuf, rbuf );
        }
    }
    if( doop & DO_UNPACK ) {
        printf("# Unpack (length %d)\n", length);
        for( i = 1; i <= (length/extent); i *= 2 ) {
            unpack( cycles, sbuf, rddt, i, rbuf );
        }
    }
    if( doop & DO_ISEND_RECV ) {
        printf( "# Isend recv (length %d)\n", length );
        for( i = 1; i <= (length/extent); i *= 2 ) {
            isend_recv( cycles, sddt, i, sbuf, rddt, i, rbuf );
        }
    }
    if( doop & DO_ISEND_IRECV ) {
        printf( "# Isend Irecv Wait (length %d)\n", length );
        for( i = 1; i <= (length/extent); i *= 2 ) {
            isend_irecv_wait( cycles, sddt, i, sbuf, rddt, i, rbuf );
        }
    }
    if( doop & DO_IRECV_SEND ) {
        printf( "# Irecv send (length %d)\n", length );
        for( i = 1; i <= (length/extent); i *= 2 ) {
            irecv_send( cycles, sddt, i, sbuf, rddt, i, rbuf );
        }
    }
    if( doop & DO_IRECV_ISEND ) {  /* was DO_IRECV_SEND: copy-paste bug */
        printf( "# Irecv Isend Wait (length %d)\n", length );
        for( i = 1; i <= (length/extent); i *= 2 ) {
            irecv_isend_wait( cycles, sddt, i, sbuf, rddt, i, rbuf );
        }
    }
    free( sbuf );
    free( rbuf );
    return 0;
}
#define DO_CONTIG 0x01
#define DO_CONSTANT_GAP 0x02
#define DO_INDEXED_GAP 0x04
#define DO_OPTIMIZED_INDEXED_GAP 0x08
#define DO_STRUCT_CONSTANT_GAP_RESIZED 0x10
#define MIN_LENGTH 1024
#define MAX_LENGTH (1024*1024)
int main( int argc, char* argv[] )
{
int run_tests = 0xffffffff; /* do all tests by default */
int length, rank, size;
int run_tests = 0xffff; /* do all datatype tests by default */
int rank, size;
MPI_Datatype ddt;
/*int run_tests = DO_CONSTANT_GAP;*/
run_tests |= DO_PACK | DO_UNPACK;
MPI_Init (&argc, &argv);
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
@ -389,16 +501,14 @@ int main( int argc, char* argv[] )
if( run_tests & DO_CONTIG ) {
printf( "\ncontiguous datatype\n\n" );
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
do_test_for_ddt( MPI_INT, MPI_INT, length );
do_test_for_ddt( run_tests, MPI_INT, MPI_INT, MAX_LENGTH );
}
if( run_tests & DO_INDEXED_GAP ) {
printf( "\nindexed gap\n\n" );
ddt = create_indexed_gap_ddt();
MPI_DDT_DUMP( ddt );
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
do_test_for_ddt( ddt, ddt, length );
do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH );
MPI_Type_free( &ddt );
}
@ -406,8 +516,7 @@ int main( int argc, char* argv[] )
printf( "\noptimized indexed gap\n\n" );
ddt = create_indexed_gap_optimized_ddt();
MPI_DDT_DUMP( ddt );
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
do_test_for_ddt( ddt, ddt, length );
do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH );
MPI_Type_free( &ddt );
}
@ -415,8 +524,7 @@ int main( int argc, char* argv[] )
printf( "\nconstant indexed gap\n\n" );
ddt = create_indexed_constant_gap_ddt( 80, 100, 1 );
MPI_DDT_DUMP( ddt );
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
do_test_for_ddt( ddt, ddt, length );
do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH );
MPI_Type_free( &ddt );
}
@ -424,8 +532,7 @@ int main( int argc, char* argv[] )
printf( "\noptimized constant indexed gap\n\n" );
ddt = create_optimized_indexed_constant_gap_ddt( 80, 100, 1 );
MPI_DDT_DUMP( ddt );
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
do_test_for_ddt( ddt, ddt, length );
do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH );
MPI_Type_free( &ddt );
}
@ -433,8 +540,7 @@ int main( int argc, char* argv[] )
printf( "\nstruct constant gap resized\n\n" );
ddt = create_struct_constant_gap_resized_ddt( 0 /* unused */, 0 /* unused */, 0 /* unused */ );
MPI_DDT_DUMP( ddt );
for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
do_test_for_ddt( ddt, ddt, length );
do_test_for_ddt( run_tests, ddt, ddt, MAX_LENGTH );
MPI_Type_free( &ddt );
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2014 The University of Tennessee and The University
* Copyright (c) 2014-2019 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
@ -18,7 +18,6 @@
#include "opal/runtime/opal.h"
#include "opal/datatype/opal_convertor.h"
#include "opal/datatype/opal_datatype_internal.h"
// #include <mpi.h>
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
@ -61,6 +60,18 @@ static void print_bar_pbar(struct foo_t* bar, struct pfoo_t* pbar)
fprintf(stderr, "\n");
}
/* Debugging aid: dump the convertor's position stack (current depth,
 * conversion progress, and one line per stack entry) to stdout. */
static void print_stack(opal_convertor_t* conv)
{
    uint32_t depth;

    printf("Stack pos %d [converted %" PRIsize_t "/%" PRIsize_t "]\n",
           conv->stack_pos, conv->bConverted, conv->local_size);
    for( depth = 0; depth <= conv->stack_pos; depth++ ) {
        const char* type_name = opal_datatype_basicDatatypes[conv->pStack[depth].type]->name;
        printf( "[%u] index %d, type %s count %" PRIsize_t " disp %p\n",
                depth, conv->pStack[depth].index, type_name,
                conv->pStack[depth].count, (void*)conv->pStack[depth].disp);
    }
    printf("\n");
}
static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) {
int i, j, errors = 0;
struct iovec a;
@ -104,6 +115,7 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) {
max_data = a.iov_len;
pos = arr[i][1];
opal_convertor_set_position(pConv, &pos);
print_stack(pConv);
assert(arr[i][1] == pos);
opal_convertor_unpack( pConv, &a, &iov_count, &max_data );
a.iov_base = (char*)a.iov_base - 1024;
@ -118,9 +130,10 @@ static int testcase(ompi_datatype_t * newtype, size_t arr[10][2]) {
bar[j].d[1] != 0.0 ||
bar[j].d[2] != pbar[j].d[1]) {
if(0 == errors) {
fprintf(stderr, "ERROR ! count=%d, position=%d, ptr = %p"
(void)opal_datatype_dump(&newtype->super);
fprintf(stderr, "ERROR ! position=%d/%d, ptr = %p"
" got (%d,%d,%d,%g,%g,%g) expected (%d,%d,%d,%g,%g,%g)\n",
N, j, (void*)&bar[j],
j, N, (void*)&bar[j],
bar[j].i[0],
bar[j].i[1],
bar[j].i[2],