From 33a3ace8740538069e4d96eabb518e987b69f82c Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 30 Apr 2015 17:18:00 -0400 Subject: [PATCH] Minimize the alignments. We only do it when we need to pack data that must be aligned (aka the displacement). All other cases do not require special alignments, and are treated normally. Fix the comment regarding the alignment requirements. --- ompi/datatype/ompi_datatype_args.c | 34 +++++++++++------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index 84e37942fa..3083afaf1d 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -64,12 +64,9 @@ typedef struct __dt_args { * copy the buffer into an aligned buffer first. */ #if OPAL_ALIGN_WORD_SIZE_INTEGERS -#define OMPI_DATATYPE_ALIGN_INT(VALUE, TYPE) \ - (VALUE) = OPAL_ALIGN((VALUE), sizeof(OPAL_PTRDIFF_TYPE), TYPE) #define OMPI_DATATYPE_ALIGN_PTR(PTR, TYPE) \ (PTR) = OPAL_ALIGN_PTR((PTR), sizeof(OPAL_PTRDIFF_TYPE), TYPE) #else -#define OMPI_DATATYPE_ALIGN_INT(VALUE, TYPE) #define OMPI_DATATYPE_ALIGN_PTR(PTR, TYPE) #endif /* OPAL_ALIGN_WORD_SIZE_INTEGERS */ @@ -104,8 +101,7 @@ typedef struct __dt_args { else pArgs->i = (int*)buf; \ pArgs->ref_count = 1; \ pArgs->total_pack_size = (4 + (IC)) * sizeof(int) + \ - (AC) * sizeof(OPAL_PTRDIFF_TYPE) + (DC) * sizeof(int); \ - OMPI_DATATYPE_ALIGN_INT( pArgs->total_pack_size, int ); \ + (AC) * sizeof(OPAL_PTRDIFF_TYPE); \ (PDATA)->args = (void*)pArgs; \ (PDATA)->packed_description = NULL; \ } while(0) @@ -451,17 +447,16 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype, position[2] = args->ca; position[3] = args->cd; next_packed += (4 * sizeof(int)); - /* So far there are 4 integers in the array, so we're still 64 bits aligned - * if we suppose that the original buffer was 64 bits aligned. - * - * In order to solve issues with the Sparc 64 which require 64 bits pointers - * to be correctly aligned, we have to start adding the data in a smart way, - * just to keep everything as aligned as possible. Therefore, the first - * array we have to copy is the array of displacements, followed by the - * array of datatypes (both of them might be arrays of pointers) and then - * finally the array of counts. + /* Spoiler: We will access the data in this storage structure, and thus we + * need to align it to the expected boundaries (special thanks to Sparc64). + * The simplest way is to ensure that prior to each type that must be 64 + * bits aligned, we have a pointer that is 64 bits aligned. That will minimize + * the memory requirements in all cases where no displacements are stored. */ if( 0 < args->ca ) { + /* description of the displacements must be 64 bits aligned */ + OMPI_DATATYPE_ALIGN_PTR(next_packed, char*); + memcpy( next_packed, args->a, sizeof(OPAL_PTRDIFF_TYPE) * args->ca ); next_packed += sizeof(OPAL_PTRDIFF_TYPE) * args->ca; } @@ -472,9 +467,6 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype, memcpy( next_packed, args->i, sizeof(int) * args->ci ); next_packed += args->ci * sizeof(int); - /* description of next datatype should be 64 bits aligned */ - OMPI_DATATYPE_ALIGN_PTR(next_packed, char*); - /* copy the rest of the data */ for( i = 0; i < args->cd; i++ ) { ompi_datatype_t* temp_data = args->d[i]; @@ -512,7 +504,6 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype, __ompi_datatype_pack_description( datatype, &recursive_buffer, &next_index ); if( !ompi_datatype_is_predefined(datatype) ) { args->total_pack_size = (uintptr_t)((char*)recursive_buffer - (char*)datatype->packed_description); - OMPI_DATATYPE_ALIGN_PTR(args->total_pack_size, char*); } } @@ -560,8 +551,6 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p #endif next_buffer = (char*)*packed_buffer; - /* The pointer should always be aligned on MPI_Aint, aka OPAL_PTRDIFF_TYPE */ - OMPI_DATATYPE_ALIGN_PTR(next_buffer, char*); position = (int*)next_buffer; create_type = position[0]; @@ -597,6 +586,9 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p number_of_datatype ); next_buffer += (4 * sizeof(int)); /* move after the header */ + /* The pointer should always be aligned on MPI_Aint, aka OPAL_PTRDIFF_TYPE */ + OMPI_DATATYPE_ALIGN_PTR(next_buffer, char*); + array_of_disp = (OPAL_PTRDIFF_TYPE*)next_buffer; next_buffer += number_of_disp * sizeof(OPAL_PTRDIFF_TYPE); /* the other datatypes */ @@ -788,8 +780,6 @@ ompi_datatype_t* ompi_datatype_create_from_packed_description( void** packed_buf datatype = __ompi_datatype_create_from_packed_description( packed_buffer, remote_processor ); - /* Keep the pointer aligned to MPI_Aint */ - OMPI_DATATYPE_ALIGN_PTR(*packed_buffer, void*); if( NULL == datatype ) { return NULL; }