diff --git a/ompi/datatype/ompi_datatype_args.c b/ompi/datatype/ompi_datatype_args.c index 84e37942fa..3083afaf1d 100644 --- a/ompi/datatype/ompi_datatype_args.c +++ b/ompi/datatype/ompi_datatype_args.c @@ -64,12 +64,9 @@ typedef struct __dt_args { * copy the buffer into an aligned buffer first. */ #if OPAL_ALIGN_WORD_SIZE_INTEGERS -#define OMPI_DATATYPE_ALIGN_INT(VALUE, TYPE) \ - (VALUE) = OPAL_ALIGN((VALUE), sizeof(OPAL_PTRDIFF_TYPE), TYPE) #define OMPI_DATATYPE_ALIGN_PTR(PTR, TYPE) \ (PTR) = OPAL_ALIGN_PTR((PTR), sizeof(OPAL_PTRDIFF_TYPE), TYPE) #else -#define OMPI_DATATYPE_ALIGN_INT(VALUE, TYPE) #define OMPI_DATATYPE_ALIGN_PTR(PTR, TYPE) #endif /* OPAL_ALIGN_WORD_SIZE_INTEGERS */ @@ -104,8 +101,7 @@ typedef struct __dt_args { else pArgs->i = (int*)buf; \ pArgs->ref_count = 1; \ pArgs->total_pack_size = (4 + (IC)) * sizeof(int) + \ - (AC) * sizeof(OPAL_PTRDIFF_TYPE) + (DC) * sizeof(int); \ - OMPI_DATATYPE_ALIGN_INT( pArgs->total_pack_size, int ); \ + (AC) * sizeof(OPAL_PTRDIFF_TYPE); \ (PDATA)->args = (void*)pArgs; \ (PDATA)->packed_description = NULL; \ } while(0) @@ -451,17 +447,16 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype, position[2] = args->ca; position[3] = args->cd; next_packed += (4 * sizeof(int)); - /* So far there are 4 integers in the array, so we're still 64 bits aligned - * if we suppose that the original buffer was 64 bits aligned. - * - * In order to solve issues with the Sparc 64 which require 64 bits pointers - * to be correctly aligned, we have to start adding the data in a smart way, - * just to keep everything as aligned as possible. Therefore, the first - * array we have to copy is the array of displacements, followed by the - * array of datatypes (both of them might be arrays of pointers) and then - * finally the array of counts. + /* Spoiler: We will access the data in this storage structure, and thus we + * need to align it to the expected boundaries (special thanks to Sparc64). + * The simplest way is to ensure that prior to each type that must be 64 + * bits aligned, we have a pointer that is 64 bits aligned. That will minimize + * the memory requirements in all cases where no displacements are stored. */ if( 0 < args->ca ) { + /* description of the displacements must be 64 bits aligned */ + OMPI_DATATYPE_ALIGN_PTR(next_packed, char*); + memcpy( next_packed, args->a, sizeof(OPAL_PTRDIFF_TYPE) * args->ca ); next_packed += sizeof(OPAL_PTRDIFF_TYPE) * args->ca; } @@ -472,9 +467,6 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype, memcpy( next_packed, args->i, sizeof(int) * args->ci ); next_packed += args->ci * sizeof(int); - /* description of next datatype should be 64 bits aligned */ - OMPI_DATATYPE_ALIGN_PTR(next_packed, char*); - /* copy the rest of the data */ for( i = 0; i < args->cd; i++ ) { ompi_datatype_t* temp_data = args->d[i]; @@ -512,7 +504,6 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype, __ompi_datatype_pack_description( datatype, &recursive_buffer, &next_index ); if( !ompi_datatype_is_predefined(datatype) ) { args->total_pack_size = (uintptr_t)((char*)recursive_buffer - (char*)datatype->packed_description); - OMPI_DATATYPE_ALIGN_PTR(args->total_pack_size, char*); } } @@ -560,8 +551,6 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p #endif next_buffer = (char*)*packed_buffer; - /* The pointer should always be aligned on MPI_Aint, aka OPAL_PTRDIFF_TYPE */ - OMPI_DATATYPE_ALIGN_PTR(next_buffer, char*); position = (int*)next_buffer; create_type = position[0]; @@ -597,6 +586,9 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p number_of_datatype ); next_buffer += (4 * sizeof(int)); /* move after the header */ + /* The pointer should always be aligned on MPI_Aint, aka OPAL_PTRDIFF_TYPE */ + OMPI_DATATYPE_ALIGN_PTR(next_buffer, char*); + array_of_disp = (OPAL_PTRDIFF_TYPE*)next_buffer; next_buffer += number_of_disp * sizeof(OPAL_PTRDIFF_TYPE); /* the other datatypes */ @@ -788,8 +780,6 @@ ompi_datatype_t* ompi_datatype_create_from_packed_description( void** packed_buf datatype = __ompi_datatype_create_from_packed_description( packed_buffer, remote_processor ); - /* Keep the pointer aligned to MPI_Aint */ - OMPI_DATATYPE_ALIGN_PTR(*packed_buffer, void*); if( NULL == datatype ) { return NULL; }