Minimize the alignments. We only do it when we need to pack
data that must be aligned (aka the displacement). All other cases do not require special alignments, and are treated normally. Fix the comment regarding the alignment requirements.
Этот коммит содержится в:
родитель
267fdfe435
Коммит
33a3ace874
@ -64,12 +64,9 @@ typedef struct __dt_args {
|
||||
* copy the buffer into an aligned buffer first.
|
||||
*/
|
||||
#if OPAL_ALIGN_WORD_SIZE_INTEGERS
|
||||
#define OMPI_DATATYPE_ALIGN_INT(VALUE, TYPE) \
|
||||
(VALUE) = OPAL_ALIGN((VALUE), sizeof(OPAL_PTRDIFF_TYPE), TYPE)
|
||||
#define OMPI_DATATYPE_ALIGN_PTR(PTR, TYPE) \
|
||||
(PTR) = OPAL_ALIGN_PTR((PTR), sizeof(OPAL_PTRDIFF_TYPE), TYPE)
|
||||
#else
|
||||
#define OMPI_DATATYPE_ALIGN_INT(VALUE, TYPE)
|
||||
#define OMPI_DATATYPE_ALIGN_PTR(PTR, TYPE)
|
||||
#endif /* OPAL_ALIGN_WORD_SIZE_INTEGERS */
|
||||
|
||||
@ -104,8 +101,7 @@ typedef struct __dt_args {
|
||||
else pArgs->i = (int*)buf; \
|
||||
pArgs->ref_count = 1; \
|
||||
pArgs->total_pack_size = (4 + (IC)) * sizeof(int) + \
|
||||
(AC) * sizeof(OPAL_PTRDIFF_TYPE) + (DC) * sizeof(int); \
|
||||
OMPI_DATATYPE_ALIGN_INT( pArgs->total_pack_size, int ); \
|
||||
(AC) * sizeof(OPAL_PTRDIFF_TYPE); \
|
||||
(PDATA)->args = (void*)pArgs; \
|
||||
(PDATA)->packed_description = NULL; \
|
||||
} while(0)
|
||||
@ -451,17 +447,16 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype,
|
||||
position[2] = args->ca;
|
||||
position[3] = args->cd;
|
||||
next_packed += (4 * sizeof(int));
|
||||
/* So far there are 4 integers in the array, so we're still 64 bits aligned
|
||||
* if we suppose that the original buffer was 64 bits aligned.
|
||||
*
|
||||
* In order to solve issues with the Sparc 64 which require 64 bits pointers
|
||||
* to be correctly aligned, we have to start adding the data in a smart way,
|
||||
* just to keep everything as aligned as possible. Therefore, the first
|
||||
* array we have to copy is the array of displacements, followed by the
|
||||
* array of datatypes (both of them might be arrays of pointers) and then
|
||||
* finally the array of counts.
|
||||
/* Spoiler: We will access the data in this storage structure, and thus we
|
||||
* need to align it to the expected boundaries (special thanks to Sparc64).
|
||||
* The simplest way is to ensure that prior to each type that must be 64
|
||||
* bits aligned, we have a pointer that is 64 bits aligned. That will minimize
|
||||
* the memory requirements in all cases where no displacements are stored.
|
||||
*/
|
||||
if( 0 < args->ca ) {
|
||||
/* description of the displacements must be 64 bits aligned */
|
||||
OMPI_DATATYPE_ALIGN_PTR(next_packed, char*);
|
||||
|
||||
memcpy( next_packed, args->a, sizeof(OPAL_PTRDIFF_TYPE) * args->ca );
|
||||
next_packed += sizeof(OPAL_PTRDIFF_TYPE) * args->ca;
|
||||
}
|
||||
@ -472,9 +467,6 @@ static inline int __ompi_datatype_pack_description( ompi_datatype_t* datatype,
|
||||
memcpy( next_packed, args->i, sizeof(int) * args->ci );
|
||||
next_packed += args->ci * sizeof(int);
|
||||
|
||||
/* description of next datatype should be 64 bits aligned */
|
||||
OMPI_DATATYPE_ALIGN_PTR(next_packed, char*);
|
||||
|
||||
/* copy the rest of the data */
|
||||
for( i = 0; i < args->cd; i++ ) {
|
||||
ompi_datatype_t* temp_data = args->d[i];
|
||||
@ -512,7 +504,6 @@ int ompi_datatype_get_pack_description( ompi_datatype_t* datatype,
|
||||
__ompi_datatype_pack_description( datatype, &recursive_buffer, &next_index );
|
||||
if( !ompi_datatype_is_predefined(datatype) ) {
|
||||
args->total_pack_size = (uintptr_t)((char*)recursive_buffer - (char*)datatype->packed_description);
|
||||
OMPI_DATATYPE_ALIGN_PTR(args->total_pack_size, char*);
|
||||
}
|
||||
}
|
||||
|
||||
@ -560,8 +551,6 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p
|
||||
#endif
|
||||
|
||||
next_buffer = (char*)*packed_buffer;
|
||||
/* The pointer should always be aligned on MPI_Aint, aka OPAL_PTRDIFF_TYPE */
|
||||
OMPI_DATATYPE_ALIGN_PTR(next_buffer, char*);
|
||||
position = (int*)next_buffer;
|
||||
|
||||
create_type = position[0];
|
||||
@ -597,6 +586,9 @@ static ompi_datatype_t* __ompi_datatype_create_from_packed_description( void** p
|
||||
number_of_datatype );
|
||||
next_buffer += (4 * sizeof(int)); /* move after the header */
|
||||
|
||||
/* The pointer should always be aligned on MPI_Aint, aka OPAL_PTRDIFF_TYPE */
|
||||
OMPI_DATATYPE_ALIGN_PTR(next_buffer, char*);
|
||||
|
||||
array_of_disp = (OPAL_PTRDIFF_TYPE*)next_buffer;
|
||||
next_buffer += number_of_disp * sizeof(OPAL_PTRDIFF_TYPE);
|
||||
/* the other datatypes */
|
||||
@ -788,8 +780,6 @@ ompi_datatype_t* ompi_datatype_create_from_packed_description( void** packed_buf
|
||||
|
||||
datatype = __ompi_datatype_create_from_packed_description( packed_buffer,
|
||||
remote_processor );
|
||||
/* Keep the pointer aligned to MPI_Aint */
|
||||
OMPI_DATATYPE_ALIGN_PTR(*packed_buffer, void*);
|
||||
if( NULL == datatype ) {
|
||||
return NULL;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user