1
1

Optimize the heterogeneous case when there are multiple

identical types.
Этот коммит содержится в:
George Bosilca 2015-05-27 01:07:29 -04:00
родитель 79d5e2a92b
Коммит ef74566734

Просмотреть файл

@ -42,20 +42,33 @@
*/ */
/**
 * Reverse the byte order of @p count consecutive elements.
 *
 * Each element is @p size bytes long. Element k is read from
 * from_p + k * size and written, byte-reversed, to to_p + k * size.
 *
 * @param to_p    destination buffer, at least size * count bytes
 * @param from_p  source buffer, at least size * count bytes
 * @param size    size in bytes of one element
 * @param count   number of consecutive elements to convert; 0 is a no-op
 *
 * The conversion is not done in place: to_p and from_p must not refer to
 * the same element, otherwise bytes are overwritten before being read.
 */
static inline void
opal_dt_swap_bytes(void *to_p, const void *from_p, const size_t size, size_t count)
{
    uint8_t *to = (uint8_t*) to_p;
    const uint8_t *from = (const uint8_t*) from_p;
    size_t i;

    for ( ; count > 0 ; count--) {
        /* The mirrored index is recomputed for every element; a running
         * "back" index carried across elements would wrap below zero
         * after the first one. */
        for (i = 0 ; i < size ; i++) {
            to[size - 1 - i] = from[i];
        }
        to += size;
        from += size;
    }
}
/** /**
* BEWARE: Do not use the following macro with composed types such as complex. Instead, use * BEWARE: Do not use the following macro with composed types such as
* the COPY_2TYPE_HETEROGENEOUS. * complex. As the swap is done using the entire type sizeof, the
* wrong endianness translation will be done. Instead, use the
* COPY_2SAMETYPE_HETEROGENEOUS.
*/ */
#define COPY_TYPE_HETEROGENEOUS( TYPENAME, TYPE ) \ #define COPY_TYPE_HETEROGENEOUS( TYPENAME, TYPE ) \
static int32_t \ static int32_t \
@ -72,10 +85,14 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count,
\ \
if ((pConvertor->remoteArch & OPAL_ARCH_ISBIGENDIAN) != \ if ((pConvertor->remoteArch & OPAL_ARCH_ISBIGENDIAN) != \
(opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) { \ (opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) { \
for( i = 0; i < count; i++ ) { \ if( (to_extent == from_extent) && (to_extent == sizeof(TYPE)) ) { \
opal_dt_swap_bytes(to, from, sizeof(TYPE)); \ opal_dt_swap_bytes(to, from, sizeof(TYPE), count); \
to += to_extent; \ } else { \
from += from_extent; \ for( i = 0; i < count; i++ ) { \
opal_dt_swap_bytes(to, from, sizeof(TYPE), 1); \
to += to_extent; \
from += from_extent; \
} \
} \ } \
} else if ((OPAL_PTRDIFF_TYPE)sizeof(TYPE) == to_extent && \ } else if ((OPAL_PTRDIFF_TYPE)sizeof(TYPE) == to_extent && \
(OPAL_PTRDIFF_TYPE)sizeof(TYPE) == from_extent) { \ (OPAL_PTRDIFF_TYPE)sizeof(TYPE) == from_extent) { \
@ -92,6 +109,44 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count,
return count; \ return count; \
} }
/**
 * Generate copy_<TYPENAME>_heterogeneous() for a datatype made of two
 * consecutive values of the same basic TYPE (e.g. a complex number stored
 * as two floating point values).
 *
 * The generated function converts @c count elements from @c from to @c to,
 * stepping by @c from_extent / @c to_extent bytes between elements:
 *  - if the remote and local architectures differ in endianness, each of
 *    the two TYPE values of every element is byte-swapped independently;
 *  - otherwise the elements are copied unchanged.
 * In both cases a single bulk operation is used when source and
 * destination are contiguous, i.e. both extents equal 2 * sizeof(TYPE).
 *
 * On return *advance is set to count * from_extent and count is returned.
 */
#define COPY_2SAMETYPE_HETEROGENEOUS( TYPENAME, TYPE )                  \
static int32_t                                                          \
copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \
                                const char* from, size_t from_len, OPAL_PTRDIFF_TYPE from_extent, \
                                char* to, size_t to_length, OPAL_PTRDIFF_TYPE to_extent, \
                                OPAL_PTRDIFF_TYPE *advance)             \
{                                                                       \
    /* One element is a pair of TYPE values. */                         \
    const OPAL_PTRDIFF_TYPE pair_size = (OPAL_PTRDIFF_TYPE)(2 * sizeof(TYPE)); \
    uint32_t i;                                                         \
                                                                        \
    /* NOTE(review): datatype_check receives &count and is still given  \
     * sizeof(TYPE), not the pair size; confirm against its definition  \
     * whether the element sizes passed here should be doubled too. */  \
    datatype_check( #TYPE, sizeof(TYPE), sizeof(TYPE), &count,          \
                   from, from_len, from_extent,                         \
                   to, to_length, to_extent);                           \
                                                                        \
    if ((pConvertor->remoteArch & OPAL_ARCH_ISBIGENDIAN) !=             \
        (opal_local_arch & OPAL_ARCH_ISBIGENDIAN)) {                    \
        if( (to_extent == from_extent) && (to_extent == pair_size) ) {  \
            /* contiguous: swap all 2 * count basic values at once */   \
            opal_dt_swap_bytes(to, from, sizeof(TYPE), 2 * (size_t)count); \
        } else {                                                        \
            for( i = 0; i < count; i++ ) {                              \
                opal_dt_swap_bytes(to, from, sizeof(TYPE), 2);          \
                to += to_extent;                                        \
                from += from_extent;                                    \
            }                                                           \
        }                                                               \
    } else if (pair_size == to_extent && pair_size == from_extent) {    \
        /* same endianness, contiguous: copy both halves of all elements */ \
        MEMCPY( to, from, (size_t)count * 2 * sizeof(TYPE) );           \
    } else {                                                            \
        /* source or destination are non-contiguous */                  \
        for( i = 0; i < count; i++ ) {                                  \
            MEMCPY( to, from, 2 * sizeof(TYPE) );                       \
            to += to_extent;                                            \
            from += from_extent;                                        \
        }                                                               \
    }                                                                   \
    *advance = count * from_extent;                                     \
    return count;                                                       \
}
#define COPY_2TYPE_HETEROGENEOUS( TYPENAME, TYPE1, TYPE2 ) \ #define COPY_2TYPE_HETEROGENEOUS( TYPENAME, TYPE1, TYPE2 ) \
static int32_t \ static int32_t \
@ -114,9 +169,9 @@ copy_##TYPENAME##_heterogeneous(opal_convertor_t *pConvertor, uint32_t count, \
TYPE1* to_1, *from_1; \ TYPE1* to_1, *from_1; \
TYPE2* to_2, *from_2; \ TYPE2* to_2, *from_2; \
to_1 = (TYPE1*) to; from_1 = (TYPE1*) from; \ to_1 = (TYPE1*) to; from_1 = (TYPE1*) from; \
opal_dt_swap_bytes(to_1, from_1, sizeof(TYPE1)); \ opal_dt_swap_bytes(to_1, from_1, sizeof(TYPE1), 1); \
to_2 = (TYPE2*) (to_1 + 1); from_2 = (TYPE2*) (from_1 + 1); \ to_2 = (TYPE2*) (to_1 + 1); from_2 = (TYPE2*) (from_1 + 1); \
opal_dt_swap_bytes(to_2, from_2, sizeof(TYPE2)); \ opal_dt_swap_bytes(to_2, from_2, sizeof(TYPE2), 1); \
to += to_extent; \ to += to_extent; \
from += from_extent; \ from += from_extent; \
} \ } \
@ -287,21 +342,21 @@ COPY_TYPE_HETEROGENEOUS( float16, long double )
#endif #endif
#if HAVE_FLOAT__COMPLEX #if HAVE_FLOAT__COMPLEX
COPY_2TYPE_HETEROGENEOUS( float_complex, float, float ) COPY_2SAMETYPE_HETEROGENEOUS( float_complex, float )
#else #else
/* #error No basic type for copy function for opal_datatype_float_complex found */ /* #error No basic type for copy function for opal_datatype_float_complex found */
#define copy_float_complex_heterogeneous NULL #define copy_float_complex_heterogeneous NULL
#endif #endif
#if HAVE_DOUBLE__COMPLEX #if HAVE_DOUBLE__COMPLEX
COPY_2TYPE_HETEROGENEOUS( double_complex, double, double ) COPY_2SAMETYPE_HETEROGENEOUS( double_complex, double )
#else #else
/* #error No basic type for copy function for opal_datatype_double_complex found */ /* #error No basic type for copy function for opal_datatype_double_complex found */
#define copy_double_complex_heterogeneous NULL #define copy_double_complex_heterogeneous NULL
#endif #endif
#if HAVE_LONG_DOUBLE__COMPLEX #if HAVE_LONG_DOUBLE__COMPLEX
COPY_2TYPE_HETEROGENEOUS( long_double_complex, long double, long double ) COPY_2SAMETYPE_HETEROGENEOUS( long_double_complex, long double )
#else #else
/* #error No basic type for copy function for opal_datatype_long_double_complex found */ /* #error No basic type for copy function for opal_datatype_long_double_complex found */
#define copy_long_double_complex_heterogeneous NULL #define copy_long_double_complex_heterogeneous NULL