1
1
* Don't overflow the internal datatype count.
Change the type of the count to be a size_t (it does not alter the total
size of the internal structures, so has no impact on the ABI).

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>

* Optimize the datatype creation.
The internal array of counts of predefined types is now only created
when needed, which is either in a heterogeneous environment, or when
one calls get_elements. It saves space and makes the convertor creation a
little faster in some cases.

Rearrange the fields in the datatype description structs.

The macro OPAL_DATATYPE_INIT_PTYPES_ARRAY had a bug, and the
static array was only partially created. All predefined types should
have the ptypes array created and initialized.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>

* Fix the boundary computation.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>

* test/datatype: add test for short unpack on heterogeneous cluster

Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp>
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>

* Trying to reduce the cost of creating a convertor.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>

* Respect the unpack boundaries.
As Gilles suggested on #2535 the opal_unpack_general_function was
unpacking based on the requested count and not on the amount of packed
data provided.
Fixes #2535.

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
bosilca 2017-05-09 09:31:40 -04:00 коммит произвёл GitHub
родитель a66909b8b4
Коммит cbf03b3113
20 изменённых файлов: 374 добавлений и 167 удалений

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
@ -25,6 +25,7 @@
#include "ompi/runtime/params.h"
#include "ompi/datatype/ompi_datatype.h"
#include "opal/datatype/opal_datatype_internal.h"
int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t *count)
{
@ -48,9 +49,10 @@ int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t
there are no leftover bytes */
if (!ompi_datatype_is_predefined(datatype)) {
if (0 != internal_count) {
opal_datatype_compute_ptypes(&datatype->super);
/* count the basic elements in the datatype */
for (i = 4, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) {
total += datatype->super.btypes[i];
for (i = OPAL_DATATYPE_FIRST_TYPE, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) {
total += datatype->super.ptypes[i];
}
internal_count = total * internal_count;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2009-2013 The University of Tennessee and The University
* Copyright (c) 2009-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
@ -467,7 +467,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
.name = OPAL_DATATYPE_INIT_NAME(TYPE ## SIZE), \
.desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \
.opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(TYPE ## SIZE) \
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY(TYPE ## SIZE) \
}
#define OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE_FORTRAN( TYPE, NAME, SIZE, ALIGN, FLAGS ) \

Просмотреть файл

@ -384,8 +384,9 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}};
(PDST)->super.opt_desc = (PSRC)->super.opt_desc; \
(PDST)->packed_description = (PSRC)->packed_description; \
(PSRC)->packed_description = NULL; \
memcpy( (PDST)->super.btypes, (PSRC)->super.btypes, \
OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t) ); \
/* transfer the ptypes */ \
(PDST)->super.ptypes = (PSRC)->super.ptypes; \
(PSRC)->super.ptypes = NULL; \
} while(0)
#define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2, FLAGS) \
@ -393,20 +394,20 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}};
struct { type1 v1; type2 v2; } s[2]; \
ompi_datatype_t *types[2], *ptype; \
int bLength[2] = {1, 1}; \
ptrdiff_t base, displ[2]; \
ptrdiff_t base, displ[2]; \
\
types[0] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType1]; \
types[1] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType2]; \
base = (ptrdiff_t)(&(s[0])); \
displ[0] = (ptrdiff_t)(&(s[0].v1)); \
base = (ptrdiff_t)(&(s[0])); \
displ[0] = (ptrdiff_t)(&(s[0].v1)); \
displ[0] -= base; \
displ[1] = (ptrdiff_t)(&(s[0].v2)); \
displ[1] = (ptrdiff_t)(&(s[0].v2)); \
displ[1] -= base; \
\
ompi_datatype_create_struct( 2, bLength, displ, types, &ptype ); \
displ[0] = (ptrdiff_t)(&(s[1])); \
displ[0] = (ptrdiff_t)(&(s[1])); \
displ[0] -= base; \
if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \
if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \
ptype->super.ub = displ[0]; /* force a new extent for the datatype */ \
ptype->super.flags |= (FLAGS); \
ptype->id = MPIDDT; \
@ -736,7 +737,7 @@ void ompi_datatype_dump( const ompi_datatype_t* pData )
(long)pData->super.size, (int)pData->super.align, pData->super.id, (int)pData->super.desc.length, (int)pData->super.desc.used,
(long)pData->super.true_lb, (long)pData->super.true_ub, (long)(pData->super.true_ub - pData->super.true_lb),
(long)pData->super.lb, (long)pData->super.ub, (long)(pData->super.ub - pData->super.lb),
(int)pData->super.nbElems, (int)pData->super.btypes[OPAL_DATATYPE_LOOP], (int)pData->super.flags );
(int)pData->super.nbElems, (int)pData->super.loops, (int)pData->super.flags );
/* dump the flags */
if( ompi_datatype_is_predefined(pData) ) {
index += snprintf( buffer + index, length - index, "predefined " );

Просмотреть файл

@ -366,7 +366,8 @@ static inline int memchecker_datatype(MPI_Datatype type)
opal_memchecker_base_isdefined (&type->super.opt_desc.length, sizeof(opal_datatype_count_t));
opal_memchecker_base_isdefined (&type->super.opt_desc.used, sizeof(opal_datatype_count_t));
opal_memchecker_base_isdefined (&type->super.opt_desc.desc, sizeof(dt_elem_desc_t *));
opal_memchecker_base_isdefined (&type->super.btypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t));
if( NULL != type->super.ptypes )
opal_memchecker_base_isdefined (&type->super.ptypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(size_t));
opal_memchecker_base_isdefined (&type->id, sizeof(int32_t));
opal_memchecker_base_isdefined (&type->d_f_to_c_index, sizeof(int32_t));

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -43,9 +43,6 @@
CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
#endif
extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor,
int starting_point, const int* sizes );
static void opal_convertor_construct( opal_convertor_t* convertor )
{
convertor->pStack = convertor->static_stack;
@ -226,7 +223,7 @@ int32_t opal_convertor_pack( opal_convertor_t* pConv,
if( OPAL_LIKELY(pConv->flags & CONVERTOR_NO_OP) ) {
/**
* We are doing conversion on a contiguous datatype on a homogeneous
* environment. The convertor contain minimal informations, we only
* environment. The convertor contain minimal information, we only
* use the bConverted to manage the conversion.
*/
uint32_t i;
@ -447,31 +444,49 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
return rc;
}
/**
 * Accumulate, for every predefined type flagged in pData->bdt_used, the
 * number of elements of that type (pData->ptypes[i]) multiplied by the
 * matching entry of the sizes array.
 *
 * The ptypes array is created on demand through
 * opal_datatype_compute_ptypes() when the datatype does not have one yet.
 *
 * @param pData  datatype whose bdt_used mask and ptypes counts are read
 * @param sizes  per-type sizes, indexed by predefined type id
 * @return the accumulated length
 */
static size_t
opal_datatype_compute_remote_size( const opal_datatype_t* pData,
const size_t* sizes )
{
uint32_t typeMask = pData->bdt_used;
size_t length = 0;
if( OPAL_UNLIKELY(NULL == pData->ptypes) ) {
/* Allocate and fill the array of types used in the datatype description */
/* The cast drops the const qualifier: the callee is declared to take a
 * non-const opal_datatype_t*. */
opal_datatype_compute_ptypes( (opal_datatype_t*)pData );
}
/* Walk the predefined type ids; the loop ends early as soon as every bit
 * of the mask has been consumed. */
for( int i = OPAL_DATATYPE_FIRST_TYPE; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) {
if( typeMask & ((uint32_t)1 << i) ) {
length += (pData->ptypes[i] * sizes[i]);
/* The bit is known to be set here, so the XOR clears it. */
typeMask ^= ((uint32_t)1 << i);
}
}
return length;
}
/**
* Compute the remote size. If necessary remove the homogeneous flag
* and redirect the convertor description toward the non-optimized
* datatype representation.
*/
#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \
{ \
if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \
opal_convertor_master_t* master; \
int i; \
uint32_t mask = datatype->bdt_used; \
convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \
master = convertor->master; \
convertor->remote_size = 0; \
for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \
if( mask & ((uint32_t)1 << i) ) { \
convertor->remote_size += (datatype->btypes[i] * \
master->remote_sizes[i]); \
mask ^= ((uint32_t)1 << i); \
} \
} \
convertor->remote_size *= convertor->count; \
convertor->use_desc = &(datatype->desc); \
} \
/**
 * Set and return pConvertor->remote_size.
 *
 * The remote size starts as a copy of the local size. When the datatype
 * uses at least one type flagged in the master convertor's hetero_mask, the
 * CONVERTOR_HOMOGENEOUS flag is cleared, use_desc is pointed at the
 * datatype's desc member, and (unless CONVERTOR_HAS_REMOTE_SIZE is already
 * set) the remote size is recomputed from the master's remote_sizes table
 * and multiplied by the convertor count.
 *
 * CONVERTOR_HAS_REMOTE_SIZE is always set on return.
 *
 * @param pConvertor  convertor to update; its pDesc, master, count and
 *                    local_size fields are read
 * @return the value stored in pConvertor->remote_size
 */
size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )
{
opal_datatype_t* datatype = (opal_datatype_t*)pConvertor->pDesc;
/* Default: remote and local sizes are identical. */
pConvertor->remote_size = pConvertor->local_size;
if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) {
pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
pConvertor->use_desc = &(datatype->desc);
/* NOTE(review): when CONVERTOR_HAS_REMOTE_SIZE is already set, remote_size
 * has just been reset to local_size above and is not recomputed here;
 * confirm this is the intended result for repeated calls. */
if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) {
/* This is for a single datatype, we must update it with the count */
pConvertor->remote_size = opal_datatype_compute_remote_size(datatype,
pConvertor->master->remote_sizes);
pConvertor->remote_size *= pConvertor->count;
}
}
pConvertor->flags |= CONVERTOR_HAS_REMOTE_SIZE;
return pConvertor->remote_size;
}
/**
@ -483,29 +498,26 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
*/
#define OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ) \
{ \
uint32_t bdt_mask; \
\
convertor->local_size = count * datatype->size; \
convertor->pBaseBuf = (unsigned char*)pUserBuf; \
convertor->count = count; \
convertor->pDesc = (opal_datatype_t*)datatype; \
convertor->bConverted = 0; \
convertor->use_desc = &(datatype->opt_desc); \
/* If the data is empty we just mark the convertor as \
* completed. With this flag set the pack and unpack functions \
* will not do anything. \
*/ \
if( OPAL_UNLIKELY((0 == count) || (0 == datatype->size)) ) { \
convertor->flags |= OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED; \
convertor->flags |= (OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED | CONVERTOR_HAS_REMOTE_SIZE); \
convertor->local_size = convertor->remote_size = 0; \
return OPAL_SUCCESS; \
} \
/* Compute the local in advance */ \
convertor->local_size = count * datatype->size; \
convertor->pBaseBuf = (unsigned char*)pUserBuf; \
convertor->count = count; \
\
/* Grab the datatype part of the flags */ \
convertor->flags &= CONVERTOR_TYPE_MASK; \
convertor->flags |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \
convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \
convertor->pDesc = (opal_datatype_t*)datatype; \
convertor->bConverted = 0; \
convertor->use_desc = &(datatype->opt_desc); \
\
convertor->remote_size = convertor->local_size; \
if( OPAL_LIKELY(convertor->remoteArch == opal_local_arch) ) { \
@ -516,9 +528,8 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
} \
} \
\
bdt_mask = datatype->bdt_used & convertor->master->hetero_mask; \
OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE( convertor, datatype, \
bdt_mask ); \
assert( (convertor)->pDesc == (datatype) ); \
opal_convertor_compute_remote_size( convertor ); \
assert( NULL != convertor->use_desc->desc ); \
/* For predefined datatypes (contiguous) do nothing more */ \
/* if checksum is enabled then always continue */ \
@ -530,7 +541,7 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
} \
convertor->flags &= ~CONVERTOR_NO_OP; \
{ \
uint32_t required_stack_length = datatype->btypes[OPAL_DATATYPE_LOOP] + 1; \
uint32_t required_stack_length = datatype->loops + 1; \
\
if( required_stack_length > convertor->stack_size ) { \
assert(convertor->pStack == convertor->static_stack); \
@ -714,8 +725,8 @@ void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos,
opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index,
(int)pStack[stack_pos].count, (long)pStack[stack_pos].disp );
if( pStack->index != -1 )
opal_output( 0, "\t[desc count %d disp %ld extent %ld]\n",
pDesc[pStack[stack_pos].index].elem.count,
opal_output( 0, "\t[desc count %lu disp %ld extent %ld]\n",
(unsigned long)pDesc[pStack[stack_pos].index].elem.count,
(long)pDesc[pStack[stack_pos].index].elem.disp,
(long)pDesc[pStack[stack_pos].index].elem.extent );
else

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -54,6 +54,7 @@ BEGIN_C_DECLS
#define CONVERTOR_STATE_ALLOC 0x04000000
#define CONVERTOR_COMPLETED 0x08000000
#define CONVERTOR_CUDA_UNIFIED 0x10000000
#define CONVERTOR_HAS_REMOTE_SIZE 0x20000000
union dt_elem_desc;
typedef struct opal_convertor_t opal_convertor_t;
@ -72,7 +73,7 @@ struct dt_stack_t {
int32_t index; /**< index in the element description */
int16_t type; /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */
size_t count; /**< number of times we still have to do it */
ptrdiff_t disp; /**< actual displacement depending on the count field */
ptrdiff_t disp; /**< actual displacement depending on the count field */
};
typedef struct dt_stack_t dt_stack_t;
@ -186,9 +187,16 @@ static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConv
return 1;
}
/**
* Update the size of the remote datatype representation. The size will
* depend on the configuration of the master convertor. In homogeneous
* environments, the local and remote sizes are identical.
*/
size_t
opal_convertor_compute_remote_size( opal_convertor_t* pConv );
/*
*
/**
* Return the local size of the convertor (count times the size of the datatype).
*/
static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv,
size_t* pSize )
@ -197,16 +205,24 @@ static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv
}
/*
*
/**
* Return the remote size of the convertor (count times the remote size of the
* datatype). On homogeneous environments the local and remote sizes are
* identical.
*/
/*
 * Store the unpacked size of the convertor in *pSize.
 *
 * pConv: convertor to query
 * pSize: output location that receives the size
 */
static inline void opal_convertor_get_unpacked_size( const opal_convertor_t* pConv,
size_t* pSize )
{
/* With CONVERTOR_HOMOGENEOUS set, the local size is reported directly. */
if( pConv->flags & CONVERTOR_HOMOGENEOUS ) {
*pSize = pConv->local_size;
return;
}
/* The remote size has not been computed yet: compute it now. The cast drops
 * the const qualifier because the callee takes a non-const convertor. */
if( 0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags) ) {
opal_convertor_compute_remote_size( (opal_convertor_t*)pConv);
}
*pSize = pConv->remote_size;
}
/**
* Return the current absolute position of the next pack/unpack. This function is
* mostly useful for contiguous datatypes, when we need to get the pointer to the
@ -279,6 +295,7 @@ opal_convertor_raw( opal_convertor_t* convertor, /* [IN/OUT] */
uint32_t* iov_count, /* [IN/OUT] */
size_t* length ); /* [OUT] */
/*
* Upper level does not need to call the _nocheck function directly.
*/

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2015 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -53,9 +53,10 @@ BEGIN_C_DECLS
#endif
/*
* No more than this number of _Basic_ datatypes in C/CPP or Fortran
* are supported (in order to not change setup and usage of btypes).
* are supported (in order to not change setup and usage of the predefined
* datatypes).
*
* XXX TODO Adapt to whatever the OMPI-layer needs
* BEWARE: This constant should reflect whatever the OMPI-layer needs.
*/
#define OPAL_DATATYPE_MAX_SUPPORTED 47
@ -108,13 +109,14 @@ struct opal_datatype_t {
uint32_t bdt_used; /**< bitset of which basic datatypes are used in the data description */
size_t size; /**< total size in bytes of the memory used by the data if
the data is put on a contiguous buffer */
ptrdiff_t true_lb; /**< the true lb of the data without user defined lb and ub */
ptrdiff_t true_ub; /**< the true ub of the data without user defined lb and ub */
ptrdiff_t lb; /**< lower bound in memory */
ptrdiff_t ub; /**< upper bound in memory */
ptrdiff_t true_lb; /**< the true lb of the data without user defined lb and ub */
ptrdiff_t true_ub; /**< the true ub of the data without user defined lb and ub */
ptrdiff_t lb; /**< lower bound in memory */
ptrdiff_t ub; /**< upper bound in memory */
/* --- cacheline 1 boundary (64 bytes) --- */
size_t nbElems; /**< total number of elements inside the datatype */
uint32_t align; /**< data should be aligned to */
uint32_t loops; /**< number of loops on the iternal type stack */
/* Attribute fields */
char name[OPAL_MAX_OBJECT_NAME]; /**< name of the datatype */
@ -123,11 +125,12 @@ struct opal_datatype_t {
dt_type_desc_t opt_desc; /**< short description of the data used when conversion is useless
or in the send case (without conversion) */
uint32_t btypes[OPAL_DATATYPE_MAX_SUPPORTED];
/**< basic elements count used to compute the size of the
datatype for remote nodes. The length of the array is dependent on
the maximum number of datatypes of all top layers.
Reason being is that Fortran is not at the OPAL layer. */
size_t *ptypes; /**< array of basic predefined types that facilitate the computing
of the remote size in heterogeneous environments. The length of the
array is dependent on the maximum number of predefined datatypes of
all language interfaces (because Fortran is not known at the OPAL
layer). This field should never be initialized in homogeneous
environments */
/* --- cacheline 5 boundary (320 bytes) was 32-36 bytes ago --- */
/* size: 352, cachelines: 6, members: 15 */
@ -281,6 +284,8 @@ OPAL_DECLSPEC int32_t
opal_datatype_copy_content_same_ddt( const opal_datatype_t* pData, int32_t count,
char* pDestBuf, char* pSrcBuf );
OPAL_DECLSPEC int opal_datatype_compute_ptypes( opal_datatype_t* datatype );
OPAL_DECLSPEC const opal_datatype_t*
opal_datatype_match_size( int size, uint16_t datakind, uint16_t datalang );

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2016 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -279,7 +279,8 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA
* predefined non contiguous datatypes (like MPI_SHORT_INT).
*/
if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) {
pdtBase->btypes[pdtAdd->id] += count;
if( NULL != pdtBase->ptypes )
pdtBase->ptypes[pdtAdd->id] += count;
pLast->elem.common.type = pdtAdd->id;
pLast->elem.count = count;
pLast->elem.disp = disp;
@ -291,13 +292,13 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA
}
} else {
/* keep trace of the total number of basic datatypes in the datatype definition */
pdtBase->btypes[OPAL_DATATYPE_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_LOOP];
pdtBase->btypes[OPAL_DATATYPE_END_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_END_LOOP];
pdtBase->btypes[OPAL_DATATYPE_LB] |= pdtAdd->btypes[OPAL_DATATYPE_LB];
pdtBase->btypes[OPAL_DATATYPE_UB] |= pdtAdd->btypes[OPAL_DATATYPE_UB];
for( i = 4; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ )
if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]);
pdtBase->loops += pdtAdd->loops;
pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_LB);
pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_UB);
if( (NULL != pdtBase->ptypes) && (NULL != pdtAdd->ptypes) ) {
for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ )
if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]);
}
if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) &&
(extent == pdtAdd->desc.desc[0].elem.extent) ){
pLast->elem = pdtAdd->desc.desc[0].elem;
@ -312,7 +313,7 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA
pLoop = pLast;
CREATE_LOOP_START( pLast, count, pdtAdd->desc.used + 1, extent,
(pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED)) );
pdtBase->btypes[OPAL_DATATYPE_LOOP] += 2;
pdtBase->loops += 2;
pdtBase->desc.used += 2;
pLast++;
}

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -61,6 +61,9 @@ int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t *
dest_type->opt_desc.used = src_type->opt_desc.used;
memcpy( dest_type->opt_desc.desc, src_type->opt_desc.desc, desc_length * sizeof(dt_elem_desc_t) );
}
} else {
assert( NULL == dest_type->opt_desc.desc );
assert( 0 == dest_type->opt_desc.length );
}
}
dest_type->id = src_type->id; /* preserve the default id. This allow us to

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
@ -179,7 +179,7 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i
return 0; /* completed */
}
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 1) );
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 1) );
pStack->count = count;
pStack->index = -1;
pStack->disp = 0;

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2013 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -30,8 +30,6 @@
static void opal_datatype_construct( opal_datatype_t* pData )
{
int i;
pData->size = 0;
pData->flags = OPAL_DATATYPE_FLAG_CONTIGUOUS;
pData->id = 0;
@ -53,32 +51,36 @@ static void opal_datatype_construct( opal_datatype_t* pData )
pData->opt_desc.length = 0;
pData->opt_desc.used = 0;
for( i = 0; i < OPAL_DATATYPE_MAX_SUPPORTED; i++ )
pData->btypes[i] = 0;
pData->ptypes = NULL;
pData->loops = 0;
}
static void opal_datatype_destruct( opal_datatype_t* datatype )
{
if (!opal_datatype_is_predefined(datatype)) {
if( datatype->desc.desc != NULL ) {
free( datatype->desc.desc );
datatype->desc.length = 0;
datatype->desc.used = 0;
}
}
if( datatype->opt_desc.desc != NULL ) {
/**
* As the default description and the optimized description might point to the
* same data description we should start by cleaning the optimized description.
*/
if( NULL != datatype->opt_desc.desc ) {
if( datatype->opt_desc.desc != datatype->desc.desc )
free( datatype->opt_desc.desc );
datatype->opt_desc.length = 0;
datatype->opt_desc.used = 0;
datatype->opt_desc.desc = NULL;
}
/**
* As the default description and the optimized description can point to the
* same memory location we should keep the default location pointer until we
* know what we should do with the optimized description.
*/
datatype->desc.desc = NULL;
if (!opal_datatype_is_predefined(datatype)) {
if( NULL != datatype->desc.desc ) {
free( datatype->desc.desc );
datatype->desc.length = 0;
datatype->desc.used = 0;
datatype->desc.desc = NULL;
}
}
/* dont free the ptypes of predefined types (it was not dynamically allocated) */
if( (NULL != datatype->ptypes) && (datatype->id >= OPAL_DATATYPE_MAX_PREDEFINED) ) {
free(datatype->ptypes);
datatype->ptypes = NULL;
}
/* make sure the name is set to empty */
datatype->name[0] = '\0';

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -42,8 +42,14 @@ int opal_datatype_contain_basic_datatypes( const opal_datatype_t* pData, char* p
if( pData->flags & OPAL_DATATYPE_FLAG_USER_LB ) index += snprintf( ptr, length - index, "lb " );
if( pData->flags & OPAL_DATATYPE_FLAG_USER_UB ) index += snprintf( ptr + index, length - index, "ub " );
for( i = 0; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) {
if( pData->bdt_used & mask )
index += snprintf( ptr + index, length - index, "%s ", opal_datatype_basicDatatypes[i]->name );
if( pData->bdt_used & mask ) {
if( NULL == pData->ptypes ) {
index += snprintf( ptr + index, length - index, "%s:* ", opal_datatype_basicDatatypes[i]->name );
} else {
index += snprintf( ptr + index, length - index, "%s:%lu ", opal_datatype_basicDatatypes[i]->name,
pData->ptypes[i]);
}
}
mask <<= 1;
if( length <= (size_t)index ) break;
}
@ -115,7 +121,7 @@ void opal_datatype_dump( const opal_datatype_t* pData )
(void*)pData, pData->name, (long)pData->size, (int)pData->align, pData->id, (int)pData->desc.length, (int)pData->desc.used,
(long)pData->true_lb, (long)pData->true_ub, (long)(pData->true_ub - pData->true_lb),
(long)pData->lb, (long)pData->ub, (long)(pData->ub - pData->lb),
(int)pData->nbElems, (int)pData->btypes[OPAL_DATATYPE_LOOP], (int)pData->flags );
(int)pData->nbElems, (int)pData->loops, (int)pData->flags );
/* dump the flags */
if( pData->flags == OPAL_DATATYPE_FLAG_PREDEFINED )
index += snprintf( buffer + index, length - index, "predefined " );

Просмотреть файл

@ -3,10 +3,10 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
* Copyright (c) 2004-2017 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
@ -34,21 +34,8 @@
#include "opal/datatype/opal_datatype_internal.h"
int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor,
size_t starting_point,
const size_t* sizes );
static inline size_t
opal_convertor_compute_remote_size( const opal_datatype_t* pData, const size_t* sizes )
{
uint32_t i;
size_t length = 0;
for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) {
length += (pData->btypes[i] * sizes[i]);
}
return length;
}
extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor,
size_t starting_point, const size_t* sizes );
int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor,
size_t starting_point, const size_t* sizes )
@ -104,7 +91,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor,
}
/* remove from the main loop all the complete datatypes */
remote_size = opal_convertor_compute_remote_size( pData, sizes );
remote_size = opal_convertor_compute_remote_size( pConvertor );
count = (int32_t)(starting_point / remote_size);
resting_place -= (remote_size * count);
pStack->count = pConvertor->count - count;
@ -114,7 +101,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor,
pStack->disp = count * (pData->ub - pData->lb) + pElems[loop_length].elem.disp;
pos_desc = 0;
remoteLength = (size_t*)alloca( sizeof(size_t) * (pConvertor->pDesc->btypes[OPAL_DATATYPE_LOOP] + 1));
remoteLength = (size_t*)alloca( sizeof(size_t) * (pConvertor->pDesc->loops + 1));
remoteLength[0] = 0; /* initial value set to ZERO */
loop_length = 0;

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
@ -39,9 +39,9 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t
/* Normally the size should be less or equal to the size of the datatype.
* This function does not support a iSize bigger than the size of the datatype.
*/
assert( (uint32_t)iSize <= datatype->size );
DUMP( "dt_count_elements( %p, %d )\n", (void*)datatype, iSize );
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 2) );
assert( iSize <= datatype->size );
DUMP( "dt_count_elements( %p, %ul )\n", (void*)datatype, (unsigned long)iSize );
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) );
pStack->count = 1;
pStack->index = -1;
pStack->disp = 0;
@ -53,8 +53,10 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t
if( --(pStack->count) == 0 ) { /* end of loop */
stack_pos--; pStack--;
if( stack_pos == -1 ) return nbElems; /* completed */
pos_desc++; /* advance to the next element after the end loop */
} else {
pos_desc = pStack->index + 1; /* go back to the begining of the loop */
}
pos_desc = pStack->index + 1;
continue;
}
if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) {
@ -93,9 +95,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t
/**
* Handle all complete multiple of the datatype.
*/
for( pos_desc = 4; pos_desc < OPAL_DATATYPE_MAX_PREDEFINED; pos_desc++ ) {
local_length += datatype->btypes[pos_desc];
}
local_length = datatype->nbElems;
pos_desc = count / local_length;
count = count % local_length;
*length = datatype->size * pos_desc;
@ -104,7 +104,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t
}
DUMP( "dt_set_element_count( %p, %d )\n", (void*)datatype, count );
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 2) );
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) );
pStack->count = 1;
pStack->index = -1;
pStack->disp = 0;
@ -116,8 +116,10 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t
if( --(pStack->count) == 0 ) { /* end of loop */
stack_pos--; pStack--;
if( stack_pos == -1 ) return 0;
pos_desc++; /* advance to the next element after the end loop */
} else {
pos_desc = pStack->index + 1; /* go back to the begining of the loop */
}
pos_desc = pStack->index + 1;
continue;
}
if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) {
@ -143,3 +145,56 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t
}
}
/**
 * Compute the array of counts of the predefined datatypes contained in
 * the datatype. We have no simple way to create this array, as we only
 * sporadically need it (when we deal with heterogeneous environments or
 * when we use get_element_count). Thus, we will pay the cost once per
 * datatype, but we will only update this array if/when needed.
 *
 * The array is allocated with OPAL_DATATYPE_MAX_SUPPORTED zeroed entries and
 * stored in datatype->ptypes; it is indexed by the basic type id found in
 * each data element of the description.
 *
 * @param datatype  the datatype whose ptypes array must be populated.
 *
 * @return 0 on success, or if the array was already computed;
 *         -1 if the array could not be allocated (datatype->ptypes is
 *         left NULL in that case).
 */
int opal_datatype_compute_ptypes( opal_datatype_t* datatype )
{
    dt_stack_t* pStack;       /* pointer to the position on the stack */
    uint32_t pos_desc;        /* actual position in the description of the derived datatype */
    ssize_t stack_pos = 0;
    dt_elem_desc_t* pElems;

    /* Already computed: nothing to do. */
    if( NULL != datatype->ptypes ) return 0;

    datatype->ptypes = (size_t*)calloc(OPAL_DATATYPE_MAX_SUPPORTED, sizeof(size_t));
    if( NULL == datatype->ptypes ) {
        /* Out of memory: report it instead of dereferencing NULL below. */
        return -1;
    }

    DUMP( "opal_datatype_compute_ptypes( %p )\n", (void*)datatype );
    /* One stack entry per loop in the datatype, plus two spare entries. */
    pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) );
    pStack->count = 1;
    pStack->index = -1;
    pStack->disp  = 0;
    pElems = datatype->desc.desc;
    pos_desc = 0;

    while( 1 ) {  /* loop forever the exit condition is on the last OPAL_DATATYPE_END_LOOP */
        if( OPAL_DATATYPE_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
            if( --(pStack->count) == 0 ) { /* end of loop */
                stack_pos--; pStack--;
                if( stack_pos == -1 ) return 0;  /* completed */
                pos_desc++;  /* advance to the next element after the end loop */
            } else {
                pos_desc = pStack->index + 1;  /* go back to the beginning of the loop */
            }
            continue;
        }
        if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) {
            ddt_loop_desc_t* loop = &(pElems[pos_desc].loop);
            /* NOTE(review): 'loop' is not re-pointed inside the do/while, so
             * directly nested loops are all pushed with the outermost
             * loop->loops value - confirm this is intended. */
            do {
                PUSH_STACK( pStack, stack_pos, pos_desc, OPAL_DATATYPE_LOOP, loop->loops, 0 );
                pos_desc++;
            } while( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */
            DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" );
        }
        while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
            /* now here we have a basic datatype: account for its elements */
            datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count;
            pos_desc++;  /* advance to the next data */
        }
    }
}

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2012 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -155,10 +155,10 @@ typedef struct ddt_elem_id_description ddt_elem_id_description;
*/
struct ddt_elem_desc {
ddt_elem_id_description common; /**< basic data description and flags */
uint32_t count; /**< number of blocks */
uint32_t blocklen; /**< number of elements on each block */
ptrdiff_t extent; /**< extent of each block (in bytes) */
ptrdiff_t disp; /**< displacement of the first block */
size_t count; /**< number of blocks */
ptrdiff_t extent; /**< extent of each block (in bytes) */
ptrdiff_t disp; /**< displacement of the first block */
};
typedef struct ddt_elem_desc ddt_elem_desc_t;
@ -172,10 +172,10 @@ typedef struct ddt_elem_desc ddt_elem_desc_t;
*/
struct ddt_loop_desc {
ddt_elem_id_description common; /**< basic data description and flags */
uint32_t loops; /**< number of elements */
uint32_t items; /**< number of items in the loop */
uint32_t loops; /**< number of elements */
size_t unused; /**< not used right now */
ptrdiff_t extent; /**< extent of the whole loop */
ptrdiff_t extent; /**< extent of the whole loop */
};
typedef struct ddt_loop_desc ddt_loop_desc_t;
@ -184,7 +184,7 @@ struct ddt_endloop_desc {
uint32_t items; /**< number of elements */
uint32_t unused; /**< not used right now */
size_t size; /**< real size of the data in the loop */
ptrdiff_t first_elem_disp; /**< the displacement of the first block in the loop */
ptrdiff_t first_elem_disp; /**< the displacement of the first block in the loop */
};
typedef struct ddt_endloop_desc ddt_endloop_desc_t;
@ -214,13 +214,20 @@ union dt_elem_desc {
(_place)->end_loop.unused = -1; \
} while(0)
/**
* Create one or more elements depending on the value of _count. If the value
* is too large for the type of elem.count then use both the elem.count and
* elem.blocklen to create it. If the number is prime then create a second
* element to account for the difference.
*/
#define CREATE_ELEM( _place, _type, _flags, _count, _disp, _extent ) \
do { \
(_place)->elem.common.flags = (_flags) | OPAL_DATATYPE_FLAG_DATA; \
(_place)->elem.common.type = (_type); \
(_place)->elem.count = (_count); \
(_place)->elem.disp = (_disp); \
(_place)->elem.extent = (_extent); \
(_place)->elem.count = (_count); \
(_place)->elem.blocklen = 1; \
} while(0)
/*
@ -238,8 +245,8 @@ struct opal_datatype_t;
* OPAL_DATATYPE_INIT_BTYPES_ARRAY_[0-21], then order and naming would _not_ matter....
*/
#define OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE { 0 }
#define OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) { [OPAL_DATATYPE_ ## NAME] = 1 }
#define OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE NULL
#define OPAL_DATATYPE_INIT_PTYPES_ARRAY(NAME) (size_t[OPAL_DATATYPE_MAX_PREDEFINED]){ [OPAL_DATATYPE_ ## NAME] = 1, [OPAL_DATATYPE_MAX_PREDEFINED-1] = 0 }
#define OPAL_DATATYPE_INIT_NAME(NAME) "OPAL_" #NAME
@ -268,7 +275,7 @@ struct opal_datatype_t;
.name = OPAL_DATATYPE_INIT_NAME(NAME), \
.desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(UNAVAILABLE), \
.opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(UNAVAILABLE), \
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE \
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \
}
#define OPAL_DATATYPE_INITIALIZER_UNAVAILABLE( FLAGS ) \
@ -287,7 +294,7 @@ struct opal_datatype_t;
.name = OPAL_DATATYPE_INIT_NAME(EMPTY), \
.desc = OPAL_DATATYPE_INIT_DESC_NULL, \
.opt_desc = OPAL_DATATYPE_INIT_DESC_NULL, \
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE \
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \
}
#define OPAL_DATATYPE_INIT_BASIC_TYPE( TYPE, NAME, FLAGS ) \
@ -303,7 +310,7 @@ struct opal_datatype_t;
.name = OPAL_DATATYPE_INIT_NAME(NAME), \
.desc = OPAL_DATATYPE_INIT_DESC_NULL, \
.opt_desc = OPAL_DATATYPE_INIT_DESC_NULL, \
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) \
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \
}
#define OPAL_DATATYPE_INIT_BASIC_DATATYPE( TYPE, ALIGN, NAME, FLAGS ) \
@ -319,7 +326,7 @@ struct opal_datatype_t;
.name = OPAL_DATATYPE_INIT_NAME(NAME), \
.desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(NAME), \
.opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(NAME), \
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) \
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \
}
#define OPAL_DATATYPE_INITIALIZER_LOOP(FLAGS) OPAL_DATATYPE_INIT_BASIC_TYPE( OPAL_DATATYPE_LOOP, LOOP, FLAGS )
@ -476,7 +483,10 @@ static inline int GET_FIRST_NON_LOOP( const union dt_elem_desc* _pElem )
#define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \
do { \
(ELEMENT) = &((DESCRIPTION)[(POSITION)]); \
(COUNTER) = (ELEMENT)->elem.count; \
if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \
(COUNTER) = (ELEMENT)->loop.loops; \
else \
(COUNTER) = (ELEMENT)->elem.count; \
} while (0)
OPAL_DECLSPEC int opal_datatype_contain_basic_datatypes( const struct opal_datatype_t* pData, char* ptr, size_t length );

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -42,21 +42,22 @@
static int32_t
opal_datatype_optimize_short( opal_datatype_t* pData,
int32_t count,
dt_type_desc_t* pTypeDesc )
int32_t count,
dt_type_desc_t* pTypeDesc )
{
dt_elem_desc_t* pElemDesc;
ddt_elem_desc_t opt_elem;
dt_stack_t* pOrigStack;
dt_stack_t* pStack; /* pointer to the position on the stack */
int32_t pos_desc = 0; /* actual position in the description of the derived datatype */
int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1, last_length = 0;
int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1;
int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity;
ptrdiff_t total_disp = 0, last_extent = 1, last_disp = 0;
uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */
uint32_t i;
size_t last_length = 0;
pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->btypes[OPAL_DATATYPE_LOOP]+2) );
pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->loops+2) );
SAVE_STACK( pStack, -1, 0, count, 0 );
pTypeDesc->length = 2 * pData->desc.used + 1 /* for the fake OPAL_DATATYPE_END_LOOP at the end */;
@ -85,7 +86,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
pElemDesc++; nbElems++;
if( --stack_pos >= 0 ) { /* still something to do ? */
ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop);
pStartLoop->items = (pElemDesc - 1)->elem.count;
pStartLoop->items = end_loop->items;
total_disp = pStack->disp; /* update the displacement position */
}
pStack--; /* go down one position on the stack */
@ -98,8 +99,8 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
ptrdiff_t loop_disp = pData->desc.desc[pos_desc + index].elem.disp;
continuity = ((last_disp + last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size)
== (total_disp + loop_disp));
continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size)
== (total_disp + loop_disp));
if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
/* the loop is contiguous or composed by contiguous elements with a gap */
if( loop->extent == (ptrdiff_t)end_loop->size ) {
@ -206,7 +207,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */
/* now here we have a basic datatype */
type = pData->desc.desc[pos_desc].elem.common.type;
continuity = ((last_disp + last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size)
continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size)
== (total_disp + pData->desc.desc[pos_desc].elem.disp));
if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity &&

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -502,6 +502,7 @@ opal_unpack_general_function( opal_convertor_t* pConvertor,
conv_ptr = pConvertor->pBaseBuf + pStack->disp;
pos_desc++; /* advance to the next data */
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
if( 0 == iov_len_local ) goto complete_loop; /* escape if we're done */
continue;
}
conv_ptr += rc * description[pos_desc].elem.extent;

Просмотреть файл

@ -18,7 +18,7 @@ if PROJECT_OMPI
MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw unpack_ooo ddt_pack external32
MPI_CHECKS = to_self
endif
TESTS = opal_datatype_test $(MPI_TESTS)
TESTS = opal_datatype_test unpack_hetero $(MPI_TESTS)
check_PROGRAMS = $(TESTS) $(MPI_CHECKS)
@ -79,5 +79,10 @@ external32_LDADD = \
$(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
unpack_hetero_SOURCES = unpack_hetero.c
unpack_hetero_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS)
unpack_hetero_LDADD = \
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
distclean:
rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2007 The University of Tennessee and The University
* Copyright (c) 2004-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
@ -23,7 +23,7 @@
/**
* The purpose of this test is to simulate the multi-network packing and
* unpacking process. The pack operation will happens in-order while the
* will be done randomly. Therefore, before each unpack the correct
* unpack will be done randomly. Therefore, before each unpack the correct
* position in the user buffer has to be set.
*/

99
test/datatype/unpack_hetero.c Обычный файл
Просмотреть файл

@ -0,0 +1,99 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/runtime/opal.h"
#include "opal/datatype/opal_datatype.h"
#include "opal/datatype/opal_datatype_internal.h"
#include "opal/datatype/opal_convertor.h"
#include "opal/datatype/opal_datatype_prototypes.h"
#include "opal/util/arch.h"
#include <time.h>
#include <stdlib.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <stdio.h>
#include <string.h>
/* Compile with:
gcc -DHAVE_CONFIG_H -I. -I../../include -I../.. -I../../include -I../../../ompi-trunk/opal -I../../../ompi-trunk/orte -g unpack_hetero.c -o unpack_hetero
*/
uint32_t remote_arch = 0xffffffff;
/**
 * Run one unpack scenario: a receive convertor for @p recv_count int4
 * elements is attached to a two-int receive buffer pre-filled with -1, and
 * exactly one packed int (4 bytes) is handed to opal_unpack_general().
 *
 * The packed value is 0x01000000 and the expected unpacked value is 1, i.e.
 * the convertor is expected to byte-swap the element. Only the first int of
 * the receive buffer may be written; the second must still hold -1.
 *
 * The checks are explicit (not assert()) so that they are still performed
 * when the test is compiled with NDEBUG.
 *
 * @param banner      text printed before the scenario starts.
 * @param recv_count  number of int4 elements the convertor is prepared for.
 *
 * @return OPAL_SUCCESS if the receive buffer holds the expected values,
 *         OPAL_ERROR otherwise (including convertor setup failures).
 */
static int unpack_one_int( const char* banner, int recv_count )
{
    opal_convertor_t* pConv;
    int sbuf[2], rbuf[2];
    struct iovec a;
    uint32_t iov_count = 1;
    size_t max_data = 4;
    int rc = OPAL_SUCCESS;

    sbuf[0] = 0x01000000; sbuf[1] = 0x02000000;
    rbuf[0] = -1; rbuf[1] = -1;

    printf( "%s", banner );

    pConv = opal_convertor_create( remote_arch, 0 );
    if( NULL == pConv ) {
        printf( "Cannot create a convertor\n" );
        return OPAL_ERROR;
    }
    if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &opal_datatype_int4, recv_count, rbuf ) ) {
        printf( "Cannot attach the datatype to a convertor\n" );
        OBJ_RELEASE(pConv);
        return OPAL_ERROR;
    }

    /* Provide a single packed int, whatever the number of expected elements. */
    a.iov_base = sbuf;
    a.iov_len = 4;
    opal_unpack_general( pConv, &a, &iov_count, &max_data );

    if( (1 != rbuf[0]) || (-1 != rbuf[1]) ) {
        printf( "Unexpected unpacked data: rbuf[0] = %d (expected 1), rbuf[1] = %d (expected -1)\n",
                rbuf[0], rbuf[1] );
        rc = OPAL_ERROR;
    }
    OBJ_RELEASE(pConv);
    return rc;
}

/**
 * Main function. Checks that unpacking through a convertor created for a
 * remote architecture of opposite endianness consumes only the packed data
 * that was provided: one int is unpacked when one int is expected, and also
 * when two ints are expected but only one has been received.
 *
 * @return OPAL_SUCCESS when both scenarios succeed, OPAL_ERROR otherwise.
 */
int main( int argc, char* argv[] )
{
    int rc;

    (void)argc; (void)argv;  /* unused */

    opal_datatype_init();

    /**
     * Simulate a heterogeneous peer: the remote architecture is the local
     * one with the OPAL_ARCH_ISBIGENDIAN bit flipped.
     */
    remote_arch = opal_local_arch ^ OPAL_ARCH_ISBIGENDIAN;

    rc = unpack_one_int( "\n\n#\n * TEST UNPACKING 1 int out of 1\n#\n\n", 1 );
    if( OPAL_SUCCESS == rc ) {
        rc = unpack_one_int( "\n\n#\n * TEST UNPACKING 1 int out of 2\n#\n\n", 2 );
    }

    /* clean-ups all data allocations */
    opal_datatype_finalize();
    opal_finalize();

    return rc;
}