Topic/datatype (#3441)
* Don't overflow the internal datatype count. Change the type of the count to be a size_t (it does not alter the total size of the internal structures, so has no impact on the ABI). Signed-off-by: George Bosilca <bosilca@icl.utk.edu> * Optimize the datatype creation. The internal array of counts of predefined types is now only created when needed, which is either in a heterogeneous environment, or when get_elements is called. It saves space and makes the convertor creation a little faster in some cases. Rearrange the fields in the datatype description structs. The macro OPAL_DATATYPE_INIT_PTYPES_ARRAY had a bug, and the static array was only partially created. All predefined types should have the ptypes array created and initialized. Signed-off-by: George Bosilca <bosilca@icl.utk.edu> * Fix the boundary computation. Signed-off-by: George Bosilca <bosilca@icl.utk.edu> * test/datatype: add test for short unpack on heterogeneous cluster Signed-off-by: Gilles Gouaillardet <gilles@rist.or.jp> Signed-off-by: George Bosilca <bosilca@icl.utk.edu> * Trying to reduce the cost of creating a convertor. Signed-off-by: George Bosilca <bosilca@icl.utk.edu> * Respect the unpack boundaries. As Gilles suggested on #2535 the opal_unpack_general_function was unpacking based on the requested count and not on the amount of packed data provided. Fixes #2535. Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
a66909b8b4
Коммит
cbf03b3113
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
|
||||
@ -25,6 +25,7 @@
|
||||
|
||||
#include "ompi/runtime/params.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "opal/datatype/opal_datatype_internal.h"
|
||||
|
||||
int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t *count)
|
||||
{
|
||||
@ -48,9 +49,10 @@ int ompi_datatype_get_elements (ompi_datatype_t *datatype, size_t ucount, size_t
|
||||
there are no leftover bytes */
|
||||
if (!ompi_datatype_is_predefined(datatype)) {
|
||||
if (0 != internal_count) {
|
||||
opal_datatype_compute_ptypes(&datatype->super);
|
||||
/* count the basic elements in the datatype */
|
||||
for (i = 4, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) {
|
||||
total += datatype->super.btypes[i];
|
||||
for (i = OPAL_DATATYPE_FIRST_TYPE, total = 0 ; i < OPAL_DATATYPE_MAX_PREDEFINED ; ++i) {
|
||||
total += datatype->super.ptypes[i];
|
||||
}
|
||||
internal_count = total * internal_count;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2009-2013 The University of Tennessee and The University
|
||||
* Copyright (c) 2009-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
@ -467,7 +467,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
|
||||
.name = OPAL_DATATYPE_INIT_NAME(TYPE ## SIZE), \
|
||||
.desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \
|
||||
.opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(TYPE ## SIZE), \
|
||||
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(TYPE ## SIZE) \
|
||||
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY(TYPE ## SIZE) \
|
||||
}
|
||||
|
||||
#define OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE_FORTRAN( TYPE, NAME, SIZE, ALIGN, FLAGS ) \
|
||||
|
@ -384,8 +384,9 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}};
|
||||
(PDST)->super.opt_desc = (PSRC)->super.opt_desc; \
|
||||
(PDST)->packed_description = (PSRC)->packed_description; \
|
||||
(PSRC)->packed_description = NULL; \
|
||||
memcpy( (PDST)->super.btypes, (PSRC)->super.btypes, \
|
||||
OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t) ); \
|
||||
/* transfer the ptypes */ \
|
||||
(PDST)->super.ptypes = (PSRC)->super.ptypes; \
|
||||
(PSRC)->super.ptypes = NULL; \
|
||||
} while(0)
|
||||
|
||||
#define DECLARE_MPI2_COMPOSED_STRUCT_DDT( PDATA, MPIDDT, MPIDDTNAME, type1, type2, MPIType1, MPIType2, FLAGS) \
|
||||
@ -393,20 +394,20 @@ opal_pointer_array_t ompi_datatype_f_to_c_table = {{0}};
|
||||
struct { type1 v1; type2 v2; } s[2]; \
|
||||
ompi_datatype_t *types[2], *ptype; \
|
||||
int bLength[2] = {1, 1}; \
|
||||
ptrdiff_t base, displ[2]; \
|
||||
ptrdiff_t base, displ[2]; \
|
||||
\
|
||||
types[0] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType1]; \
|
||||
types[1] = (ompi_datatype_t*)ompi_datatype_basicDatatypes[MPIType2]; \
|
||||
base = (ptrdiff_t)(&(s[0])); \
|
||||
displ[0] = (ptrdiff_t)(&(s[0].v1)); \
|
||||
base = (ptrdiff_t)(&(s[0])); \
|
||||
displ[0] = (ptrdiff_t)(&(s[0].v1)); \
|
||||
displ[0] -= base; \
|
||||
displ[1] = (ptrdiff_t)(&(s[0].v2)); \
|
||||
displ[1] = (ptrdiff_t)(&(s[0].v2)); \
|
||||
displ[1] -= base; \
|
||||
\
|
||||
ompi_datatype_create_struct( 2, bLength, displ, types, &ptype ); \
|
||||
displ[0] = (ptrdiff_t)(&(s[1])); \
|
||||
displ[0] = (ptrdiff_t)(&(s[1])); \
|
||||
displ[0] -= base; \
|
||||
if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \
|
||||
if( displ[0] != (displ[1] + (ptrdiff_t)sizeof(type2)) ) \
|
||||
ptype->super.ub = displ[0]; /* force a new extent for the datatype */ \
|
||||
ptype->super.flags |= (FLAGS); \
|
||||
ptype->id = MPIDDT; \
|
||||
@ -736,7 +737,7 @@ void ompi_datatype_dump( const ompi_datatype_t* pData )
|
||||
(long)pData->super.size, (int)pData->super.align, pData->super.id, (int)pData->super.desc.length, (int)pData->super.desc.used,
|
||||
(long)pData->super.true_lb, (long)pData->super.true_ub, (long)(pData->super.true_ub - pData->super.true_lb),
|
||||
(long)pData->super.lb, (long)pData->super.ub, (long)(pData->super.ub - pData->super.lb),
|
||||
(int)pData->super.nbElems, (int)pData->super.btypes[OPAL_DATATYPE_LOOP], (int)pData->super.flags );
|
||||
(int)pData->super.nbElems, (int)pData->super.loops, (int)pData->super.flags );
|
||||
/* dump the flags */
|
||||
if( ompi_datatype_is_predefined(pData) ) {
|
||||
index += snprintf( buffer + index, length - index, "predefined " );
|
||||
|
@ -366,7 +366,8 @@ static inline int memchecker_datatype(MPI_Datatype type)
|
||||
opal_memchecker_base_isdefined (&type->super.opt_desc.length, sizeof(opal_datatype_count_t));
|
||||
opal_memchecker_base_isdefined (&type->super.opt_desc.used, sizeof(opal_datatype_count_t));
|
||||
opal_memchecker_base_isdefined (&type->super.opt_desc.desc, sizeof(dt_elem_desc_t *));
|
||||
opal_memchecker_base_isdefined (&type->super.btypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(uint32_t));
|
||||
if( NULL != type->super.ptypes )
|
||||
opal_memchecker_base_isdefined (&type->super.ptypes, OPAL_DATATYPE_MAX_PREDEFINED * sizeof(size_t));
|
||||
|
||||
opal_memchecker_base_isdefined (&type->id, sizeof(int32_t));
|
||||
opal_memchecker_base_isdefined (&type->d_f_to_c_index, sizeof(int32_t));
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -43,9 +43,6 @@
|
||||
CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
|
||||
#endif
|
||||
|
||||
extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor,
|
||||
int starting_point, const int* sizes );
|
||||
|
||||
static void opal_convertor_construct( opal_convertor_t* convertor )
|
||||
{
|
||||
convertor->pStack = convertor->static_stack;
|
||||
@ -226,7 +223,7 @@ int32_t opal_convertor_pack( opal_convertor_t* pConv,
|
||||
if( OPAL_LIKELY(pConv->flags & CONVERTOR_NO_OP) ) {
|
||||
/**
|
||||
* We are doing conversion on a contiguous datatype on a homogeneous
|
||||
* environment. The convertor contain minimal informations, we only
|
||||
* environment. The convertor contain minimal information, we only
|
||||
* use the bConverted to manage the conversion.
|
||||
*/
|
||||
uint32_t i;
|
||||
@ -447,31 +444,49 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
 * Sum, over the basic types flagged in pData->bdt_used, the per-type
 * element count (pData->ptypes[i]) multiplied by the per-type size
 * supplied by the caller (sizes[i]).
 *
 * @param pData  datatype whose bdt_used bitmask and ptypes counts are read.
 *               If ptypes is NULL it is populated on demand through
 *               opal_datatype_compute_ptypes(), which requires casting
 *               away the const qualifier.
 * @param sizes  array indexed by basic type id; read for every index whose
 *               bit is set in bdt_used, starting at OPAL_DATATYPE_FIRST_TYPE.
 * @return       the accumulated length for ONE instance of the datatype
 *               (no multiplication by a count happens here).
 *
 * NOTE(review): the return value of opal_datatype_compute_ptypes() is not
 * checked; if it can fail and leave ptypes NULL, the loop below would
 * dereference a NULL pointer -- confirm against its implementation.
 */
static size_t
|
||||
opal_datatype_compute_remote_size( const opal_datatype_t* pData,
|
||||
const size_t* sizes )
|
||||
{
|
||||
uint32_t typeMask = pData->bdt_used;
|
||||
size_t length = 0;
|
||||
|
||||
if( OPAL_UNLIKELY(NULL == pData->ptypes) ) {
|
||||
/* Allocate and fill the array of types used in the datatype description */
|
||||
opal_datatype_compute_ptypes( (opal_datatype_t*)pData );
|
||||
}
|
||||
|
||||
/* Walk the type ids upward; each handled bit is cleared from typeMask so
 * the loop can stop as soon as no flagged bit remains. */
for( int i = OPAL_DATATYPE_FIRST_TYPE; typeMask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) {
|
||||
if( typeMask & ((uint32_t)1 << i) ) {
|
||||
length += (pData->ptypes[i] * sizes[i]);
|
||||
typeMask ^= ((uint32_t)1 << i);
|
||||
}
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the remote size. If necessary remove the homogeneous flag
|
||||
* and redirect the convertor description toward the non-optimized
|
||||
* datatype representation.
|
||||
*/
|
||||
#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \
|
||||
{ \
|
||||
if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \
|
||||
opal_convertor_master_t* master; \
|
||||
int i; \
|
||||
uint32_t mask = datatype->bdt_used; \
|
||||
convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \
|
||||
master = convertor->master; \
|
||||
convertor->remote_size = 0; \
|
||||
for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \
|
||||
if( mask & ((uint32_t)1 << i) ) { \
|
||||
convertor->remote_size += (datatype->btypes[i] * \
|
||||
master->remote_sizes[i]); \
|
||||
mask ^= ((uint32_t)1 << i); \
|
||||
} \
|
||||
} \
|
||||
convertor->remote_size *= convertor->count; \
|
||||
convertor->use_desc = &(datatype->desc); \
|
||||
} \
|
||||
/**
 * Set and return pConvertor->remote_size, and mark the convertor with
 * CONVERTOR_HAS_REMOTE_SIZE.
 *
 * remote_size starts out equal to local_size. When the datatype uses at
 * least one basic type that is also flagged in the master convertor's
 * hetero_mask, the CONVERTOR_HOMOGENEOUS flag is cleared, use_desc is
 * redirected to the datatype's desc description, and -- if the remote size
 * was not already flagged as available -- remote_size is recomputed from
 * master->remote_sizes and multiplied by the convertor count.
 *
 * @param pConvertor  convertor to update; its pDesc, master, local_size,
 *                    count and flags fields are read, and its remote_size,
 *                    flags and use_desc fields may be written.
 * @return            the value stored in pConvertor->remote_size.
 *
 * NOTE(review): remote_size is reset to local_size before the
 * CONVERTOR_HAS_REMOTE_SIZE test, so calling this on a convertor that
 * already carries the flag and matches the hetero mask returns local_size
 * instead of the previously computed value -- confirm this is intended.
 */
size_t opal_convertor_compute_remote_size( opal_convertor_t* pConvertor )
|
||||
{
|
||||
opal_datatype_t* datatype = (opal_datatype_t*)pConvertor->pDesc;
|
||||
|
||||
pConvertor->remote_size = pConvertor->local_size;
|
||||
if( OPAL_UNLIKELY(datatype->bdt_used & pConvertor->master->hetero_mask) ) {
|
||||
pConvertor->flags &= (~CONVERTOR_HOMOGENEOUS);
|
||||
pConvertor->use_desc = &(datatype->desc);
|
||||
if( 0 == (pConvertor->flags & CONVERTOR_HAS_REMOTE_SIZE) ) {
|
||||
/* This is for a single datatype, we must update it with the count */
|
||||
pConvertor->remote_size = opal_datatype_compute_remote_size(datatype,
|
||||
pConvertor->master->remote_sizes);
|
||||
pConvertor->remote_size *= pConvertor->count;
|
||||
}
|
||||
}
|
||||
pConvertor->flags |= CONVERTOR_HAS_REMOTE_SIZE;
|
||||
return pConvertor->remote_size;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -483,29 +498,26 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
|
||||
*/
|
||||
#define OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf ) \
|
||||
{ \
|
||||
uint32_t bdt_mask; \
|
||||
\
|
||||
convertor->local_size = count * datatype->size; \
|
||||
convertor->pBaseBuf = (unsigned char*)pUserBuf; \
|
||||
convertor->count = count; \
|
||||
convertor->pDesc = (opal_datatype_t*)datatype; \
|
||||
convertor->bConverted = 0; \
|
||||
convertor->use_desc = &(datatype->opt_desc); \
|
||||
/* If the data is empty we just mark the convertor as \
|
||||
* completed. With this flag set the pack and unpack functions \
|
||||
* will not do anything. \
|
||||
*/ \
|
||||
if( OPAL_UNLIKELY((0 == count) || (0 == datatype->size)) ) { \
|
||||
convertor->flags |= OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED; \
|
||||
convertor->flags |= (OPAL_DATATYPE_FLAG_NO_GAPS | CONVERTOR_COMPLETED | CONVERTOR_HAS_REMOTE_SIZE); \
|
||||
convertor->local_size = convertor->remote_size = 0; \
|
||||
return OPAL_SUCCESS; \
|
||||
} \
|
||||
/* Compute the local in advance */ \
|
||||
convertor->local_size = count * datatype->size; \
|
||||
convertor->pBaseBuf = (unsigned char*)pUserBuf; \
|
||||
convertor->count = count; \
|
||||
\
|
||||
/* Grab the datatype part of the flags */ \
|
||||
convertor->flags &= CONVERTOR_TYPE_MASK; \
|
||||
convertor->flags |= (CONVERTOR_DATATYPE_MASK & datatype->flags); \
|
||||
convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \
|
||||
convertor->pDesc = (opal_datatype_t*)datatype; \
|
||||
convertor->bConverted = 0; \
|
||||
convertor->use_desc = &(datatype->opt_desc); \
|
||||
\
|
||||
convertor->remote_size = convertor->local_size; \
|
||||
if( OPAL_LIKELY(convertor->remoteArch == opal_local_arch) ) { \
|
||||
@ -516,9 +528,8 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
|
||||
} \
|
||||
} \
|
||||
\
|
||||
bdt_mask = datatype->bdt_used & convertor->master->hetero_mask; \
|
||||
OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE( convertor, datatype, \
|
||||
bdt_mask ); \
|
||||
assert( (convertor)->pDesc == (datatype) ); \
|
||||
opal_convertor_compute_remote_size( convertor ); \
|
||||
assert( NULL != convertor->use_desc->desc ); \
|
||||
/* For predefined datatypes (contiguous) do nothing more */ \
|
||||
/* if checksum is enabled then always continue */ \
|
||||
@ -530,7 +541,7 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
|
||||
} \
|
||||
convertor->flags &= ~CONVERTOR_NO_OP; \
|
||||
{ \
|
||||
uint32_t required_stack_length = datatype->btypes[OPAL_DATATYPE_LOOP] + 1; \
|
||||
uint32_t required_stack_length = datatype->loops + 1; \
|
||||
\
|
||||
if( required_stack_length > convertor->stack_size ) { \
|
||||
assert(convertor->pStack == convertor->static_stack); \
|
||||
@ -714,8 +725,8 @@ void opal_datatype_dump_stack( const dt_stack_t* pStack, int stack_pos,
|
||||
opal_output( 0, "%d: pos %d count %d disp %ld ", stack_pos, pStack[stack_pos].index,
|
||||
(int)pStack[stack_pos].count, (long)pStack[stack_pos].disp );
|
||||
if( pStack->index != -1 )
|
||||
opal_output( 0, "\t[desc count %d disp %ld extent %ld]\n",
|
||||
pDesc[pStack[stack_pos].index].elem.count,
|
||||
opal_output( 0, "\t[desc count %lu disp %ld extent %ld]\n",
|
||||
(unsigned long)pDesc[pStack[stack_pos].index].elem.count,
|
||||
(long)pDesc[pStack[stack_pos].index].elem.disp,
|
||||
(long)pDesc[pStack[stack_pos].index].elem.extent );
|
||||
else
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -54,6 +54,7 @@ BEGIN_C_DECLS
|
||||
#define CONVERTOR_STATE_ALLOC 0x04000000
|
||||
#define CONVERTOR_COMPLETED 0x08000000
|
||||
#define CONVERTOR_CUDA_UNIFIED 0x10000000
|
||||
#define CONVERTOR_HAS_REMOTE_SIZE 0x20000000
|
||||
|
||||
union dt_elem_desc;
|
||||
typedef struct opal_convertor_t opal_convertor_t;
|
||||
@ -72,7 +73,7 @@ struct dt_stack_t {
|
||||
int32_t index; /**< index in the element description */
|
||||
int16_t type; /**< the type used for the last pack/unpack (original or OPAL_DATATYPE_UINT1) */
|
||||
size_t count; /**< number of times we still have to do it */
|
||||
ptrdiff_t disp; /**< actual displacement depending on the count field */
|
||||
ptrdiff_t disp; /**< actual displacement depending on the count field */
|
||||
};
|
||||
typedef struct dt_stack_t dt_stack_t;
|
||||
|
||||
@ -186,9 +187,16 @@ static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConv
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the size of the remote datatype representation. The size will
|
||||
* depend on the configuration of the master convertor. In homogeneous
|
||||
* environments, the local and remote sizes are identical.
|
||||
*/
|
||||
size_t
|
||||
opal_convertor_compute_remote_size( opal_convertor_t* pConv );
|
||||
|
||||
/*
|
||||
*
|
||||
/**
|
||||
* Return the local size of the convertor (count times the size of the datatype).
|
||||
*/
|
||||
static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv,
|
||||
size_t* pSize )
|
||||
@ -197,16 +205,24 @@ static inline void opal_convertor_get_packed_size( const opal_convertor_t* pConv
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
/**
|
||||
* Return the remote size of the convertor (count times the remote size of the
|
||||
* datatype). On homogeneous environments the local and remote sizes are
|
||||
* identical.
|
||||
*/
|
||||
static inline void opal_convertor_get_unpacked_size( const opal_convertor_t* pConv,
|
||||
size_t* pSize )
|
||||
{
|
||||
if( pConv->flags & CONVERTOR_HOMOGENEOUS ) {
|
||||
*pSize = pConv->local_size;
|
||||
return;
|
||||
}
|
||||
if( 0 == (CONVERTOR_HAS_REMOTE_SIZE & pConv->flags) ) {
|
||||
opal_convertor_compute_remote_size( (opal_convertor_t*)pConv);
|
||||
}
|
||||
*pSize = pConv->remote_size;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the current absolute position of the next pack/unpack. This function is
|
||||
* mostly useful for contiguous datatypes, when we need to get the pointer to the
|
||||
@ -279,6 +295,7 @@ opal_convertor_raw( opal_convertor_t* convertor, /* [IN/OUT] */
|
||||
uint32_t* iov_count, /* [IN/OUT] */
|
||||
size_t* length ); /* [OUT] */
|
||||
|
||||
|
||||
/*
|
||||
* Upper level does not need to call the _nocheck function directly.
|
||||
*/
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2015 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -53,9 +53,10 @@ BEGIN_C_DECLS
|
||||
#endif
|
||||
/*
|
||||
* No more than this number of _Basic_ datatypes in C/CPP or Fortran
|
||||
* are supported (in order to not change setup and usage of btypes).
|
||||
* are supported (in order to not change setup and usage of the predefined
|
||||
* datatypes).
|
||||
*
|
||||
* XXX TODO Adapt to whatever the OMPI-layer needs
|
||||
* BEWARE: This constant should reflect whatever the OMPI-layer needs.
|
||||
*/
|
||||
#define OPAL_DATATYPE_MAX_SUPPORTED 47
|
||||
|
||||
@ -108,13 +109,14 @@ struct opal_datatype_t {
|
||||
uint32_t bdt_used; /**< bitset of which basic datatypes are used in the data description */
|
||||
size_t size; /**< total size in bytes of the memory used by the data if
|
||||
the data is put on a contiguous buffer */
|
||||
ptrdiff_t true_lb; /**< the true lb of the data without user defined lb and ub */
|
||||
ptrdiff_t true_ub; /**< the true ub of the data without user defined lb and ub */
|
||||
ptrdiff_t lb; /**< lower bound in memory */
|
||||
ptrdiff_t ub; /**< upper bound in memory */
|
||||
ptrdiff_t true_lb; /**< the true lb of the data without user defined lb and ub */
|
||||
ptrdiff_t true_ub; /**< the true ub of the data without user defined lb and ub */
|
||||
ptrdiff_t lb; /**< lower bound in memory */
|
||||
ptrdiff_t ub; /**< upper bound in memory */
|
||||
/* --- cacheline 1 boundary (64 bytes) --- */
|
||||
size_t nbElems; /**< total number of elements inside the datatype */
|
||||
uint32_t align; /**< data should be aligned to */
|
||||
uint32_t loops; /**< number of loops on the iternal type stack */
|
||||
|
||||
/* Attribute fields */
|
||||
char name[OPAL_MAX_OBJECT_NAME]; /**< name of the datatype */
|
||||
@ -123,11 +125,12 @@ struct opal_datatype_t {
|
||||
dt_type_desc_t opt_desc; /**< short description of the data used when conversion is useless
|
||||
or in the send case (without conversion) */
|
||||
|
||||
uint32_t btypes[OPAL_DATATYPE_MAX_SUPPORTED];
|
||||
/**< basic elements count used to compute the size of the
|
||||
datatype for remote nodes. The length of the array is dependent on
|
||||
the maximum number of datatypes of all top layers.
|
||||
Reason being is that Fortran is not at the OPAL layer. */
|
||||
size_t *ptypes; /**< array of basic predefined types that facilitate the computing
|
||||
of the remote size in heterogeneous environments. The length of the
|
||||
array is dependent on the maximum number of predefined datatypes of
|
||||
all language interfaces (because Fortran is not known at the OPAL
|
||||
layer). This field should never be initialized in homogeneous
|
||||
environments */
|
||||
/* --- cacheline 5 boundary (320 bytes) was 32-36 bytes ago --- */
|
||||
|
||||
/* size: 352, cachelines: 6, members: 15 */
|
||||
@ -281,6 +284,8 @@ OPAL_DECLSPEC int32_t
|
||||
opal_datatype_copy_content_same_ddt( const opal_datatype_t* pData, int32_t count,
|
||||
char* pDestBuf, char* pSrcBuf );
|
||||
|
||||
OPAL_DECLSPEC int opal_datatype_compute_ptypes( opal_datatype_t* datatype );
|
||||
|
||||
OPAL_DECLSPEC const opal_datatype_t*
|
||||
opal_datatype_match_size( int size, uint16_t datakind, uint16_t datalang );
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -279,7 +279,8 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA
|
||||
* predefined non contiguous datatypes (like MPI_SHORT_INT).
|
||||
*/
|
||||
if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) {
|
||||
pdtBase->btypes[pdtAdd->id] += count;
|
||||
if( NULL != pdtBase->ptypes )
|
||||
pdtBase->ptypes[pdtAdd->id] += count;
|
||||
pLast->elem.common.type = pdtAdd->id;
|
||||
pLast->elem.count = count;
|
||||
pLast->elem.disp = disp;
|
||||
@ -291,13 +292,13 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA
|
||||
}
|
||||
} else {
|
||||
/* keep trace of the total number of basic datatypes in the datatype definition */
|
||||
pdtBase->btypes[OPAL_DATATYPE_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_LOOP];
|
||||
pdtBase->btypes[OPAL_DATATYPE_END_LOOP] += pdtAdd->btypes[OPAL_DATATYPE_END_LOOP];
|
||||
pdtBase->btypes[OPAL_DATATYPE_LB] |= pdtAdd->btypes[OPAL_DATATYPE_LB];
|
||||
pdtBase->btypes[OPAL_DATATYPE_UB] |= pdtAdd->btypes[OPAL_DATATYPE_UB];
|
||||
for( i = 4; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ )
|
||||
if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]);
|
||||
|
||||
pdtBase->loops += pdtAdd->loops;
|
||||
pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_LB);
|
||||
pdtBase->flags |= (pdtAdd->flags & OPAL_DATATYPE_FLAG_USER_UB);
|
||||
if( (NULL != pdtBase->ptypes) && (NULL != pdtAdd->ptypes) ) {
|
||||
for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ )
|
||||
if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]);
|
||||
}
|
||||
if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) &&
|
||||
(extent == pdtAdd->desc.desc[0].elem.extent) ){
|
||||
pLast->elem = pdtAdd->desc.desc[0].elem;
|
||||
@ -312,7 +313,7 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA
|
||||
pLoop = pLast;
|
||||
CREATE_LOOP_START( pLast, count, pdtAdd->desc.used + 1, extent,
|
||||
(pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED)) );
|
||||
pdtBase->btypes[OPAL_DATATYPE_LOOP] += 2;
|
||||
pdtBase->loops += 2;
|
||||
pdtBase->desc.used += 2;
|
||||
pLast++;
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -61,6 +61,9 @@ int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t *
|
||||
dest_type->opt_desc.used = src_type->opt_desc.used;
|
||||
memcpy( dest_type->opt_desc.desc, src_type->opt_desc.desc, desc_length * sizeof(dt_elem_desc_t) );
|
||||
}
|
||||
} else {
|
||||
assert( NULL == dest_type->opt_desc.desc );
|
||||
assert( 0 == dest_type->opt_desc.length );
|
||||
}
|
||||
}
|
||||
dest_type->id = src_type->id; /* preserve the default id. This allow us to
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
@ -179,7 +179,7 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i
|
||||
return 0; /* completed */
|
||||
}
|
||||
|
||||
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 1) );
|
||||
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 1) );
|
||||
pStack->count = count;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2013 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -30,8 +30,6 @@
|
||||
|
||||
static void opal_datatype_construct( opal_datatype_t* pData )
|
||||
{
|
||||
int i;
|
||||
|
||||
pData->size = 0;
|
||||
pData->flags = OPAL_DATATYPE_FLAG_CONTIGUOUS;
|
||||
pData->id = 0;
|
||||
@ -53,32 +51,36 @@ static void opal_datatype_construct( opal_datatype_t* pData )
|
||||
pData->opt_desc.length = 0;
|
||||
pData->opt_desc.used = 0;
|
||||
|
||||
for( i = 0; i < OPAL_DATATYPE_MAX_SUPPORTED; i++ )
|
||||
pData->btypes[i] = 0;
|
||||
pData->ptypes = NULL;
|
||||
pData->loops = 0;
|
||||
}
|
||||
|
||||
static void opal_datatype_destruct( opal_datatype_t* datatype )
|
||||
{
|
||||
if (!opal_datatype_is_predefined(datatype)) {
|
||||
if( datatype->desc.desc != NULL ) {
|
||||
free( datatype->desc.desc );
|
||||
datatype->desc.length = 0;
|
||||
datatype->desc.used = 0;
|
||||
}
|
||||
}
|
||||
if( datatype->opt_desc.desc != NULL ) {
|
||||
/**
|
||||
* As the default description and the optimized description might point to the
|
||||
* same data description we should start by cleaning the optimized description.
|
||||
*/
|
||||
if( NULL != datatype->opt_desc.desc ) {
|
||||
if( datatype->opt_desc.desc != datatype->desc.desc )
|
||||
free( datatype->opt_desc.desc );
|
||||
datatype->opt_desc.length = 0;
|
||||
datatype->opt_desc.used = 0;
|
||||
datatype->opt_desc.desc = NULL;
|
||||
}
|
||||
/**
|
||||
* As the default description and the optimized description can point to the
|
||||
* same memory location we should keep the default location pointer until we
|
||||
* know what we should do with the optimized description.
|
||||
*/
|
||||
datatype->desc.desc = NULL;
|
||||
if (!opal_datatype_is_predefined(datatype)) {
|
||||
if( NULL != datatype->desc.desc ) {
|
||||
free( datatype->desc.desc );
|
||||
datatype->desc.length = 0;
|
||||
datatype->desc.used = 0;
|
||||
datatype->desc.desc = NULL;
|
||||
}
|
||||
}
|
||||
/* dont free the ptypes of predefined types (it was not dynamically allocated) */
|
||||
if( (NULL != datatype->ptypes) && (datatype->id >= OPAL_DATATYPE_MAX_PREDEFINED) ) {
|
||||
free(datatype->ptypes);
|
||||
datatype->ptypes = NULL;
|
||||
}
|
||||
|
||||
/* make sure the name is set to empty */
|
||||
datatype->name[0] = '\0';
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -42,8 +42,14 @@ int opal_datatype_contain_basic_datatypes( const opal_datatype_t* pData, char* p
|
||||
if( pData->flags & OPAL_DATATYPE_FLAG_USER_LB ) index += snprintf( ptr, length - index, "lb " );
|
||||
if( pData->flags & OPAL_DATATYPE_FLAG_USER_UB ) index += snprintf( ptr + index, length - index, "ub " );
|
||||
for( i = 0; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) {
|
||||
if( pData->bdt_used & mask )
|
||||
index += snprintf( ptr + index, length - index, "%s ", opal_datatype_basicDatatypes[i]->name );
|
||||
if( pData->bdt_used & mask ) {
|
||||
if( NULL == pData->ptypes ) {
|
||||
index += snprintf( ptr + index, length - index, "%s:* ", opal_datatype_basicDatatypes[i]->name );
|
||||
} else {
|
||||
index += snprintf( ptr + index, length - index, "%s:%lu ", opal_datatype_basicDatatypes[i]->name,
|
||||
pData->ptypes[i]);
|
||||
}
|
||||
}
|
||||
mask <<= 1;
|
||||
if( length <= (size_t)index ) break;
|
||||
}
|
||||
@ -115,7 +121,7 @@ void opal_datatype_dump( const opal_datatype_t* pData )
|
||||
(void*)pData, pData->name, (long)pData->size, (int)pData->align, pData->id, (int)pData->desc.length, (int)pData->desc.used,
|
||||
(long)pData->true_lb, (long)pData->true_ub, (long)(pData->true_ub - pData->true_lb),
|
||||
(long)pData->lb, (long)pData->ub, (long)(pData->ub - pData->lb),
|
||||
(int)pData->nbElems, (int)pData->btypes[OPAL_DATATYPE_LOOP], (int)pData->flags );
|
||||
(int)pData->nbElems, (int)pData->loops, (int)pData->flags );
|
||||
/* dump the flags */
|
||||
if( pData->flags == OPAL_DATATYPE_FLAG_PREDEFINED )
|
||||
index += snprintf( buffer + index, length - index, "predefined " );
|
||||
|
@ -3,10 +3,10 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
* Copyright (c) 2004-2017 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
@ -34,21 +34,8 @@
|
||||
#include "opal/datatype/opal_datatype_internal.h"
|
||||
|
||||
|
||||
int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor,
|
||||
size_t starting_point,
|
||||
const size_t* sizes );
|
||||
|
||||
static inline size_t
|
||||
opal_convertor_compute_remote_size( const opal_datatype_t* pData, const size_t* sizes )
|
||||
{
|
||||
uint32_t i;
|
||||
size_t length = 0;
|
||||
|
||||
for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ ) {
|
||||
length += (pData->btypes[i] * sizes[i]);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
extern int opal_convertor_create_stack_with_pos_general( opal_convertor_t* convertor,
|
||||
size_t starting_point, const size_t* sizes );
|
||||
|
||||
int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor,
|
||||
size_t starting_point, const size_t* sizes )
|
||||
@ -104,7 +91,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor,
|
||||
}
|
||||
|
||||
/* remove from the main loop all the complete datatypes */
|
||||
remote_size = opal_convertor_compute_remote_size( pData, sizes );
|
||||
remote_size = opal_convertor_compute_remote_size( pConvertor );
|
||||
count = (int32_t)(starting_point / remote_size);
|
||||
resting_place -= (remote_size * count);
|
||||
pStack->count = pConvertor->count - count;
|
||||
@ -114,7 +101,7 @@ int opal_convertor_create_stack_with_pos_general( opal_convertor_t* pConvertor,
|
||||
pStack->disp = count * (pData->ub - pData->lb) + pElems[loop_length].elem.disp;
|
||||
|
||||
pos_desc = 0;
|
||||
remoteLength = (size_t*)alloca( sizeof(size_t) * (pConvertor->pDesc->btypes[OPAL_DATATYPE_LOOP] + 1));
|
||||
remoteLength = (size_t*)alloca( sizeof(size_t) * (pConvertor->pDesc->loops + 1));
|
||||
remoteLength[0] = 0; /* initial value set to ZERO */
|
||||
loop_length = 0;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
@ -39,9 +39,9 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t
|
||||
/* Normally the size should be less or equal to the size of the datatype.
|
||||
* This function does not support an iSize bigger than the size of the datatype.
|
||||
*/
|
||||
assert( (uint32_t)iSize <= datatype->size );
|
||||
DUMP( "dt_count_elements( %p, %d )\n", (void*)datatype, iSize );
|
||||
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 2) );
|
||||
assert( iSize <= datatype->size );
|
||||
DUMP( "dt_count_elements( %p, %ul )\n", (void*)datatype, (unsigned long)iSize );
|
||||
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) );
|
||||
pStack->count = 1;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
@ -53,8 +53,10 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
stack_pos--; pStack--;
|
||||
if( stack_pos == -1 ) return nbElems; /* completed */
|
||||
pos_desc++; /* advance to the next element after the end loop */
|
||||
} else {
|
||||
pos_desc = pStack->index + 1; /* go back to the begining of the loop */
|
||||
}
|
||||
pos_desc = pStack->index + 1;
|
||||
continue;
|
||||
}
|
||||
if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) {
|
||||
@ -93,9 +95,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t
|
||||
/**
|
||||
* Handle all complete multiple of the datatype.
|
||||
*/
|
||||
for( pos_desc = 4; pos_desc < OPAL_DATATYPE_MAX_PREDEFINED; pos_desc++ ) {
|
||||
local_length += datatype->btypes[pos_desc];
|
||||
}
|
||||
local_length = datatype->nbElems;
|
||||
pos_desc = count / local_length;
|
||||
count = count % local_length;
|
||||
*length = datatype->size * pos_desc;
|
||||
@ -104,7 +104,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t
|
||||
}
|
||||
|
||||
DUMP( "dt_set_element_count( %p, %d )\n", (void*)datatype, count );
|
||||
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->btypes[OPAL_DATATYPE_LOOP] + 2) );
|
||||
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) );
|
||||
pStack->count = 1;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
@ -116,8 +116,10 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
stack_pos--; pStack--;
|
||||
if( stack_pos == -1 ) return 0;
|
||||
pos_desc++; /* advance to the next element after the end loop */
|
||||
} else {
|
||||
pos_desc = pStack->index + 1; /* go back to the begining of the loop */
|
||||
}
|
||||
pos_desc = pStack->index + 1;
|
||||
continue;
|
||||
}
|
||||
if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) {
|
||||
@ -143,3 +145,56 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the array of counts of the predefined datatypes contained in
|
||||
* the datatype. We have no simple way to create this array, as we only
|
||||
* sporadically need it (when we deal with heterogeneous environments or
|
||||
* when we use get_element_count). Thus, we will pay the cost once per
|
||||
* datatype, but we will only update this array if/when needed.
|
||||
*/
|
||||
int opal_datatype_compute_ptypes( opal_datatype_t* datatype )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||
ssize_t nbElems = 0, stack_pos = 0;
|
||||
dt_elem_desc_t* pElems;
|
||||
|
||||
if( NULL != datatype->ptypes ) return 0;
|
||||
datatype->ptypes = (size_t*)calloc(OPAL_DATATYPE_MAX_SUPPORTED, sizeof(size_t));
|
||||
|
||||
DUMP( "opal_datatype_compute_ptypes( %p )\n", (void*)datatype );
|
||||
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (datatype->loops + 2) );
|
||||
pStack->count = 1;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
pElems = datatype->desc.desc;
|
||||
pos_desc = 0;
|
||||
|
||||
while( 1 ) { /* loop forever the exit condition is on the last OPAL_DATATYPE_END_LOOP */
|
||||
if( OPAL_DATATYPE_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
stack_pos--; pStack--;
|
||||
if( stack_pos == -1 ) return 0; /* completed */
|
||||
pos_desc++; /* advance to the next element after the end loop */
|
||||
} else {
|
||||
pos_desc = pStack->index + 1; /* go back to the begining of the loop */
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ) {
|
||||
ddt_loop_desc_t* loop = &(pElems[pos_desc].loop);
|
||||
do {
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, OPAL_DATATYPE_LOOP, loop->loops, 0 );
|
||||
pos_desc++;
|
||||
} while( OPAL_DATATYPE_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */
|
||||
DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" );
|
||||
}
|
||||
while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
|
||||
/* now here we have a basic datatype */
|
||||
datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count;
|
||||
nbElems += pElems[pos_desc].elem.count;
|
||||
|
||||
pos_desc++; /* advance to the next data */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2012 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -155,10 +155,10 @@ typedef struct ddt_elem_id_description ddt_elem_id_description;
|
||||
*/
|
||||
struct ddt_elem_desc {
|
||||
ddt_elem_id_description common; /**< basic data description and flags */
|
||||
uint32_t count; /**< number of blocks */
|
||||
uint32_t blocklen; /**< number of elements on each block */
|
||||
ptrdiff_t extent; /**< extent of each block (in bytes) */
|
||||
ptrdiff_t disp; /**< displacement of the first block */
|
||||
size_t count; /**< number of blocks */
|
||||
ptrdiff_t extent; /**< extent of each block (in bytes) */
|
||||
ptrdiff_t disp; /**< displacement of the first block */
|
||||
};
|
||||
typedef struct ddt_elem_desc ddt_elem_desc_t;
|
||||
|
||||
@ -172,10 +172,10 @@ typedef struct ddt_elem_desc ddt_elem_desc_t;
|
||||
*/
|
||||
struct ddt_loop_desc {
|
||||
ddt_elem_id_description common; /**< basic data description and flags */
|
||||
uint32_t loops; /**< number of elements */
|
||||
uint32_t items; /**< number of items in the loop */
|
||||
uint32_t loops; /**< number of elements */
|
||||
size_t unused; /**< not used right now */
|
||||
ptrdiff_t extent; /**< extent of the whole loop */
|
||||
ptrdiff_t extent; /**< extent of the whole loop */
|
||||
};
|
||||
typedef struct ddt_loop_desc ddt_loop_desc_t;
|
||||
|
||||
@ -184,7 +184,7 @@ struct ddt_endloop_desc {
|
||||
uint32_t items; /**< number of elements */
|
||||
uint32_t unused; /**< not used right now */
|
||||
size_t size; /**< real size of the data in the loop */
|
||||
ptrdiff_t first_elem_disp; /**< the displacement of the first block in the loop */
|
||||
ptrdiff_t first_elem_disp; /**< the displacement of the first block in the loop */
|
||||
};
|
||||
typedef struct ddt_endloop_desc ddt_endloop_desc_t;
|
||||
|
||||
@ -214,13 +214,20 @@ union dt_elem_desc {
|
||||
(_place)->end_loop.unused = -1; \
|
||||
} while(0)
|
||||
|
||||
|
||||
/**
|
||||
* Create one or more elements depending on the value of _count. If the value
|
||||
* is too large for the type of elem.count then use both the elem.count and
|
||||
* elem.blocklen to create it. If the number is prime then create a second
|
||||
* element to account for the difference.
|
||||
*/
|
||||
#define CREATE_ELEM( _place, _type, _flags, _count, _disp, _extent ) \
|
||||
do { \
|
||||
(_place)->elem.common.flags = (_flags) | OPAL_DATATYPE_FLAG_DATA; \
|
||||
(_place)->elem.common.type = (_type); \
|
||||
(_place)->elem.count = (_count); \
|
||||
(_place)->elem.disp = (_disp); \
|
||||
(_place)->elem.extent = (_extent); \
|
||||
(_place)->elem.count = (_count); \
|
||||
(_place)->elem.blocklen = 1; \
|
||||
} while(0)
|
||||
/*
|
||||
@ -238,8 +245,8 @@ struct opal_datatype_t;
|
||||
* OPAL_DATATYPE_INIT_BTYPES_ARRAY_[0-21], then order and naming would _not_ matter....
|
||||
*/
|
||||
|
||||
#define OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE { 0 }
|
||||
#define OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) { [OPAL_DATATYPE_ ## NAME] = 1 }
|
||||
#define OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE NULL
|
||||
#define OPAL_DATATYPE_INIT_PTYPES_ARRAY(NAME) (size_t[OPAL_DATATYPE_MAX_PREDEFINED]){ [OPAL_DATATYPE_ ## NAME] = 1, [OPAL_DATATYPE_MAX_PREDEFINED-1] = 0 }
|
||||
|
||||
#define OPAL_DATATYPE_INIT_NAME(NAME) "OPAL_" #NAME
|
||||
|
||||
@ -268,7 +275,7 @@ struct opal_datatype_t;
|
||||
.name = OPAL_DATATYPE_INIT_NAME(NAME), \
|
||||
.desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(UNAVAILABLE), \
|
||||
.opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(UNAVAILABLE), \
|
||||
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE \
|
||||
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \
|
||||
}
|
||||
|
||||
#define OPAL_DATATYPE_INITIALIZER_UNAVAILABLE( FLAGS ) \
|
||||
@ -287,7 +294,7 @@ struct opal_datatype_t;
|
||||
.name = OPAL_DATATYPE_INIT_NAME(EMPTY), \
|
||||
.desc = OPAL_DATATYPE_INIT_DESC_NULL, \
|
||||
.opt_desc = OPAL_DATATYPE_INIT_DESC_NULL, \
|
||||
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY_UNAVAILABLE \
|
||||
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \
|
||||
}
|
||||
|
||||
#define OPAL_DATATYPE_INIT_BASIC_TYPE( TYPE, NAME, FLAGS ) \
|
||||
@ -303,7 +310,7 @@ struct opal_datatype_t;
|
||||
.name = OPAL_DATATYPE_INIT_NAME(NAME), \
|
||||
.desc = OPAL_DATATYPE_INIT_DESC_NULL, \
|
||||
.opt_desc = OPAL_DATATYPE_INIT_DESC_NULL, \
|
||||
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) \
|
||||
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \
|
||||
}
|
||||
|
||||
#define OPAL_DATATYPE_INIT_BASIC_DATATYPE( TYPE, ALIGN, NAME, FLAGS ) \
|
||||
@ -319,7 +326,7 @@ struct opal_datatype_t;
|
||||
.name = OPAL_DATATYPE_INIT_NAME(NAME), \
|
||||
.desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(NAME), \
|
||||
.opt_desc = OPAL_DATATYPE_INIT_DESC_PREDEFINED(NAME), \
|
||||
.btypes = OPAL_DATATYPE_INIT_BTYPES_ARRAY(NAME) \
|
||||
.ptypes = OPAL_DATATYPE_INIT_PTYPES_ARRAY_UNAVAILABLE \
|
||||
}
|
||||
|
||||
#define OPAL_DATATYPE_INITIALIZER_LOOP(FLAGS) OPAL_DATATYPE_INIT_BASIC_TYPE( OPAL_DATATYPE_LOOP, LOOP, FLAGS )
|
||||
@ -476,7 +483,10 @@ static inline int GET_FIRST_NON_LOOP( const union dt_elem_desc* _pElem )
|
||||
#define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \
|
||||
do { \
|
||||
(ELEMENT) = &((DESCRIPTION)[(POSITION)]); \
|
||||
(COUNTER) = (ELEMENT)->elem.count; \
|
||||
if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \
|
||||
(COUNTER) = (ELEMENT)->loop.loops; \
|
||||
else \
|
||||
(COUNTER) = (ELEMENT)->elem.count; \
|
||||
} while (0)
|
||||
|
||||
OPAL_DECLSPEC int opal_datatype_contain_basic_datatypes( const struct opal_datatype_t* pData, char* ptr, size_t length );
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -42,21 +42,22 @@
|
||||
|
||||
static int32_t
|
||||
opal_datatype_optimize_short( opal_datatype_t* pData,
|
||||
int32_t count,
|
||||
dt_type_desc_t* pTypeDesc )
|
||||
int32_t count,
|
||||
dt_type_desc_t* pTypeDesc )
|
||||
{
|
||||
dt_elem_desc_t* pElemDesc;
|
||||
ddt_elem_desc_t opt_elem;
|
||||
dt_stack_t* pOrigStack;
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int32_t pos_desc = 0; /* actual position in the description of the derived datatype */
|
||||
int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1, last_length = 0;
|
||||
int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1;
|
||||
int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity;
|
||||
ptrdiff_t total_disp = 0, last_extent = 1, last_disp = 0;
|
||||
uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */
|
||||
uint32_t i;
|
||||
size_t last_length = 0;
|
||||
|
||||
pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->btypes[OPAL_DATATYPE_LOOP]+2) );
|
||||
pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->loops+2) );
|
||||
SAVE_STACK( pStack, -1, 0, count, 0 );
|
||||
|
||||
pTypeDesc->length = 2 * pData->desc.used + 1 /* for the fake OPAL_DATATYPE_END_LOOP at the end */;
|
||||
@ -85,7 +86,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
|
||||
pElemDesc++; nbElems++;
|
||||
if( --stack_pos >= 0 ) { /* still something to do ? */
|
||||
ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop);
|
||||
pStartLoop->items = (pElemDesc - 1)->elem.count;
|
||||
pStartLoop->items = end_loop->items;
|
||||
total_disp = pStack->disp; /* update the displacement position */
|
||||
}
|
||||
pStack--; /* go down one position on the stack */
|
||||
@ -98,8 +99,8 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
|
||||
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
|
||||
ptrdiff_t loop_disp = pData->desc.desc[pos_desc + index].elem.disp;
|
||||
|
||||
continuity = ((last_disp + last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size)
|
||||
== (total_disp + loop_disp));
|
||||
continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size)
|
||||
== (total_disp + loop_disp));
|
||||
if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
/* the loop is contiguous or composed by contiguous elements with a gap */
|
||||
if( loop->extent == (ptrdiff_t)end_loop->size ) {
|
||||
@ -206,7 +207,7 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
|
||||
while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */
|
||||
/* now here we have a basic datatype */
|
||||
type = pData->desc.desc[pos_desc].elem.common.type;
|
||||
continuity = ((last_disp + last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size)
|
||||
continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size)
|
||||
== (total_disp + pData->desc.desc[pos_desc].elem.disp));
|
||||
|
||||
if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity &&
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -502,6 +502,7 @@ opal_unpack_general_function( opal_convertor_t* pConvertor,
|
||||
conv_ptr = pConvertor->pBaseBuf + pStack->disp;
|
||||
pos_desc++; /* advance to the next data */
|
||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||
if( 0 == iov_len_local ) goto complete_loop; /* escape if we're done */
|
||||
continue;
|
||||
}
|
||||
conv_ptr += rc * description[pos_desc].elem.extent;
|
||||
|
@ -18,7 +18,7 @@ if PROJECT_OMPI
|
||||
MPI_TESTS = checksum position position_noncontig ddt_test ddt_raw unpack_ooo ddt_pack external32
|
||||
MPI_CHECKS = to_self
|
||||
endif
|
||||
TESTS = opal_datatype_test $(MPI_TESTS)
|
||||
TESTS = opal_datatype_test unpack_hetero $(MPI_TESTS)
|
||||
|
||||
check_PROGRAMS = $(TESTS) $(MPI_CHECKS)
|
||||
|
||||
@ -79,5 +79,10 @@ external32_LDADD = \
|
||||
$(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \
|
||||
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
|
||||
|
||||
unpack_hetero_SOURCES = unpack_hetero.c
|
||||
unpack_hetero_LDFLAGS = $(OMPI_PKG_CONFIG_LDFLAGS)
|
||||
unpack_hetero_LDADD = \
|
||||
$(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la
|
||||
|
||||
distclean:
|
||||
rm -rf *.dSYM .deps .libs *.log *.o *.trs $(check_PROGRAMS) Makefile
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011-2013 Cisco Systems, Inc. All rights reserved.
|
||||
@ -23,7 +23,7 @@
|
||||
/**
|
||||
* The purpose of this test is to simulate the multi-network packing and
|
||||
* unpacking process. The pack operation will happen in-order while the
|
||||
* will be done randomly. Therefore, before each unpack the correct
|
||||
* unpack will be done randomly. Therefore, before each unpack the correct
|
||||
* position in the user buffer has to be set.
|
||||
*/
|
||||
|
||||
|
99
test/datatype/unpack_hetero.c
Обычный файл
99
test/datatype/unpack_hetero.c
Обычный файл
@ -0,0 +1,99 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "opal_config.h"
|
||||
#include "opal/runtime/opal.h"
|
||||
#include "opal/datatype/opal_datatype.h"
|
||||
#include "opal/datatype/opal_datatype_internal.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "opal/datatype/opal_datatype_prototypes.h"
|
||||
#include "opal/util/arch.h"
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Compile with:
|
||||
gcc -DHAVE_CONFIG_H -I. -I../../include -I../.. -I../../include -I../../../ompi-trunk/opal -I../../../ompi-trunk/orte -g unpack_hetero.c -o unpack_hetero
|
||||
*/
|
||||
|
||||
uint32_t remote_arch = 0xffffffff;
|
||||
|
||||
/**
|
||||
* Main function. Call several tests and print-out the results. It try to stress the convertor
|
||||
* using difficult data-type constructions as well as strange segment sizes for the conversion.
|
||||
* Usually, it is able to detect most of the data-type and convertor problems. Any modifications
|
||||
* on the data-type engine should first pass all the tests from this file, before going into other
|
||||
* tests.
|
||||
*/
|
||||
int main( int argc, char* argv[] )
|
||||
{
|
||||
opal_datatype_init();
|
||||
|
||||
/**
|
||||
* By default simulate homogeneous architectures.
|
||||
*/
|
||||
remote_arch = opal_local_arch ^ OPAL_ARCH_ISBIGENDIAN;
|
||||
|
||||
opal_convertor_t * pConv;
|
||||
int sbuf[2], rbuf[2];
|
||||
size_t max_data;
|
||||
struct iovec a;
|
||||
uint32_t iov_count;
|
||||
|
||||
sbuf[0] = 0x01000000; sbuf[1] = 0x02000000;
|
||||
|
||||
printf( "\n\n#\n * TEST UNPACKING 1 int out of 1\n#\n\n" );
|
||||
|
||||
pConv = opal_convertor_create( remote_arch, 0 );
|
||||
rbuf[0] = -1; rbuf[1] = -1;
|
||||
if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &opal_datatype_int4, 1, rbuf ) ) {
|
||||
printf( "Cannot attach the datatype to a convertor\n" );
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
a.iov_base = sbuf;
|
||||
a.iov_len = 4;
|
||||
iov_count = 1;
|
||||
max_data = 4;
|
||||
opal_unpack_general( pConv, &a, &iov_count, &max_data );
|
||||
|
||||
assert(1 == rbuf[0]);
|
||||
assert(-1 == rbuf[1]);
|
||||
OBJ_RELEASE(pConv);
|
||||
|
||||
printf( "\n\n#\n * TEST UNPACKING 1 int out of 2\n#\n\n" );
|
||||
pConv = opal_convertor_create( remote_arch, 0 );
|
||||
rbuf[0] = -1; rbuf[1] = -1;
|
||||
if( OPAL_SUCCESS != opal_convertor_prepare_for_recv( pConv, &opal_datatype_int4, 2, rbuf ) ) {
|
||||
printf( "Cannot attach the datatype to a convertor\n" );
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
a.iov_base = sbuf;
|
||||
a.iov_len = 4;
|
||||
iov_count = 1;
|
||||
max_data = 4;
|
||||
opal_unpack_general( pConv, &a, &iov_count, &max_data );
|
||||
|
||||
assert(1 == rbuf[0]);
|
||||
assert(-1 == rbuf[1]);
|
||||
OBJ_RELEASE(pConv);
|
||||
|
||||
/* clean-ups all data allocations */
|
||||
opal_datatype_finalize();
|
||||
opal_finalize();
|
||||
return OPAL_SUCCESS;
|
||||
}
|
Загрузка…
x
Ссылка в новой задаче
Block a user