Optimized datatype description.
Move toward a base type of vector (count, type, blocklen, extent, disp), with disp and extent applying to the count repetition and blocklen being a contiguous memory region of type type. Implement 2 optimizations on this description used during type_commit: - collapse: successive similar datatype descriptions are collapsed together with an increased count. - fusion: fuse successive datatype descriptions in order to minimize the number of resulting memcpy during pack/unpack. Fixes at the OMPI datatype level including: - Fix the create_hindexed and vector creation. - Fix the handling of [get|set]_elements and _count. - Correctly compute the displacement for block indexed types. - Support the MPI_LB and MPI_UB deprecation, aka. OMPI_ENABLE_MPI1_COMPAT. Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
29468ec03f
Коммит
f25674291b
@ -1,6 +1,6 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009-2013 The University of Tennessee and The University
|
* Copyright (c) 2009-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||||
@ -76,7 +76,7 @@ struct ompi_datatype_t {
|
|||||||
struct opal_hash_table_t *d_keyhash; /**< Attribute fields */
|
struct opal_hash_table_t *d_keyhash; /**< Attribute fields */
|
||||||
|
|
||||||
void* args; /**< Data description for the user */
|
void* args; /**< Data description for the user */
|
||||||
opal_atomic_intptr_t packed_description; /**< Packed description of the datatype */
|
opal_atomic_intptr_t packed_description; /**< Packed description of the datatype */
|
||||||
uint64_t pml_data; /**< PML-specific information */
|
uint64_t pml_data; /**< PML-specific information */
|
||||||
/* --- cacheline 6 boundary (384 bytes) --- */
|
/* --- cacheline 6 boundary (384 bytes) --- */
|
||||||
char name[MPI_MAX_OBJECT_NAME];/**< Externally visible name */
|
char name[MPI_MAX_OBJECT_NAME];/**< Externally visible name */
|
||||||
|
@ -87,10 +87,10 @@ int32_t ompi_datatype_create_hindexed( int count, const int* pBlockLength, const
|
|||||||
return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType);
|
return ompi_datatype_duplicate( &ompi_mpi_datatype_null.dt, newType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ompi_datatype_type_extent( oldType, &extent );
|
||||||
disp = pDisp[i];
|
disp = pDisp[i];
|
||||||
dLength = pBlockLength[i];
|
dLength = pBlockLength[i];
|
||||||
endat = disp + dLength * extent;
|
endat = disp + dLength * extent;
|
||||||
ompi_datatype_type_extent( oldType, &extent );
|
|
||||||
|
|
||||||
pdt = ompi_datatype_create( (count - i) * (2 + oldType->super.desc.used) );
|
pdt = ompi_datatype_create( (count - i) * (2 + oldType->super.desc.used) );
|
||||||
for( i += 1; i < count; i++ ) {
|
for( i += 1; i < count; i++ ) {
|
||||||
@ -162,17 +162,17 @@ int32_t ompi_datatype_create_hindexed_block( int count, int bLength, const ptrdi
|
|||||||
pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) );
|
pdt = ompi_datatype_create( count * (2 + oldType->super.desc.used) );
|
||||||
disp = pDisp[0];
|
disp = pDisp[0];
|
||||||
dLength = bLength;
|
dLength = bLength;
|
||||||
endat = disp + dLength;
|
endat = disp + dLength * extent;
|
||||||
for( i = 1; i < count; i++ ) {
|
for( i = 1; i < count; i++ ) {
|
||||||
if( endat == pDisp[i] ) {
|
if( endat == pDisp[i] ) {
|
||||||
/* contiguous with the previsious */
|
/* contiguous with the previsious */
|
||||||
dLength += bLength;
|
dLength += bLength;
|
||||||
endat += bLength;
|
endat += bLength * extent;
|
||||||
} else {
|
} else {
|
||||||
ompi_datatype_add( pdt, oldType, dLength, disp, extent );
|
ompi_datatype_add( pdt, oldType, dLength, disp, extent );
|
||||||
disp = pDisp[i];
|
disp = pDisp[i];
|
||||||
dLength = bLength;
|
dLength = bLength;
|
||||||
endat = disp + bLength;
|
endat = disp + bLength * extent;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ompi_datatype_add( pdt, oldType, dLength, disp, extent );
|
ompi_datatype_add( pdt, oldType, dLength, disp, extent );
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2008 High Performance Computing Center Stuttgart,
|
||||||
@ -26,7 +26,6 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "ompi/runtime/params.h"
|
#include "ompi/runtime/params.h"
|
||||||
#include "ompi/communicator/communicator.h"
|
|
||||||
#include "ompi/datatype/ompi_datatype.h"
|
#include "ompi/datatype/ompi_datatype.h"
|
||||||
#include "opal/datatype/opal_convertor.h"
|
#include "opal/datatype/opal_convertor.h"
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2018 The University of Tennessee and The University
|
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||||
@ -324,8 +324,9 @@ complete_contiguous_data_unpack:
|
|||||||
return pConv->fAdvance( pConv, iov, out_size, max_data );
|
return pConv->fAdvance( pConv, iov, out_size, max_data );
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor,
|
static inline int
|
||||||
size_t starting_point, const size_t* sizes )
|
opal_convertor_create_stack_with_pos_contig( opal_convertor_t* pConvertor,
|
||||||
|
size_t starting_point, const size_t* sizes )
|
||||||
{
|
{
|
||||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||||
const opal_datatype_t* pData = pConvertor->pDesc;
|
const opal_datatype_t* pData = pConvertor->pDesc;
|
||||||
@ -349,7 +350,7 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
|
|||||||
pStack[0].disp = count * extent;
|
pStack[0].disp = count * extent;
|
||||||
|
|
||||||
/* now compute the number of pending bytes */
|
/* now compute the number of pending bytes */
|
||||||
count = starting_point - count * pData->size;
|
count = starting_point % pData->size;
|
||||||
/**
|
/**
|
||||||
* We save the current displacement starting from the begining
|
* We save the current displacement starting from the begining
|
||||||
* of this data.
|
* of this data.
|
||||||
@ -370,9 +371,9 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
|
|||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline
|
static inline int
|
||||||
int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
|
opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
|
||||||
const size_t* sizes )
|
const size_t* sizes )
|
||||||
{
|
{
|
||||||
dt_stack_t* pStack = convertor->pStack;
|
dt_stack_t* pStack = convertor->pStack;
|
||||||
dt_elem_desc_t* pElems;
|
dt_elem_desc_t* pElems;
|
||||||
@ -402,7 +403,7 @@ int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
|
|||||||
pStack[1].count = pElems[0].loop.loops;
|
pStack[1].count = pElems[0].loop.loops;
|
||||||
pStack[1].type = OPAL_DATATYPE_LOOP;
|
pStack[1].type = OPAL_DATATYPE_LOOP;
|
||||||
} else {
|
} else {
|
||||||
pStack[1].count = pElems[0].elem.count;
|
pStack[1].count = pElems[0].elem.count * pElems[0].elem.blocklen;
|
||||||
pStack[1].type = pElems[0].elem.common.type;
|
pStack[1].type = pElems[0].elem.common.type;
|
||||||
}
|
}
|
||||||
return OPAL_SUCCESS;
|
return OPAL_SUCCESS;
|
||||||
|
@ -227,13 +227,41 @@ opal_datatype_is_contiguous_memory_layout( const opal_datatype_t* datatype, int3
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
OPAL_DECLSPEC void opal_datatype_dump( const opal_datatype_t* pData );
|
OPAL_DECLSPEC void
|
||||||
|
opal_datatype_dump( const opal_datatype_t* pData );
|
||||||
|
|
||||||
/* data creation functions */
|
/* data creation functions */
|
||||||
OPAL_DECLSPEC int32_t opal_datatype_clone( const opal_datatype_t * src_type, opal_datatype_t * dest_type );
|
|
||||||
OPAL_DECLSPEC int32_t opal_datatype_create_contiguous( int count, const opal_datatype_t* oldType, opal_datatype_t** newType );
|
/**
|
||||||
OPAL_DECLSPEC int32_t opal_datatype_resize( opal_datatype_t* type, ptrdiff_t lb, ptrdiff_t extent );
|
* Create a duplicate of the source datatype.
|
||||||
OPAL_DECLSPEC int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtAdd, size_t count,
|
*/
|
||||||
ptrdiff_t disp, ptrdiff_t extent );
|
OPAL_DECLSPEC int32_t
|
||||||
|
opal_datatype_clone( const opal_datatype_t* src_type,
|
||||||
|
opal_datatype_t* dest_type );
|
||||||
|
/**
|
||||||
|
* A contiguous array of identical datatypes.
|
||||||
|
*/
|
||||||
|
OPAL_DECLSPEC int32_t
|
||||||
|
opal_datatype_create_contiguous( int count, const opal_datatype_t* oldType,
|
||||||
|
opal_datatype_t** newType );
|
||||||
|
/**
|
||||||
|
* Add a new datatype to the base type description. The count is the number of
|
||||||
|
* repetitions of the same element to be added, and the extent is the extent
|
||||||
|
* of each element. The displacement is the initial displacement of the
|
||||||
|
* first element.
|
||||||
|
*/
|
||||||
|
OPAL_DECLSPEC int32_t
|
||||||
|
opal_datatype_add( opal_datatype_t* pdtBase,
|
||||||
|
const opal_datatype_t* pdtAdd, size_t count,
|
||||||
|
ptrdiff_t disp, ptrdiff_t extent );
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Alter the lb and extent of an existing datatype in place.
|
||||||
|
*/
|
||||||
|
OPAL_DECLSPEC int32_t
|
||||||
|
opal_datatype_resize( opal_datatype_t* type,
|
||||||
|
ptrdiff_t lb,
|
||||||
|
ptrdiff_t extent );
|
||||||
|
|
||||||
static inline int32_t
|
static inline int32_t
|
||||||
opal_datatype_type_lb( const opal_datatype_t* pData, ptrdiff_t* disp )
|
opal_datatype_type_lb( const opal_datatype_t* pData, ptrdiff_t* disp )
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||||
@ -281,15 +281,23 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA
|
|||||||
if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) {
|
if( (pdtAdd->flags & (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA)) == (OPAL_DATATYPE_FLAG_PREDEFINED | OPAL_DATATYPE_FLAG_DATA) ) {
|
||||||
if( NULL != pdtBase->ptypes )
|
if( NULL != pdtBase->ptypes )
|
||||||
pdtBase->ptypes[pdtAdd->id] += count;
|
pdtBase->ptypes[pdtAdd->id] += count;
|
||||||
pLast->elem.common.type = pdtAdd->id;
|
|
||||||
pLast->elem.count = count;
|
|
||||||
pLast->elem.disp = disp;
|
|
||||||
pLast->elem.extent = extent;
|
|
||||||
pdtBase->desc.used++;
|
|
||||||
pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED);
|
pLast->elem.common.flags = pdtAdd->flags & ~(OPAL_DATATYPE_FLAG_COMMITTED);
|
||||||
if( (extent != (ptrdiff_t)pdtAdd->size) && (count > 1) ) { /* gaps around the datatype */
|
pLast->elem.common.type = pdtAdd->id;
|
||||||
pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS);
|
pLast->elem.disp = disp;
|
||||||
|
pLast->elem.extent = count * extent;
|
||||||
|
/* assume predefined datatypes without extent, aka. contiguous */
|
||||||
|
pLast->elem.count = 1;
|
||||||
|
pLast->elem.blocklen = count;
|
||||||
|
if( extent != (ptrdiff_t)pdtAdd->size ) { /* not contiguous: let's fix */
|
||||||
|
pLast->elem.count = count;
|
||||||
|
pLast->elem.blocklen = 1;
|
||||||
|
pLast->elem.extent = extent;
|
||||||
|
if( count > 1 ) { /* gaps around the predefined datatype */
|
||||||
|
pLast->elem.common.flags &= ~(OPAL_DATATYPE_FLAG_CONTIGUOUS | OPAL_DATATYPE_FLAG_NO_GAPS);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
pdtBase->desc.used++;
|
||||||
} else {
|
} else {
|
||||||
/* keep trace of the total number of basic datatypes in the datatype definition */
|
/* keep trace of the total number of basic datatypes in the datatype definition */
|
||||||
pdtBase->loops += pdtAdd->loops;
|
pdtBase->loops += pdtAdd->loops;
|
||||||
@ -299,13 +307,40 @@ int32_t opal_datatype_add( opal_datatype_t* pdtBase, const opal_datatype_t* pdtA
|
|||||||
for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ )
|
for( i = OPAL_DATATYPE_FIRST_TYPE; i < OPAL_DATATYPE_MAX_PREDEFINED; i++ )
|
||||||
if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]);
|
if( pdtAdd->ptypes[i] != 0 ) pdtBase->ptypes[i] += (count * pdtAdd->ptypes[i]);
|
||||||
}
|
}
|
||||||
if( (1 == pdtAdd->desc.used) && (extent == (pdtAdd->ub - pdtAdd->lb)) &&
|
if( 1 == pdtAdd->desc.used ) {
|
||||||
(extent == pdtAdd->desc.desc[0].elem.extent) ){
|
|
||||||
pLast->elem = pdtAdd->desc.desc[0].elem;
|
pLast->elem = pdtAdd->desc.desc[0].elem;
|
||||||
pLast->elem.count *= count;
|
|
||||||
pLast->elem.disp += disp;
|
pLast->elem.disp += disp;
|
||||||
|
if( 1 == count ) {
|
||||||
|
/* Extent only has a meaning when there are multiple elements. Bail out */
|
||||||
|
} else if( 1 == pLast->elem.count ) {
|
||||||
|
/* The size and true_extent of the added datatype are identical, signaling a datatype
|
||||||
|
* that is mostly contiguous with the exception of the initial and final gaps. These
|
||||||
|
* gaps do not matter here as they will be amended (the initial gaps being shifted by the
|
||||||
|
* new displacement and the final gap being replaced with the new gap
|
||||||
|
*/
|
||||||
|
if( pdtAdd->desc.desc[0].elem.extent == extent ) {
|
||||||
|
/* pure bliss everything is fully contiguous and we can collapse
|
||||||
|
* everything by updating the blocklen and extent
|
||||||
|
*/
|
||||||
|
pLast->elem.blocklen *= count;
|
||||||
|
pLast->elem.extent *= count;
|
||||||
|
} else {
|
||||||
|
pLast->elem.count = count;
|
||||||
|
pLast->elem.extent = extent;
|
||||||
|
}
|
||||||
|
} else if( extent == (ptrdiff_t)(pLast->elem.count * pLast->elem.extent) ) {
|
||||||
|
/* It's just a repetition of the same element, increase the count */
|
||||||
|
pLast->elem.count *= count;
|
||||||
|
} else {
|
||||||
|
/* No luck here, no optimization can be applied. Fall back to the
|
||||||
|
* normal case where we add a loop around the datatype.
|
||||||
|
*/
|
||||||
|
goto build_loop;
|
||||||
|
}
|
||||||
pdtBase->desc.used++;
|
pdtBase->desc.used++;
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
build_loop:
|
||||||
/* if the extent of the datatype is the same as the extent of the loop
|
/* if the extent of the datatype is the same as the extent of the loop
|
||||||
* description of the datatype then we simply have to update the main loop.
|
* description of the datatype then we simply have to update the main loop.
|
||||||
*/
|
*/
|
||||||
|
@ -48,37 +48,37 @@ static inline void _predefined_data( const dt_elem_desc_t* ELEM,
|
|||||||
unsigned char* DESTINATION,
|
unsigned char* DESTINATION,
|
||||||
size_t* SPACE )
|
size_t* SPACE )
|
||||||
{
|
{
|
||||||
size_t _copy_count = (COUNT);
|
|
||||||
size_t _copy_blength;
|
|
||||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||||
unsigned char* _source = (SOURCE) + _elem->disp;
|
unsigned char* _source = (SOURCE) + _elem->disp;
|
||||||
unsigned char* _destination = (DESTINATION) + _elem->disp;
|
unsigned char* _destination = (DESTINATION) + _elem->disp;
|
||||||
|
size_t total_count = _elem->count * _elem->blocklen;
|
||||||
|
size_t do_now, do_now_bytes;
|
||||||
|
|
||||||
_copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size;
|
assert( (COUNT) == total_count);
|
||||||
|
assert( total_count <= ((*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size) );
|
||||||
|
|
||||||
if( _copy_blength == (size_t)_elem->extent ) {
|
/* We don't need a prologue and epilogue here as we are __always__ working
|
||||||
_copy_blength *= _copy_count;
|
* with full copies of the data description.
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE),
|
*/
|
||||||
(DATATYPE), (TOTAL_COUNT) );
|
|
||||||
/* the extent and the size of the basic datatype are equals */
|
/**
|
||||||
DO_DEBUG( opal_output( 0, "copy 1. %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n",
|
* Compute how many full blocklen we need to do and do them.
|
||||||
STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, _copy_blength, *(SPACE) ); );
|
*/
|
||||||
MEM_OP( _destination, _source, _copy_blength );
|
do_now = _elem->count;
|
||||||
_source += _copy_blength;
|
if( 0 != do_now ) {
|
||||||
_destination += _copy_blength;
|
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
} else {
|
for(size_t _i = 0; _i < do_now; _i++ ) {
|
||||||
for(size_t _i = 0; _i < _copy_count; _i++ ) {
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, do_now_bytes, (SOURCE_BASE),
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE),
|
(DATATYPE), (TOTAL_COUNT) );
|
||||||
(DATATYPE), (TOTAL_COUNT) );
|
DO_DEBUG( opal_output( 0, "copy %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n",
|
||||||
DO_DEBUG( opal_output( 0, "copy 2. %s( %p, %p, %lu ) => space %lu\n",
|
STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, do_now_bytes, *(SPACE) ); );
|
||||||
STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); );
|
MEM_OP( _destination, _source, do_now_bytes );
|
||||||
MEM_OP( _destination, _source, _copy_blength );
|
|
||||||
_source += _elem->extent;
|
|
||||||
_destination += _elem->extent;
|
_destination += _elem->extent;
|
||||||
|
_source += _elem->extent;
|
||||||
|
*(SPACE) -= do_now_bytes;
|
||||||
}
|
}
|
||||||
_copy_blength *= _copy_count;
|
(COUNT) -= total_count;
|
||||||
}
|
}
|
||||||
*(SPACE) -= _copy_blength;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _contiguous_loop( const dt_elem_desc_t* ELEM,
|
static inline void _contiguous_loop( const dt_elem_desc_t* ELEM,
|
||||||
@ -147,12 +147,10 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i
|
|||||||
if( (ptrdiff_t)datatype->size == extent ) { /* all contiguous == no gaps around */
|
if( (ptrdiff_t)datatype->size == extent ) { /* all contiguous == no gaps around */
|
||||||
size_t total_length = iov_len_local;
|
size_t total_length = iov_len_local;
|
||||||
size_t memop_chunk = opal_datatype_memop_block_size;
|
size_t memop_chunk = opal_datatype_memop_block_size;
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( source, iov_len_local,
|
||||||
|
(unsigned char*)source_base, datatype, count );
|
||||||
while( total_length > 0 ) {
|
while( total_length > 0 ) {
|
||||||
if( memop_chunk > total_length ) memop_chunk = total_length;
|
if( memop_chunk > total_length ) memop_chunk = total_length;
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( destination, memop_chunk,
|
|
||||||
(unsigned char*)destination_base, datatype, count );
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( source, memop_chunk,
|
|
||||||
(unsigned char*)source_base, datatype, count );
|
|
||||||
DO_DEBUG( opal_output( 0, "copy c1. %s( %p, %p, %lu ) => space %lu\n",
|
DO_DEBUG( opal_output( 0, "copy c1. %s( %p, %p, %lu ) => space %lu\n",
|
||||||
STRINGIFY(MEM_OP_NAME), (void*)destination, (void*)source, (unsigned long)memop_chunk, (unsigned long)total_length ); );
|
STRINGIFY(MEM_OP_NAME), (void*)destination, (void*)source, (unsigned long)memop_chunk, (unsigned long)total_length ); );
|
||||||
MEM_OP( destination, source, memop_chunk );
|
MEM_OP( destination, source, memop_chunk );
|
||||||
@ -184,17 +182,12 @@ static inline int32_t _copy_content_same_ddt( const opal_datatype_t* datatype, i
|
|||||||
pos_desc = 0;
|
pos_desc = 0;
|
||||||
stack_pos = 0;
|
stack_pos = 0;
|
||||||
|
|
||||||
if( datatype->opt_desc.desc != NULL ) {
|
description = datatype->opt_desc.desc;
|
||||||
description = datatype->opt_desc.desc;
|
if( NULL == description ) {
|
||||||
} else {
|
|
||||||
description = datatype->desc.desc;
|
description = datatype->desc.desc;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( description[0].elem.common.type == OPAL_DATATYPE_LOOP )
|
UPDATE_INTERNAL_COUNTERS( description, 0, pElem, count_desc );
|
||||||
count_desc = description[0].loop.loops;
|
|
||||||
else
|
|
||||||
count_desc = description[0].elem.count;
|
|
||||||
pElem = &(description[pos_desc]);
|
|
||||||
|
|
||||||
while( 1 ) {
|
while( 1 ) {
|
||||||
while( OPAL_LIKELY(pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) ) {
|
while( OPAL_LIKELY(pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA) ) {
|
||||||
|
@ -69,14 +69,14 @@ ssize_t opal_datatype_get_element_count( const opal_datatype_t* datatype, size_t
|
|||||||
while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
|
while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
|
||||||
/* now here we have a basic datatype */
|
/* now here we have a basic datatype */
|
||||||
const opal_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]);
|
const opal_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]);
|
||||||
local_size = pElems[pos_desc].elem.count * basic_type->size;
|
local_size = (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen) * basic_type->size;
|
||||||
if( local_size >= iSize ) {
|
if( local_size >= iSize ) {
|
||||||
local_size = iSize / basic_type->size;
|
local_size = iSize / basic_type->size;
|
||||||
nbElems += (int32_t)local_size;
|
nbElems += (int32_t)local_size;
|
||||||
iSize -= local_size * basic_type->size;
|
iSize -= local_size * basic_type->size;
|
||||||
return (iSize == 0 ? nbElems : -1);
|
return (iSize == 0 ? nbElems : -1);
|
||||||
}
|
}
|
||||||
nbElems += pElems[pos_desc].elem.count;
|
nbElems += (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen);
|
||||||
iSize -= local_size;
|
iSize -= local_size;
|
||||||
pos_desc++; /* advance to the next data */
|
pos_desc++; /* advance to the next data */
|
||||||
}
|
}
|
||||||
@ -131,7 +131,7 @@ int32_t opal_datatype_set_element_count( const opal_datatype_t* datatype, size_t
|
|||||||
while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
|
while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
|
||||||
/* now here we have a basic datatype */
|
/* now here we have a basic datatype */
|
||||||
const opal_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]);
|
const opal_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]);
|
||||||
local_length = pElems[pos_desc].elem.count;
|
local_length = (pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen);
|
||||||
if( local_length >= count ) {
|
if( local_length >= count ) {
|
||||||
*length += count * basic_type->size;
|
*length += count * basic_type->size;
|
||||||
return 0;
|
return 0;
|
||||||
@ -188,8 +188,8 @@ int opal_datatype_compute_ptypes( opal_datatype_t* datatype )
|
|||||||
}
|
}
|
||||||
while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
|
while( pElems[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
|
||||||
/* now here we have a basic datatype */
|
/* now here we have a basic datatype */
|
||||||
datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count;
|
datatype->ptypes[pElems[pos_desc].elem.common.type] += pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen;
|
||||||
nbElems += pElems[pos_desc].elem.count;
|
nbElems += pElems[pos_desc].elem.count * pElems[pos_desc].elem.blocklen;
|
||||||
|
|
||||||
DUMP( " compute_ptypes-add: type %d count %"PRIsize_t" (total type %"PRIsize_t" total %lld)\n",
|
DUMP( " compute_ptypes-add: type %d count %"PRIsize_t" (total type %"PRIsize_t" total %lld)\n",
|
||||||
pElems[pos_desc].elem.common.type, datatype->ptypes[pElems[pos_desc].elem.common.type],
|
pElems[pos_desc].elem.common.type, datatype->ptypes[pElems[pos_desc].elem.common.type],
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2018 The University of Tennessee and The University
|
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||||
@ -222,14 +222,14 @@ union dt_elem_desc {
|
|||||||
* elem.blocklen to create it. If the number is prime then create a second
|
* elem.blocklen to create it. If the number is prime then create a second
|
||||||
* element to account for the difference.
|
* element to account for the difference.
|
||||||
*/
|
*/
|
||||||
#define CREATE_ELEM( _place, _type, _flags, _count, _disp, _extent ) \
|
#define CREATE_ELEM(_place, _type, _flags, _blocklen, _count, _disp, _extent) \
|
||||||
do { \
|
do { \
|
||||||
(_place)->elem.common.flags = (_flags) | OPAL_DATATYPE_FLAG_DATA; \
|
(_place)->elem.common.flags = (_flags) | OPAL_DATATYPE_FLAG_DATA; \
|
||||||
(_place)->elem.common.type = (_type); \
|
(_place)->elem.common.type = (_type); \
|
||||||
(_place)->elem.disp = (_disp); \
|
(_place)->elem.blocklen = (_blocklen); \
|
||||||
(_place)->elem.extent = (_extent); \
|
|
||||||
(_place)->elem.count = (_count); \
|
(_place)->elem.count = (_count); \
|
||||||
(_place)->elem.blocklen = 1; \
|
(_place)->elem.extent = (_extent); \
|
||||||
|
(_place)->elem.disp = (_disp); \
|
||||||
} while(0)
|
} while(0)
|
||||||
/*
|
/*
|
||||||
* This array holds the descriptions desc.desc[2] of the predefined basic datatypes.
|
* This array holds the descriptions desc.desc[2] of the predefined basic datatypes.
|
||||||
@ -498,22 +498,22 @@ static inline int GET_FIRST_NON_LOOP( const union dt_elem_desc* _pElem )
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \
|
#define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \
|
||||||
do { \
|
do { \
|
||||||
(ELEMENT) = &((DESCRIPTION)[(POSITION)]); \
|
(ELEMENT) = &((DESCRIPTION)[(POSITION)]); \
|
||||||
if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \
|
if( OPAL_DATATYPE_LOOP == (ELEMENT)->elem.common.type ) \
|
||||||
(COUNTER) = (ELEMENT)->loop.loops; \
|
(COUNTER) = (ELEMENT)->loop.loops; \
|
||||||
else \
|
else \
|
||||||
(COUNTER) = (ELEMENT)->elem.count; \
|
(COUNTER) = (ELEMENT)->elem.count * (ELEMENT)->elem.blocklen; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
OPAL_DECLSPEC int opal_datatype_contain_basic_datatypes( const struct opal_datatype_t* pData, char* ptr, size_t length );
|
OPAL_DECLSPEC int opal_datatype_contain_basic_datatypes( const struct opal_datatype_t* pData, char* ptr, size_t length );
|
||||||
OPAL_DECLSPEC int opal_datatype_dump_data_flags( unsigned short usflags, char* ptr, size_t length );
|
OPAL_DECLSPEC int opal_datatype_dump_data_flags( unsigned short usflags, char* ptr, size_t length );
|
||||||
OPAL_DECLSPEC int opal_datatype_dump_data_desc( union dt_elem_desc* pDesc, int nbElems, char* ptr, size_t length );
|
OPAL_DECLSPEC int opal_datatype_dump_data_desc( union dt_elem_desc* pDesc, int nbElems, char* ptr, size_t length );
|
||||||
|
|
||||||
#if OPAL_ENABLE_DEBUG
|
|
||||||
extern bool opal_position_debug;
|
extern bool opal_position_debug;
|
||||||
extern bool opal_copy_debug;
|
extern bool opal_copy_debug;
|
||||||
#endif /* OPAL_ENABLE_DEBUG */
|
extern bool opal_unpack_debug;
|
||||||
|
extern bool opal_pack_debug;
|
||||||
|
|
||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
#endif /* OPAL_DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED */
|
#endif /* OPAL_DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED */
|
||||||
|
@ -254,6 +254,7 @@ int32_t opal_datatype_init( void )
|
|||||||
datatype->desc.desc[0].elem.common.type = i;
|
datatype->desc.desc[0].elem.common.type = i;
|
||||||
/* datatype->desc.desc[0].elem.blocklen XXX not set at the moment, it will be needed later */
|
/* datatype->desc.desc[0].elem.blocklen XXX not set at the moment, it will be needed later */
|
||||||
datatype->desc.desc[0].elem.count = 1;
|
datatype->desc.desc[0].elem.count = 1;
|
||||||
|
datatype->desc.desc[0].elem.blocklen = 1;
|
||||||
datatype->desc.desc[0].elem.disp = 0;
|
datatype->desc.desc[0].elem.disp = 0;
|
||||||
datatype->desc.desc[0].elem.extent = datatype->size;
|
datatype->desc.desc[0].elem.extent = datatype->size;
|
||||||
|
|
||||||
|
@ -2,6 +2,9 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2018 Research Organization for Information Science
|
* Copyright (c) 2018 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
|
* Copyright (c) 2018-2019 The University of Tennessee and The University
|
||||||
|
* of Tennessee Research Foundation. All rights
|
||||||
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -18,35 +21,43 @@
|
|||||||
#include "opal/datatype/opal_datatype_internal.h"
|
#include "opal/datatype/opal_datatype_internal.h"
|
||||||
#include "opal/datatype/opal_convertor.h"
|
#include "opal/datatype/opal_convertor.h"
|
||||||
|
|
||||||
|
#define OPAL_DATATYPE_MAX_MONOTONIC_IOVEC 32
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the datatype describes a memory layout where the pointers to
|
||||||
|
* the contiguous pieces are always advancing in the same direction, i.e.
|
||||||
|
* there is no potential for overlap.
|
||||||
|
*/
|
||||||
int32_t opal_datatype_is_monotonic(opal_datatype_t* type )
|
int32_t opal_datatype_is_monotonic(opal_datatype_t* type )
|
||||||
{
|
{
|
||||||
|
struct iovec iov[OPAL_DATATYPE_MAX_MONOTONIC_IOVEC];
|
||||||
|
ptrdiff_t upper_limit = (ptrdiff_t)type->true_lb; /* as conversion base will be NULL the first address is true_lb */
|
||||||
|
size_t max_data = 0x7FFFFFFF;
|
||||||
opal_convertor_t *pConv;
|
opal_convertor_t *pConv;
|
||||||
uint32_t iov_count;
|
|
||||||
struct iovec iov[5];
|
|
||||||
size_t max_data = 0;
|
|
||||||
long prev = -1;
|
|
||||||
int rc;
|
|
||||||
bool monotonic = true;
|
bool monotonic = true;
|
||||||
|
uint32_t iov_count;
|
||||||
|
int rc;
|
||||||
|
|
||||||
pConv = opal_convertor_create( opal_local_arch, 0 );
|
pConv = opal_convertor_create( opal_local_arch, 0 );
|
||||||
if (OPAL_UNLIKELY(NULL == pConv)) {
|
if (OPAL_UNLIKELY(NULL == pConv)) {
|
||||||
return 0;
|
return -1;
|
||||||
}
|
}
|
||||||
rc = opal_convertor_prepare_for_send( pConv, type, 1, NULL );
|
rc = opal_convertor_prepare_for_send( pConv, type, 1, NULL );
|
||||||
if( OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
if( OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||||
OBJ_RELEASE(pConv);
|
OBJ_RELEASE(pConv);
|
||||||
return 0;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
iov_count = 5;
|
iov_count = OPAL_DATATYPE_MAX_MONOTONIC_IOVEC;
|
||||||
rc = opal_convertor_raw( pConv, iov, &iov_count, &max_data);
|
rc = opal_convertor_raw( pConv, iov, &iov_count, &max_data);
|
||||||
for (uint32_t i=0; i<iov_count; i++) {
|
for (uint32_t i = 0; i < iov_count; i++) {
|
||||||
if ((long)iov[i].iov_base < prev) {
|
if ((ptrdiff_t)iov[i].iov_base < upper_limit) {
|
||||||
monotonic = false;
|
monotonic = false;
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
prev = (long)iov[i].iov_base;
|
/* The new upper bound is at the end of the iovec */
|
||||||
|
upper_limit = (ptrdiff_t)iov[i].iov_base + iov[i].iov_len;
|
||||||
}
|
}
|
||||||
} while (rc != 1);
|
} while (rc != 1);
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||||
@ -30,32 +30,19 @@
|
|||||||
#include "opal/datatype/opal_convertor.h"
|
#include "opal/datatype/opal_convertor.h"
|
||||||
#include "opal/datatype/opal_datatype_internal.h"
|
#include "opal/datatype/opal_datatype_internal.h"
|
||||||
|
|
||||||
#define SET_EMPTY_ELEMENT( ELEM ) \
|
|
||||||
do { \
|
|
||||||
ddt_elem_desc_t* _elem = (ELEM); \
|
|
||||||
_elem->common.flags = OPAL_DATATYPE_FLAG_BASIC; \
|
|
||||||
_elem->common.type = OPAL_DATATYPE_LOOP; \
|
|
||||||
_elem->count = 0; \
|
|
||||||
_elem->disp = 0; \
|
|
||||||
_elem->extent = 0; \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
static int32_t
|
static int32_t
|
||||||
opal_datatype_optimize_short( opal_datatype_t* pData,
|
opal_datatype_optimize_short( opal_datatype_t* pData,
|
||||||
size_t count,
|
size_t count,
|
||||||
dt_type_desc_t* pTypeDesc )
|
dt_type_desc_t* pTypeDesc )
|
||||||
{
|
{
|
||||||
dt_elem_desc_t* pElemDesc;
|
dt_elem_desc_t* pElemDesc;
|
||||||
ddt_elem_desc_t opt_elem;
|
dt_stack_t *pOrigStack, *pStack; /* pointer to the position on the stack */
|
||||||
dt_stack_t* pOrigStack;
|
int32_t pos_desc = 0; /* actual position in the description of the derived datatype */
|
||||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
int32_t stack_pos = 0;
|
||||||
int32_t pos_desc = 0; /* actual position in the description of the derived datatype */
|
int32_t nbElems = 0;
|
||||||
int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1;
|
ptrdiff_t total_disp = 0;
|
||||||
int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity;
|
ddt_elem_desc_t last = {.common.flags = 0xFFFF /* all on */, .count = 0, .disp = 0}, compress;
|
||||||
ptrdiff_t total_disp = 0, last_extent = 1, last_disp = 0;
|
ddt_elem_desc_t* current;
|
||||||
uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */
|
|
||||||
uint32_t i;
|
|
||||||
size_t last_length = 0;
|
|
||||||
|
|
||||||
pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->loops+2) );
|
pOrigStack = pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (pData->loops+2) );
|
||||||
SAVE_STACK( pStack, -1, 0, count, 0 );
|
SAVE_STACK( pStack, -1, 0, count, 0 );
|
||||||
@ -64,22 +51,17 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
|
|||||||
pTypeDesc->desc = pElemDesc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * pTypeDesc->length );
|
pTypeDesc->desc = pElemDesc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * pTypeDesc->length );
|
||||||
pTypeDesc->used = 0;
|
pTypeDesc->used = 0;
|
||||||
|
|
||||||
SET_EMPTY_ELEMENT( &opt_elem );
|
|
||||||
assert( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pData->desc.used].elem.common.type );
|
assert( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pData->desc.used].elem.common.type );
|
||||||
opt_elem.common.type = OPAL_DATATYPE_LOOP;
|
|
||||||
opt_elem.common.flags = 0xFFFF; /* keep all for the first datatype */
|
|
||||||
opt_elem.count = 0;
|
|
||||||
opt_elem.disp = pData->desc.desc[pData->desc.used].end_loop.first_elem_disp;
|
|
||||||
opt_elem.extent = 0;
|
|
||||||
|
|
||||||
while( stack_pos >= 0 ) {
|
while( stack_pos >= 0 ) {
|
||||||
if( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */
|
if( OPAL_DATATYPE_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||||
ddt_endloop_desc_t* end_loop = &(pData->desc.desc[pos_desc].end_loop);
|
ddt_endloop_desc_t* end_loop = &(pData->desc.desc[pos_desc].end_loop);
|
||||||
if( last_length != 0 ) {
|
if( 0 != last.count ) {
|
||||||
CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
|
CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC,
|
||||||
|
last.blocklen, last.count, last.disp, last.extent );
|
||||||
pElemDesc++; nbElems++;
|
pElemDesc++; nbElems++;
|
||||||
last_disp += last_length;
|
last.disp += last.count;
|
||||||
last_length = 0;
|
last.count= 0;
|
||||||
}
|
}
|
||||||
CREATE_LOOP_END( pElemDesc, nbElems - pStack->index + 1, /* # of elems in this loop */
|
CREATE_LOOP_END( pElemDesc, nbElems - pStack->index + 1, /* # of elems in this loop */
|
||||||
end_loop->first_elem_disp, end_loop->size, end_loop->common.flags );
|
end_loop->first_elem_disp, end_loop->size, end_loop->common.flags );
|
||||||
@ -97,153 +79,146 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
|
|||||||
ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
|
ddt_loop_desc_t* loop = (ddt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
|
||||||
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
|
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
|
||||||
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
|
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
|
||||||
ptrdiff_t loop_disp = pData->desc.desc[pos_desc + index].elem.disp;
|
|
||||||
|
|
||||||
continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size)
|
|
||||||
== (total_disp + loop_disp));
|
|
||||||
if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
if( loop->common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||||
/* the loop is contiguous or composed by contiguous elements with a gap */
|
assert(pData->desc.desc[pos_desc + index].elem.disp == end_loop->first_elem_disp);
|
||||||
if( loop->extent == (ptrdiff_t)end_loop->size ) {
|
compress.common.flags = loop->common.flags;
|
||||||
/* the whole loop is contiguous */
|
compress.common.type = pData->desc.desc[pos_desc + index].elem.common.type;
|
||||||
if( !continuity ) {
|
compress.blocklen = pData->desc.desc[pos_desc + index].elem.blocklen;
|
||||||
if( 0 != last_length ) {
|
for( uint32_t i = index+1; i < loop->items; i++ ) {
|
||||||
CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC,
|
current = &pData->desc.desc[pos_desc + i].elem;
|
||||||
last_length, last_disp, last_extent );
|
assert(1 == current->count);
|
||||||
pElemDesc++; nbElems++;
|
if( (current->common.type == OPAL_DATATYPE_LOOP) ||
|
||||||
last_length = 0;
|
compress.common.type != current->common.type ) {
|
||||||
}
|
compress.common.type = OPAL_DATATYPE_UINT1;
|
||||||
last_disp = total_disp + loop_disp;
|
compress.blocklen = end_loop->size;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
last_length = (last_length * opal_datatype_basicDatatypes[last_type]->size
|
compress.blocklen += current->blocklen;
|
||||||
+ loop->loops * end_loop->size);
|
}
|
||||||
last_type = OPAL_DATATYPE_UINT1;
|
compress.count = loop->loops;
|
||||||
last_extent = 1;
|
compress.extent = loop->extent;
|
||||||
} else {
|
compress.disp = end_loop->first_elem_disp;
|
||||||
int counter = loop->loops;
|
|
||||||
ptrdiff_t merged_disp = 0;
|
/**
|
||||||
/* if the previous data is contiguous with this piece and it has a length not ZERO */
|
* The current loop has been compressed and can now be treated as if it
|
||||||
if( last_length != 0 ) {
|
* was a data element. We can now look if it can be fused with last,
|
||||||
if( continuity ) {
|
* as done in the fusion of 2 elements below. Let's use the same code.
|
||||||
last_length *= opal_datatype_basicDatatypes[last_type]->size;
|
*/
|
||||||
last_length += end_loop->size;
|
pos_desc += loop->items + 1;
|
||||||
last_type = OPAL_DATATYPE_UINT1;
|
current = &compress;
|
||||||
last_extent = 1;
|
goto fuse_loops;
|
||||||
counter--;
|
}
|
||||||
merged_disp = loop->extent; /* merged loop, update the disp of the remaining elems */
|
|
||||||
}
|
/**
|
||||||
CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC,
|
* If the content of the loop is not contiguous there is little we can do
|
||||||
last_length, last_disp, last_extent );
|
* that would not incur significant optimization cost and still be beneficial
|
||||||
pElemDesc++; nbElems++;
|
* in reducing the number of memcpy during pack/unpack.
|
||||||
last_disp += last_length;
|
*/
|
||||||
last_length = 0;
|
|
||||||
last_type = OPAL_DATATYPE_LOOP;
|
if( 0 != last.count ) { /* Generate the pending element */
|
||||||
}
|
CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC,
|
||||||
/**
|
last.blocklen, last.count, last.disp, last.extent );
|
||||||
* The content of the loop is contiguous (maybe with a gap before or after).
|
pElemDesc++; nbElems++;
|
||||||
*
|
last.count = 0;
|
||||||
* If any of the loops have been merged with the previous element, then the
|
last.common.type = OPAL_DATATYPE_LOOP;
|
||||||
* displacement of the first element (or the displacement of all elements if the
|
}
|
||||||
* loop will be removed) must be updated accordingly.
|
|
||||||
*/
|
/* Can we unroll the loop? */
|
||||||
if( counter <= 2 ) {
|
if( (loop->items <= 3) && (loop->loops <= 2) ) {
|
||||||
merged_disp += end_loop->first_elem_disp;
|
ptrdiff_t elem_displ = 0;
|
||||||
while( counter > 0 ) {
|
for( uint32_t i = 0; i < loop->loops; i++ ) {
|
||||||
CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC,
|
for( uint32_t j = 0; j < (loop->items - 1); j++ ) {
|
||||||
end_loop->size, merged_disp, 1);
|
current = &pData->desc.desc[pos_desc + index + j].elem;
|
||||||
pElemDesc++; nbElems++; counter--;
|
CREATE_ELEM( pElemDesc, current->common.type, current->common.flags,
|
||||||
merged_disp += loop->extent;
|
current->blocklen, current->count, current->disp + elem_displ, current->extent );
|
||||||
}
|
|
||||||
} else {
|
|
||||||
CREATE_LOOP_START( pElemDesc, counter, 2, loop->extent, loop->common.flags );
|
|
||||||
pElemDesc++; nbElems++;
|
|
||||||
CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC,
|
|
||||||
end_loop->size, loop_disp, 1);
|
|
||||||
pElemDesc++; nbElems++;
|
|
||||||
CREATE_LOOP_END( pElemDesc, 2, end_loop->first_elem_disp + merged_disp,
|
|
||||||
end_loop->size, end_loop->common.flags );
|
|
||||||
pElemDesc++; nbElems++;
|
pElemDesc++; nbElems++;
|
||||||
}
|
}
|
||||||
|
elem_displ += loop->extent;
|
||||||
}
|
}
|
||||||
pos_desc += loop->items + 1;
|
pos_desc += loop->items + 1;
|
||||||
} else {
|
goto complete_loop;
|
||||||
ddt_elem_desc_t* elem = (ddt_elem_desc_t*)&(pData->desc.desc[pos_desc+1]);
|
|
||||||
if( last_length != 0 ) {
|
|
||||||
CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
|
|
||||||
pElemDesc++; nbElems++;
|
|
||||||
last_disp += last_length;
|
|
||||||
last_length = 0;
|
|
||||||
last_type = OPAL_DATATYPE_LOOP;
|
|
||||||
}
|
|
||||||
if( 2 == loop->items ) { /* small loop */
|
|
||||||
if( (1 == elem->count)
|
|
||||||
&& (elem->extent == (ptrdiff_t)opal_datatype_basicDatatypes[elem->common.type]->size) ) {
|
|
||||||
CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags & ~OPAL_DATATYPE_FLAG_CONTIGUOUS,
|
|
||||||
loop->loops, elem->disp, loop->extent );
|
|
||||||
pElemDesc++; nbElems++;
|
|
||||||
pos_desc += loop->items + 1;
|
|
||||||
goto complete_loop;
|
|
||||||
} else if( loop->loops < 3 ) {
|
|
||||||
ptrdiff_t elem_displ = elem->disp;
|
|
||||||
for( i = 0; i < loop->loops; i++ ) {
|
|
||||||
CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags,
|
|
||||||
elem->count, elem_displ, elem->extent );
|
|
||||||
elem_displ += loop->extent;
|
|
||||||
pElemDesc++; nbElems++;
|
|
||||||
}
|
|
||||||
pos_desc += loop->items + 1;
|
|
||||||
goto complete_loop;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags );
|
|
||||||
pElemDesc++; nbElems++;
|
|
||||||
PUSH_STACK( pStack, stack_pos, nbElems, OPAL_DATATYPE_LOOP, loop->loops, total_disp );
|
|
||||||
pos_desc++;
|
|
||||||
DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags );
|
||||||
|
pElemDesc++; nbElems++;
|
||||||
|
PUSH_STACK( pStack, stack_pos, nbElems, OPAL_DATATYPE_LOOP, loop->loops, total_disp );
|
||||||
|
pos_desc++;
|
||||||
|
DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" );
|
||||||
|
|
||||||
complete_loop:
|
complete_loop:
|
||||||
total_disp = pStack->disp; /* update the displacement */
|
total_disp = pStack->disp; /* update the displacement */
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */
|
while( pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) { /* go over all basic datatype elements */
|
||||||
/* now here we have a basic datatype */
|
current = &pData->desc.desc[pos_desc].elem;
|
||||||
type = pData->desc.desc[pos_desc].elem.common.type;
|
pos_desc++; /* point to the next element as current points to the current one */
|
||||||
continuity = ((last_disp + (ptrdiff_t)last_length * (ptrdiff_t)opal_datatype_basicDatatypes[last_type]->size)
|
|
||||||
== (total_disp + pData->desc.desc[pos_desc].elem.disp));
|
|
||||||
|
|
||||||
if( (pData->desc.desc[pos_desc].elem.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && continuity &&
|
fuse_loops:
|
||||||
(pData->desc.desc[pos_desc].elem.extent == (int32_t)opal_datatype_basicDatatypes[type]->size) ) {
|
if( 0 == last.count ) { /* first data of the datatype */
|
||||||
if( type == last_type ) {
|
last = *current;
|
||||||
last_length += pData->desc.desc[pos_desc].elem.count;
|
continue; /* next data */
|
||||||
last_extent = pData->desc.desc[pos_desc].elem.extent;
|
|
||||||
} else {
|
|
||||||
if( last_length == 0 ) {
|
|
||||||
last_type = type;
|
|
||||||
last_length = pData->desc.desc[pos_desc].elem.count;
|
|
||||||
last_extent = pData->desc.desc[pos_desc].elem.extent;
|
|
||||||
} else {
|
|
||||||
last_length = last_length * opal_datatype_basicDatatypes[last_type]->size +
|
|
||||||
pData->desc.desc[pos_desc].elem.count * opal_datatype_basicDatatypes[type]->size;
|
|
||||||
last_type = OPAL_DATATYPE_UINT1;
|
|
||||||
last_extent = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
last_flags &= pData->desc.desc[pos_desc].elem.common.flags;
|
|
||||||
} else {
|
|
||||||
if( last_length != 0 ) {
|
|
||||||
CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
|
|
||||||
pElemDesc++; nbElems++;
|
|
||||||
}
|
|
||||||
last_disp = total_disp + pData->desc.desc[pos_desc].elem.disp;
|
|
||||||
last_length = pData->desc.desc[pos_desc].elem.count;
|
|
||||||
last_extent = pData->desc.desc[pos_desc].elem.extent;
|
|
||||||
last_type = type;
|
|
||||||
}
|
}
|
||||||
pos_desc++; /* advance to the next data */
|
|
||||||
|
/* are the two elements compatible, i.e. do they have such similar values that they
|
||||||
|
* can be merged together by increasing the count. This optimizes the memory
|
||||||
|
* required for storing the datatype description.
|
||||||
|
*/
|
||||||
|
if( ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ==
|
||||||
|
(current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size)) &&
|
||||||
|
(current->disp == (last.disp + (ptrdiff_t)last.count * last.extent)) &&
|
||||||
|
((last.count == 1) || (current->count == 1) || (last.extent == current->extent)) ) {
|
||||||
|
last.count += current->count;
|
||||||
|
if( last.count == 1 ) {
|
||||||
|
last.extent = current->extent;
|
||||||
|
} /* otherwise keep the last.extent */
|
||||||
|
/* find the lowest common denominator type */
|
||||||
|
if( last.common.type != current->common.type ) {
|
||||||
|
last.common.type = OPAL_DATATYPE_UINT1;
|
||||||
|
last.blocklen *= opal_datatype_basicDatatypes[last.common.type]->size;
|
||||||
|
}
|
||||||
|
continue; /* next data */
|
||||||
|
}
|
||||||
|
/* are the elements fusible, such that we can fuse the last blocklen of one with the first
|
||||||
|
* blocklen of the other.
|
||||||
|
*/
|
||||||
|
if( (ptrdiff_t)(last.disp + (last.count - 1) * last.extent + last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) ==
|
||||||
|
current->disp ) {
|
||||||
|
if( last.count != 1 ) {
|
||||||
|
CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC,
|
||||||
|
last.blocklen, last.count - 1, last.disp, last.extent );
|
||||||
|
pElemDesc++; nbElems++;
|
||||||
|
last.disp += (last.count - 1) * last.extent;
|
||||||
|
last.count = 1;
|
||||||
|
}
|
||||||
|
if( last.common.type == current->common.type ) {
|
||||||
|
last.blocklen += current->blocklen;
|
||||||
|
} else {
|
||||||
|
last.blocklen = ((last.blocklen * opal_datatype_basicDatatypes[last.common.type]->size) +
|
||||||
|
(current->blocklen * opal_datatype_basicDatatypes[current->common.type]->size));
|
||||||
|
last.common.type = OPAL_DATATYPE_UINT1;
|
||||||
|
}
|
||||||
|
last.extent += current->extent;
|
||||||
|
if( current->count != 1 ) {
|
||||||
|
CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC,
|
||||||
|
last.blocklen, last.count, last.disp, last.extent );
|
||||||
|
pElemDesc++; nbElems++;
|
||||||
|
last = *current;
|
||||||
|
last.count -= 1;
|
||||||
|
last.disp += last.extent;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC,
|
||||||
|
last.blocklen, last.count, last.disp, last.extent );
|
||||||
|
pElemDesc++; nbElems++;
|
||||||
|
last = *current;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( last_length != 0 ) {
|
if( 0 != last.count ) {
|
||||||
CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
|
CREATE_ELEM( pElemDesc, last.common.type, OPAL_DATATYPE_FLAG_BASIC,
|
||||||
|
last.blocklen, last.count, last.disp, last.extent );
|
||||||
pElemDesc++; nbElems++;
|
pElemDesc++; nbElems++;
|
||||||
}
|
}
|
||||||
/* cleanup the stack */
|
/* cleanup the stack */
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||||
@ -19,8 +19,6 @@
|
|||||||
|
|
||||||
#include "opal_config.h"
|
#include "opal_config.h"
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
#if !defined(CHECKSUM) && OPAL_CUDA_SUPPORT
|
#if !defined(CHECKSUM) && OPAL_CUDA_SUPPORT
|
||||||
/* Make use of existing macro to do CUDA style memcpy */
|
/* Make use of existing macro to do CUDA style memcpy */
|
||||||
#undef MEMCPY_CSUM
|
#undef MEMCPY_CSUM
|
||||||
@ -28,75 +26,117 @@
|
|||||||
CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
|
CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline void pack_predefined_data( opal_convertor_t* CONVERTOR,
|
static inline void
|
||||||
const dt_elem_desc_t* ELEM,
|
pack_predefined_data( opal_convertor_t* CONVERTOR,
|
||||||
size_t* COUNT,
|
const dt_elem_desc_t* ELEM,
|
||||||
unsigned char** SOURCE,
|
size_t* COUNT,
|
||||||
unsigned char** DESTINATION,
|
unsigned char** memory,
|
||||||
size_t* SPACE )
|
unsigned char** packed,
|
||||||
|
size_t* SPACE )
|
||||||
{
|
{
|
||||||
size_t _copy_count = *(COUNT);
|
|
||||||
size_t _copy_blength;
|
|
||||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||||
unsigned char* _source = (*SOURCE) + _elem->disp;
|
size_t total_count = _elem->count * _elem->blocklen;
|
||||||
|
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
size_t do_now, do_now_bytes;
|
||||||
|
unsigned char* _memory = (*memory) + _elem->disp;
|
||||||
|
|
||||||
_copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size;
|
assert( *(COUNT) <= _elem->count * _elem->blocklen);
|
||||||
if( (_copy_count * _copy_blength) > *(SPACE) ) {
|
|
||||||
_copy_count = (*(SPACE) / _copy_blength);
|
|
||||||
if( 0 == _copy_count ) return; /* nothing to do */
|
|
||||||
}
|
|
||||||
|
|
||||||
if( (ptrdiff_t)_copy_blength == _elem->extent ) {
|
if( cando_count > *(COUNT) )
|
||||||
_copy_blength *= _copy_count;
|
cando_count = *(COUNT);
|
||||||
/* the extent and the size of the basic datatype are equal */
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf,
|
/**
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
* First check whether part of this element has already been processed.
|
||||||
DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %lu ) => space %lu\n",
|
*/
|
||||||
(void*)*(DESTINATION), (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); );
|
do_now = (total_count - *(COUNT)); /* done elements */
|
||||||
MEMCPY_CSUM( *(DESTINATION), _source, _copy_blength, (CONVERTOR) );
|
if( 0 != do_now ) {
|
||||||
_source += _copy_blength;
|
do_now = do_now % _elem->blocklen; /* partial blocklen? */
|
||||||
*(DESTINATION) += _copy_blength;
|
|
||||||
} else {
|
if( 0 != do_now ) {
|
||||||
for(size_t _i = 0; _i < _copy_count; _i++ ) {
|
size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf,
|
do_now = (left_in_block > cando_count ) ? cando_count : left_in_block;
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
|
|
||||||
(void*)*(DESTINATION), (void*)_source, (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); );
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
MEMCPY_CSUM( *(DESTINATION), _source, _copy_blength, (CONVERTOR) );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
*(DESTINATION) += _copy_blength;
|
DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n",
|
||||||
_source += _elem->extent;
|
(void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
||||||
|
MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) );
|
||||||
|
_memory = (*memory) + _elem->disp + (ptrdiff_t)do_now_bytes;
|
||||||
|
/* compensate if we just completed a blocklen */
|
||||||
|
if( do_now == left_in_block )
|
||||||
|
_memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size);
|
||||||
|
*(packed) += do_now_bytes;
|
||||||
|
*(SPACE) -= do_now_bytes;
|
||||||
|
*(COUNT) -= do_now;
|
||||||
|
cando_count -= do_now;
|
||||||
}
|
}
|
||||||
_copy_blength *= _copy_count;
|
|
||||||
}
|
}
|
||||||
*(SOURCE) = _source - _elem->disp;
|
|
||||||
*(SPACE) -= _copy_blength;
|
/**
|
||||||
*(COUNT) -= _copy_count;
|
* Compute how many full blocklen we need to do and do them.
|
||||||
|
*/
|
||||||
|
do_now = cando_count / _elem->blocklen;
|
||||||
|
if( 0 != do_now ) {
|
||||||
|
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
for(size_t _i = 0; _i < do_now; _i++ ) {
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
|
||||||
|
(void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); );
|
||||||
|
MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) );
|
||||||
|
*(packed) += do_now_bytes;
|
||||||
|
_memory += _elem->extent;
|
||||||
|
*(SPACE) -= do_now_bytes;
|
||||||
|
*(COUNT) -= _elem->blocklen;
|
||||||
|
cando_count -= _elem->blocklen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* As an epilog do anything left from the last blocklen.
|
||||||
|
*/
|
||||||
|
do_now = cando_count;
|
||||||
|
if( 0 != do_now ) {
|
||||||
|
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
|
||||||
|
(void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
||||||
|
MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) );
|
||||||
|
_memory += do_now_bytes;
|
||||||
|
*(packed) += do_now_bytes;
|
||||||
|
*(SPACE) -= do_now_bytes;
|
||||||
|
*(COUNT) -= do_now;
|
||||||
|
}
|
||||||
|
|
||||||
|
*(memory) = _memory - _elem->disp;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
||||||
const dt_elem_desc_t* ELEM,
|
const dt_elem_desc_t* ELEM,
|
||||||
size_t* COUNT,
|
size_t* COUNT,
|
||||||
unsigned char** SOURCE,
|
unsigned char** memory,
|
||||||
unsigned char** DESTINATION,
|
unsigned char** packed,
|
||||||
size_t* SPACE )
|
size_t* SPACE )
|
||||||
{
|
{
|
||||||
const ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
const ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
||||||
const ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
const ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
||||||
unsigned char* _source = (*SOURCE) + _end_loop->first_elem_disp;
|
unsigned char* _memory = (*memory) + _end_loop->first_elem_disp;
|
||||||
size_t _copy_loops = *(COUNT);
|
size_t _copy_loops = *(COUNT);
|
||||||
|
|
||||||
if( (_copy_loops * _end_loop->size) > *(SPACE) )
|
if( (_copy_loops * _end_loop->size) > *(SPACE) )
|
||||||
_copy_loops = (*(SPACE) / _end_loop->size);
|
_copy_loops = (*(SPACE) / _end_loop->size);
|
||||||
for(size_t _i = 0; _i < _copy_loops; _i++ ) {
|
for(size_t _i = 0; _i < _copy_loops; _i++ ) {
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, _end_loop->size, (CONVERTOR)->pBaseBuf,
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, _end_loop->size, (CONVERTOR)->pBaseBuf,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu\n",
|
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu\n",
|
||||||
(void*)*(DESTINATION), (void*)_source, (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); );
|
(void*)*(packed), (void*)_memory, (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); );
|
||||||
MEMCPY_CSUM( *(DESTINATION), _source, _end_loop->size, (CONVERTOR) );
|
MEMCPY_CSUM( *(packed), _memory, _end_loop->size, (CONVERTOR) );
|
||||||
*(DESTINATION) += _end_loop->size;
|
*(packed) += _end_loop->size;
|
||||||
_source += _loop->extent;
|
_memory += _loop->extent;
|
||||||
}
|
}
|
||||||
*(SOURCE) = _source - _end_loop->first_elem_disp;
|
*(memory) = _memory - _end_loop->first_elem_disp;
|
||||||
*(SPACE) -= _copy_loops * _end_loop->size;
|
*(SPACE) -= _copy_loops * _end_loop->size;
|
||||||
*(COUNT) -= _copy_loops;
|
*(COUNT) -= _copy_loops;
|
||||||
}
|
}
|
||||||
@ -104,12 +144,12 @@ static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
|||||||
#define PACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \
|
#define PACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \
|
||||||
ELEM, /* the basic element to be packed */ \
|
ELEM, /* the basic element to be packed */ \
|
||||||
COUNT, /* the number of elements */ \
|
COUNT, /* the number of elements */ \
|
||||||
SOURCE, /* the source pointer (char*) */ \
|
MEMORY, /* the source pointer (char*) */ \
|
||||||
DESTINATION, /* the destination pointer (char*) */ \
|
PACKED, /* the destination pointer (char*) */ \
|
||||||
SPACE ) /* the space in the destination buffer */ \
|
SPACE ) /* the space in the destination buffer */ \
|
||||||
pack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
pack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(MEMORY), &(PACKED), &(SPACE) )
|
||||||
|
|
||||||
#define PACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \
|
#define PACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, MEMORY, PACKED, SPACE ) \
|
||||||
pack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
pack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(MEMORY), &(PACKED), &(SPACE) )
|
||||||
|
|
||||||
#endif /* OPAL_DATATYPE_PACK_H_HAS_BEEN_INCLUDED */
|
#endif /* OPAL_DATATYPE_PACK_H_HAS_BEEN_INCLUDED */
|
||||||
|
@ -61,22 +61,77 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
|
|||||||
unsigned char** POINTER,
|
unsigned char** POINTER,
|
||||||
size_t* SPACE )
|
size_t* SPACE )
|
||||||
{
|
{
|
||||||
size_t _copy_count = *(COUNT);
|
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||||
size_t _copy_blength;
|
size_t total_count = _elem->count * _elem->blocklen;
|
||||||
ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
size_t do_now, do_now_bytes;
|
||||||
|
unsigned char* _memory = (*POINTER) + _elem->disp;
|
||||||
|
|
||||||
_copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size;
|
assert( *(COUNT) <= _elem->count * _elem->blocklen);
|
||||||
if( (_copy_count * _copy_blength) > *(SPACE) ) {
|
|
||||||
_copy_count = *(SPACE) / _copy_blength;
|
if( cando_count > *(COUNT) )
|
||||||
if( 0 == _copy_count ) return; /* nothing to do */
|
cando_count = *(COUNT);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* First check if we already did something on this element ?
|
||||||
|
*/
|
||||||
|
do_now = (total_count - *(COUNT)); /* done elements */
|
||||||
|
if( 0 != do_now ) {
|
||||||
|
do_now = do_now % _elem->blocklen; /* partial blocklen? */
|
||||||
|
|
||||||
|
if( 0 != do_now ) {
|
||||||
|
size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */
|
||||||
|
do_now = (left_in_block > cando_count ) ? cando_count : left_in_block;
|
||||||
|
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [prolog]\n",
|
||||||
|
(void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
||||||
|
_memory = *(POINTER) + _elem->disp + (ptrdiff_t)do_now_bytes;
|
||||||
|
/* compensate if we just completed a blocklen */
|
||||||
|
if( do_now == left_in_block )
|
||||||
|
_memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size);
|
||||||
|
*(SPACE) -= do_now_bytes;
|
||||||
|
*(COUNT) -= do_now;
|
||||||
|
cando_count -= do_now;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_copy_blength *= _copy_count;
|
|
||||||
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( *(POINTER) + _elem->disp, _copy_blength, (CONVERTOR)->pBaseBuf,
|
/**
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
* Compute how many full blocklen we need to do and do them.
|
||||||
*(POINTER) += (_copy_count * _elem->extent);
|
*/
|
||||||
*(SPACE) -= _copy_blength;
|
do_now = cando_count / _elem->blocklen;
|
||||||
*(COUNT) -= _copy_count;
|
if( 0 != do_now ) {
|
||||||
|
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
for(size_t _i = 0; _i < do_now; _i++ ) {
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu\n",
|
||||||
|
(void*)_memory, (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); );
|
||||||
|
_memory += _elem->extent;
|
||||||
|
*(SPACE) -= do_now_bytes;
|
||||||
|
*(COUNT) -= _elem->blocklen;
|
||||||
|
cando_count -= _elem->blocklen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* As an epilog do anything left from the last blocklen.
|
||||||
|
*/
|
||||||
|
do_now = cando_count;
|
||||||
|
if( 0 != do_now ) {
|
||||||
|
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [epilog]\n",
|
||||||
|
(void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
||||||
|
_memory += do_now_bytes;
|
||||||
|
*(SPACE) -= do_now_bytes;
|
||||||
|
*(COUNT) -= do_now;
|
||||||
|
}
|
||||||
|
|
||||||
|
*(POINTER) = _memory - _elem->disp;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -128,8 +183,8 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
|||||||
|
|
||||||
/* We dont want to have to parse the datatype multiple times. What we are interested in
|
/* We dont want to have to parse the datatype multiple times. What we are interested in
|
||||||
* here is to compute the number of completed datatypes that we can move forward, update
|
* here is to compute the number of completed datatypes that we can move forward, update
|
||||||
* the counters and finally compute the position taking in account only the remaining
|
* the counters and compute the position taking in account only the remaining elements.
|
||||||
* elements. The only problem is that we have to modify all the elements on the stack.
|
* The only problem is that we have to modify all the elements on the stack.
|
||||||
*/
|
*/
|
||||||
iov_len_local = *position - pConvertor->bConverted;
|
iov_len_local = *position - pConvertor->bConverted;
|
||||||
if( iov_len_local > pConvertor->pDesc->size ) {
|
if( iov_len_local > pConvertor->pDesc->size ) {
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||||
@ -27,83 +27,124 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
unpack_predefined_data( opal_convertor_t* CONVERTOR, /* the convertor */
|
unpack_predefined_data( opal_convertor_t* CONVERTOR,
|
||||||
const dt_elem_desc_t* ELEM, /* the element description */
|
const dt_elem_desc_t* ELEM,
|
||||||
size_t* COUNT, /* the number of elements */
|
size_t* COUNT,
|
||||||
unsigned char** SOURCE, /* the source pointer */
|
unsigned char** packed,
|
||||||
unsigned char** DESTINATION, /* the destination pointer */
|
unsigned char** memory,
|
||||||
size_t* SPACE ) /* the space in the destination buffer */
|
size_t* SPACE )
|
||||||
{
|
{
|
||||||
size_t _copy_count = *(COUNT);
|
|
||||||
size_t _copy_blength;
|
|
||||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||||
unsigned char* _destination = (*DESTINATION) + _elem->disp;
|
size_t total_count = _elem->count * _elem->blocklen;
|
||||||
|
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
size_t do_now, do_now_bytes;
|
||||||
|
unsigned char* _memory = (*memory) + _elem->disp;
|
||||||
|
|
||||||
_copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size;
|
assert( *(COUNT) <= _elem->count * _elem->blocklen);
|
||||||
if( (_copy_count * _copy_blength) > *(SPACE) ) {
|
|
||||||
_copy_count = (*(SPACE) / _copy_blength);
|
|
||||||
if( 0 == _copy_count ) return; /* nothing to do */
|
|
||||||
}
|
|
||||||
|
|
||||||
if( (ptrdiff_t)_copy_blength == _elem->extent ) {
|
if( cando_count > *(COUNT) )
|
||||||
_copy_blength *= _copy_count;
|
cando_count = *(COUNT);
|
||||||
/* the extent and the size of the basic datatype are equal */
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf,
|
/**
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
* First check if we already did something on this element ?
|
||||||
DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %lu ) => space %lu\n",
|
*/
|
||||||
(void*)_destination, (void*)*(SOURCE), (unsigned long)_copy_blength, (unsigned long)(*(SPACE)) ); );
|
do_now = (total_count - *(COUNT)); /* done elements */
|
||||||
MEMCPY_CSUM( _destination, *(SOURCE), _copy_blength, (CONVERTOR) );
|
if( 0 != do_now ) {
|
||||||
*(SOURCE) += _copy_blength;
|
do_now = do_now % _elem->blocklen; /* partial blocklen? */
|
||||||
_destination += _copy_blength;
|
|
||||||
} else {
|
if( 0 != do_now ) {
|
||||||
for(size_t _i = 0; _i < _copy_count; _i++ ) {
|
size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf,
|
do_now = (left_in_block > cando_count ) ? cando_count : left_in_block;
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n",
|
|
||||||
(void*)_destination, (void*)*(SOURCE), (unsigned long)_copy_blength, (unsigned long)(*(SPACE) - (_i * _copy_blength)) ); );
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
MEMCPY_CSUM( _destination, *(SOURCE), _copy_blength, (CONVERTOR) );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
*(SOURCE) += _copy_blength;
|
DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n",
|
||||||
_destination += _elem->extent;
|
(void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
||||||
|
MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) );
|
||||||
|
_memory = (*memory) + _elem->disp + (ptrdiff_t)do_now_bytes;
|
||||||
|
/* compensate if we just completed a blocklen */
|
||||||
|
if( do_now == left_in_block )
|
||||||
|
_memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size);
|
||||||
|
*(packed) += do_now_bytes;
|
||||||
|
*(SPACE) -= do_now_bytes;
|
||||||
|
*(COUNT) -= do_now;
|
||||||
|
cando_count -= do_now;
|
||||||
}
|
}
|
||||||
_copy_blength *= _copy_count;
|
|
||||||
}
|
}
|
||||||
(*DESTINATION) = _destination - _elem->disp;
|
|
||||||
*(SPACE) -= _copy_blength;
|
/**
|
||||||
*(COUNT) -= _copy_count;
|
* Compute how many full blocklen we need to do and do them.
|
||||||
|
*/
|
||||||
|
do_now = cando_count / _elem->blocklen;
|
||||||
|
if( 0 != do_now ) {
|
||||||
|
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
for(size_t _i = 0; _i < do_now; _i++ ) {
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
|
||||||
|
(void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); );
|
||||||
|
MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) );
|
||||||
|
*(packed) += do_now_bytes;
|
||||||
|
_memory += _elem->extent;
|
||||||
|
*(SPACE) -= do_now_bytes;
|
||||||
|
*(COUNT) -= _elem->blocklen;
|
||||||
|
cando_count -= _elem->blocklen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* As an epilog do anything left from the last blocklen.
|
||||||
|
*/
|
||||||
|
do_now = cando_count;
|
||||||
|
if( 0 != do_now ) {
|
||||||
|
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
|
||||||
|
(void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
||||||
|
MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) );
|
||||||
|
_memory += do_now_bytes;
|
||||||
|
*(packed) += do_now_bytes;
|
||||||
|
*(SPACE) -= do_now_bytes;
|
||||||
|
*(COUNT) -= do_now;
|
||||||
|
}
|
||||||
|
|
||||||
|
*(memory) = _memory - _elem->disp;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void unpack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
static inline void unpack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
||||||
const dt_elem_desc_t* ELEM,
|
const dt_elem_desc_t* ELEM,
|
||||||
size_t* COUNT,
|
size_t* COUNT,
|
||||||
unsigned char** SOURCE,
|
unsigned char** packed,
|
||||||
unsigned char** DESTINATION,
|
unsigned char** memory,
|
||||||
size_t* SPACE )
|
size_t* SPACE )
|
||||||
{
|
{
|
||||||
const ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
const ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
||||||
const ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
const ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
||||||
unsigned char* _destination = (*DESTINATION) + _end_loop->first_elem_disp;
|
unsigned char* _memory = (*memory) + _end_loop->first_elem_disp;
|
||||||
size_t _copy_loops = *(COUNT);
|
size_t _copy_loops = *(COUNT);
|
||||||
|
|
||||||
if( (_copy_loops * _end_loop->size) > *(SPACE) )
|
if( (_copy_loops * _end_loop->size) > *(SPACE) )
|
||||||
_copy_loops = (*(SPACE) / _end_loop->size);
|
_copy_loops = (*(SPACE) / _end_loop->size);
|
||||||
for(size_t _i = 0; _i < _copy_loops; _i++ ) {
|
for(size_t _i = 0; _i < _copy_loops; _i++ ) {
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _destination, _end_loop->size, (CONVERTOR)->pBaseBuf,
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, _end_loop->size, (CONVERTOR)->pBaseBuf,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %lu ) => space %lu\n",
|
DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %lu ) => space %lu\n",
|
||||||
(void*)_destination, (void*)*(SOURCE), (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); );
|
(void*)_memory, (void*)*(packed), (unsigned long)_end_loop->size, (unsigned long)(*(SPACE) - _i * _end_loop->size) ); );
|
||||||
MEMCPY_CSUM( _destination, *(SOURCE), _end_loop->size, (CONVERTOR) );
|
MEMCPY_CSUM( _memory, *(packed), _end_loop->size, (CONVERTOR) );
|
||||||
*(SOURCE) += _end_loop->size;
|
*(packed) += _end_loop->size;
|
||||||
_destination += _loop->extent;
|
_memory += _loop->extent;
|
||||||
}
|
}
|
||||||
*(DESTINATION) = _destination - _end_loop->first_elem_disp;
|
*(memory) = _memory - _end_loop->first_elem_disp;
|
||||||
*(SPACE) -= _copy_loops * _end_loop->size;
|
*(SPACE) -= _copy_loops * _end_loop->size;
|
||||||
*(COUNT) -= _copy_loops;
|
*(COUNT) -= _copy_loops;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define UNPACK_PREDEFINED_DATATYPE( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \
|
#define UNPACK_PREDEFINED_DATATYPE( CONVERTOR, ELEM, COUNT, PACKED, MEMORY, SPACE ) \
|
||||||
unpack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
unpack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) )
|
||||||
|
|
||||||
#define UNPACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \
|
#define UNPACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, PACKED, MEMORY, SPACE ) \
|
||||||
unpack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
unpack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(PACKED), &(MEMORY), &(SPACE) )
|
||||||
|
|
||||||
#endif /* OPAL_DATATYPE_UNPACK_H_HAS_BEEN_INCLUDED */
|
#endif /* OPAL_DATATYPE_UNPACK_H_HAS_BEEN_INCLUDED */
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user