The shiny new pack and unpack functions. The big difference is that the displacement is
never stored on the stack. It is partially stored on the stack depending on the loops but every time we pack/unpack a basic datatype we take in account again it's displacement. This approach make the whole logic a lot simpler. In same time I split the big functions in several basic block. This commit was SVN r8021.
Этот коммит содержится в:
родитель
334ca349fe
Коммит
8799d1799a
@ -44,54 +44,56 @@ static int ompi_pack_debug=0;
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#define PACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \
|
#define PACK_PREDEFINED_DATATYPE( CONVERTOR, /* the convertor */ \
|
||||||
TYPE, /* the basic type to be packed */ \
|
ELEM, /* the basic element to be packed */ \
|
||||||
COUNT, /* the number of elements */ \
|
COUNT, /* the number of elements */ \
|
||||||
EXTENT, /* the extent in bytes of each element */ \
|
|
||||||
SOURCE, /* the source pointer (char*) */ \
|
SOURCE, /* the source pointer (char*) */ \
|
||||||
DESTINATION, /* the destination pointer (char*) */ \
|
DESTINATION, /* the destination pointer (char*) */ \
|
||||||
SPACE ) /* the space in the destination buffer */ \
|
SPACE ) /* the space in the destination buffer */ \
|
||||||
pack_predefined_data( (CONVERTOR), (TYPE), &(COUNT), (EXTENT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
pack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
||||||
|
|
||||||
static inline void pack_predefined_data( ompi_convertor_t* CONVERTOR,
|
static inline void pack_predefined_data( ompi_convertor_t* CONVERTOR,
|
||||||
uint16_t TYPE,
|
dt_elem_desc_t* ELEM,
|
||||||
uint32_t* COUNT,
|
uint32_t* COUNT,
|
||||||
long EXTENT,
|
|
||||||
char** SOURCE,
|
char** SOURCE,
|
||||||
char** DESTINATION,
|
char** DESTINATION,
|
||||||
uint32_t* SPACE )
|
uint32_t* SPACE )
|
||||||
{
|
{
|
||||||
uint32_t _copy_count = *(COUNT), _copy_blength;
|
uint32_t _copy_count = *(COUNT), _copy_blength;
|
||||||
|
ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||||
|
char* _source = (*SOURCE) + _elem->disp;
|
||||||
|
|
||||||
if( (_copy_count * ompi_ddt_basicDatatypes[(TYPE)]->size) > *(SPACE) ) {
|
_copy_blength = ompi_ddt_basicDatatypes[_elem->common.type]->size;
|
||||||
_copy_count = *(SPACE) / ompi_ddt_basicDatatypes[(TYPE)]->size;
|
if( (_copy_count * _copy_blength) > *(SPACE) ) {
|
||||||
|
_copy_count = *(SPACE) / _copy_blength;
|
||||||
if( 0 == _copy_count ) return; /* nothing to do */
|
if( 0 == _copy_count ) return; /* nothing to do */
|
||||||
}
|
}
|
||||||
_copy_blength = _copy_count * ompi_ddt_basicDatatypes[(TYPE)]->size;
|
|
||||||
|
|
||||||
if( ompi_ddt_basicDatatypes[(TYPE)]->size == (uint32_t)(EXTENT) ) {
|
if( _copy_blength == (uint32_t)_elem->extent ) {
|
||||||
|
_copy_blength *= _copy_count;
|
||||||
/* the extent and the size of the basic datatype are equals */
|
/* the extent and the size of the basic datatype are equals */
|
||||||
OMPI_DDT_SAFEGUARD_POINTER( *(SOURCE), _copy_blength, (CONVERTOR)->pBaseBuf,
|
OMPI_DDT_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %ld ) => space %d\n",
|
DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %ld ) => space %d\n",
|
||||||
*(DESTINATION), *(SOURCE), _copy_blength, *(SPACE) ); );
|
*(DESTINATION), _source, _copy_blength, *(SPACE) ); );
|
||||||
MEMCPY( *(DESTINATION), *(SOURCE), _copy_blength );
|
MEMCPY( *(DESTINATION), _source, _copy_blength );
|
||||||
*(SOURCE) += _copy_blength;
|
_source += _copy_blength;
|
||||||
*(DESTINATION) += _copy_blength;
|
*(DESTINATION) += _copy_blength;
|
||||||
} else {
|
} else {
|
||||||
uint32_t _i;
|
uint32_t _i;
|
||||||
for( _i = 0; _i < _copy_count; _i++ ) {
|
for( _i = 0; _i < _copy_count; _i++ ) {
|
||||||
OMPI_DDT_SAFEGUARD_POINTER( *(SOURCE), _copy_blength, (CONVERTOR)->pBaseBuf,
|
OMPI_DDT_SAFEGUARD_POINTER( _source, _copy_blength, (CONVERTOR)->pBaseBuf,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %ld ) => space %d\n",
|
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %ld ) => space %d\n",
|
||||||
*(DESTINATION), *(SOURCE), _copy_blength,
|
*(DESTINATION), _source, _copy_blength, *(SPACE) - (_i * _copy_blength) ); );
|
||||||
*(SPACE) - (_i * ompi_ddt_basicDatatypes[(TYPE)]->size) ); );
|
MEMCPY( *(DESTINATION), _source, _copy_blength );
|
||||||
MEMCPY( *(DESTINATION), *(SOURCE), ompi_ddt_basicDatatypes[(TYPE)]->size );
|
*(DESTINATION) += _copy_blength;
|
||||||
*(DESTINATION) += ompi_ddt_basicDatatypes[(TYPE)]->size;
|
_source += _elem->extent;
|
||||||
*(SOURCE) += (EXTENT);
|
|
||||||
}
|
}
|
||||||
|
_copy_blength *= _copy_count;
|
||||||
}
|
}
|
||||||
*(SPACE) -= _copy_blength;
|
*(SOURCE) = _source - _elem->disp;
|
||||||
*(COUNT) -= _copy_count;
|
*(SPACE) -= _copy_blength;
|
||||||
|
*(COUNT) -= _copy_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void pack_contiguous_loop( ompi_convertor_t* CONVERTOR,
|
static inline void pack_contiguous_loop( ompi_convertor_t* CONVERTOR,
|
||||||
@ -101,35 +103,35 @@ static inline void pack_contiguous_loop( ompi_convertor_t* CONVERTOR,
|
|||||||
char** DESTINATION,
|
char** DESTINATION,
|
||||||
uint32_t* SPACE )
|
uint32_t* SPACE )
|
||||||
{
|
{
|
||||||
ddt_loop_desc_t *loop = (ddt_loop_desc_t*)(ELEM);
|
ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
||||||
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)((ELEM) + (ELEM)->loop.items);
|
ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
||||||
|
char* _source = (*SOURCE) + _end_loop->first_elem_disp;
|
||||||
size_t _copy_loops = *(COUNT);
|
size_t _copy_loops = *(COUNT);
|
||||||
uint32_t _i;
|
uint32_t _i;
|
||||||
|
|
||||||
if( (_copy_loops * end_loop->size) > *(SPACE) )
|
if( (_copy_loops * _end_loop->size) > *(SPACE) )
|
||||||
_copy_loops = *(SPACE) / end_loop->size;
|
_copy_loops = *(SPACE) / _end_loop->size;
|
||||||
for( _i = 0; _i < _copy_loops; _i++ ) {
|
for( _i = 0; _i < _copy_loops; _i++ ) {
|
||||||
OMPI_DDT_SAFEGUARD_POINTER( *(SOURCE),
|
OMPI_DDT_SAFEGUARD_POINTER( _source, _end_loop->size, (CONVERTOR)->pBaseBuf,
|
||||||
end_loop->size, (CONVERTOR)->pBaseBuf,
|
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %ld )\n",
|
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %ld )\n",
|
||||||
*(DESTINATION), *(SOURCE), end_loop->size ); );
|
*(DESTINATION), _source, _end_loop->size ); );
|
||||||
MEMCPY( *(DESTINATION), *(SOURCE), end_loop->size );
|
MEMCPY( *(DESTINATION), _source, _end_loop->size );
|
||||||
*(DESTINATION) += end_loop->size;
|
*(DESTINATION) += _end_loop->size;
|
||||||
*(SOURCE) += loop->extent;
|
_source += _loop->extent;
|
||||||
}
|
}
|
||||||
*(SPACE) -= _copy_loops * end_loop->size;
|
*(SOURCE) = _source - _end_loop->first_elem_disp;
|
||||||
|
*(SPACE) -= _copy_loops * _end_loop->size;
|
||||||
*(COUNT) -= _copy_loops;
|
*(COUNT) -= _copy_loops;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define PACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \
|
#define PACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \
|
||||||
pack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
pack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
||||||
|
|
||||||
#define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER, DISPLACEMENT ) \
|
#define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \
|
||||||
do { \
|
do { \
|
||||||
(ELEMENT) = &((DESCRIPTION)[(POSITION)]); \
|
(ELEMENT) = &((DESCRIPTION)[(POSITION)]); \
|
||||||
(COUNTER) = (ELEMENT)->elem.count; \
|
(COUNTER) = (ELEMENT)->elem.count; \
|
||||||
(DISPLACEMENT) = (ELEMENT)->elem.disp; \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
|
int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
|
||||||
@ -141,36 +143,39 @@ int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
|
|||||||
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||||
uint32_t count_desc; /* the number of items already done in the actual pos_desc */
|
uint32_t count_desc; /* the number of items already done in the actual pos_desc */
|
||||||
uint16_t type; /* type at current position */
|
uint16_t type; /* type at current position */
|
||||||
long disp_desc = 0; /* compute displacement for truncated data */
|
|
||||||
size_t total_packed = 0; /* total amount packed this time */
|
size_t total_packed = 0; /* total amount packed this time */
|
||||||
dt_elem_desc_t* description;
|
dt_elem_desc_t* description;
|
||||||
dt_elem_desc_t* pElem;
|
dt_elem_desc_t* pElem;
|
||||||
const ompi_datatype_t *pData = pConvertor->pDesc;
|
const ompi_datatype_t *pData = pConvertor->pDesc;
|
||||||
char *source_base, *source, *destination;
|
char *source_base, *destination;
|
||||||
uint32_t iov_len_local, iov_count;
|
uint32_t iov_len_local, iov_count, required_space = 0;
|
||||||
|
|
||||||
DUMP( "ompi_convertor_generic_simple_pack( %p, {%p, %d}, %d )\n", (void*)pConvertor,
|
DUMP( "ompi_convertor_generic_simple_pack( %p, {%p, %d}, %d )\n", (void*)pConvertor,
|
||||||
iov[0].iov_base, iov[0].iov_len, *out_size );
|
iov[0].iov_base, iov[0].iov_len, *out_size );
|
||||||
|
|
||||||
description = pConvertor->use_desc->desc;
|
description = pConvertor->use_desc->desc;
|
||||||
|
|
||||||
|
/* For the first step we have to add both displacement to the source. After in the
|
||||||
|
* main while loop we will set back the source_base to the correct value. This is
|
||||||
|
* due to the fact that the convertor can stop in the middle of a data with a count
|
||||||
|
*/
|
||||||
|
source_base = pConvertor->pBaseBuf;
|
||||||
pStack = pConvertor->pStack + pConvertor->stack_pos;
|
pStack = pConvertor->pStack + pConvertor->stack_pos;
|
||||||
pos_desc = pStack->index;
|
pos_desc = pStack->index;
|
||||||
disp_desc = pStack->disp;
|
source_base += pStack->disp;
|
||||||
count_desc = pStack->count;
|
count_desc = pStack->count;
|
||||||
pStack--;
|
pStack--;
|
||||||
pConvertor->stack_pos--;
|
pConvertor->stack_pos--;
|
||||||
pElem = &(description[pos_desc]);
|
pElem = &(description[pos_desc]);
|
||||||
source_base = pConvertor->pBaseBuf + pStack->disp;
|
source_base += pStack->disp;
|
||||||
source = source_base + disp_desc;
|
|
||||||
|
|
||||||
DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %d disp %ld\n"
|
DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %d disp %ld\n"
|
||||||
"stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
"stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
||||||
pos_desc, count_desc, disp_desc,
|
pos_desc, count_desc, source_base - pConvertor->pBaseBuf,
|
||||||
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
|
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
|
||||||
|
|
||||||
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
||||||
if( pConvertor->flags & CONVERTOR_COMPLETED )
|
if( required_space > ((*max_data) - total_packed) )
|
||||||
break; /* do not pack over the boundaries even if there are more iovecs */
|
break; /* do not pack over the boundaries even if there are more iovecs */
|
||||||
if( iov[iov_count].iov_base == NULL ) {
|
if( iov[iov_count].iov_base == NULL ) {
|
||||||
/*
|
/*
|
||||||
@ -181,6 +186,7 @@ int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
|
|||||||
length = pConvertor->count * pData->size - pConvertor->bConverted;
|
length = pConvertor->count * pData->size - pConvertor->bConverted;
|
||||||
if( ((*max_data) - total_packed) < length )
|
if( ((*max_data) - total_packed) < length )
|
||||||
length = (*max_data) - total_packed;
|
length = (*max_data) - total_packed;
|
||||||
|
assert( 0 < length );
|
||||||
iov[iov_count].iov_base = pConvertor->memAlloc_fn( &length, pConvertor->memAlloc_userdata );
|
iov[iov_count].iov_base = pConvertor->memAlloc_fn( &length, pConvertor->memAlloc_userdata );
|
||||||
iov[iov_count].iov_len = length;
|
iov[iov_count].iov_len = length;
|
||||||
*freeAfter = (*freeAfter) | (1 << iov_count);
|
*freeAfter = (*freeAfter) | (1 << iov_count);
|
||||||
@ -193,6 +199,10 @@ int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
|
|||||||
pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, iov_len_local ); );
|
pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, iov_len_local ); );
|
||||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||||
if( pConvertor->stack_pos == 0 ) {
|
if( pConvertor->stack_pos == 0 ) {
|
||||||
|
/* we lie about the size of the next element in order to
|
||||||
|
* make sure we exit the main loop.
|
||||||
|
*/
|
||||||
|
required_space = 0xffffffff;
|
||||||
pConvertor->flags |= CONVERTOR_COMPLETED;
|
pConvertor->flags |= CONVERTOR_COMPLETED;
|
||||||
goto complete_loop; /* completed */
|
goto complete_loop; /* completed */
|
||||||
}
|
}
|
||||||
@ -209,41 +219,43 @@ int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
source_base = pConvertor->pBaseBuf + pStack->disp;
|
source_base = pConvertor->pBaseBuf + pStack->disp;
|
||||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc, disp_desc );
|
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||||
DO_DEBUG( opal_output( 0, "pack new_loop count %d stack_pos %d pos_desc %d disp %ld space %d\n",
|
DO_DEBUG( opal_output( 0, "pack new_loop count %d stack_pos %d pos_desc %d disp %ld space %d\n",
|
||||||
pStack->count, pConvertor->stack_pos, pos_desc, disp_desc, iov_len_local ); );
|
pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, iov_len_local ); );
|
||||||
}
|
}
|
||||||
if( DT_LOOP == pElem->elem.common.type ) {
|
if( DT_LOOP == pElem->elem.common.type ) {
|
||||||
|
long local_disp = (long)source_base;
|
||||||
if( pElem->loop.common.flags & DT_FLAG_CONTIGUOUS ) {
|
if( pElem->loop.common.flags & DT_FLAG_CONTIGUOUS ) {
|
||||||
source = source_base + disp_desc;
|
|
||||||
PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
|
PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
|
||||||
source, destination, iov_len_local );
|
source_base, destination, iov_len_local );
|
||||||
if( 0 == count_desc ) { /* completed */
|
if( 0 == count_desc ) { /* completed */
|
||||||
pos_desc += pElem->loop.items + 1;
|
pos_desc += pElem->loop.items + 1;
|
||||||
goto update_loop_description;
|
goto update_loop_description;
|
||||||
}
|
}
|
||||||
/* Save the stack with the correct last_count value. */
|
/* Save the stack with the correct last_count value. */
|
||||||
}
|
}
|
||||||
|
local_disp = (long)source_base - local_disp;
|
||||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_LOOP, count_desc,
|
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_LOOP, count_desc,
|
||||||
pStack->disp, pos_desc + pElem->elem.disp + 1);
|
pStack->disp + local_disp, pos_desc + pElem->elem.disp + 1);
|
||||||
pos_desc++;
|
pos_desc++;
|
||||||
update_loop_description:
|
update_loop_description: /* update the current state */
|
||||||
/* update the current state */
|
source_base = pConvertor->pBaseBuf + pStack->disp;
|
||||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc, disp_desc );
|
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||||
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
|
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
while( pElem->elem.common.flags & DT_FLAG_DATA ) {
|
while( pElem->elem.common.flags & DT_FLAG_DATA ) {
|
||||||
/* now here we have a basic datatype */
|
/* now here we have a basic datatype */
|
||||||
type = pElem->elem.common.type;
|
PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
|
||||||
source = source_base + disp_desc;
|
source_base, destination, iov_len_local );
|
||||||
PACK_PREDEFINED_DATATYPE( pConvertor, type, count_desc, pElem->elem.extent,
|
|
||||||
source, destination, iov_len_local );
|
|
||||||
if( 0 != count_desc ) { /* completed */
|
if( 0 != count_desc ) { /* completed */
|
||||||
|
type = pElem->elem.common.type;
|
||||||
|
required_space = ompi_ddt_basicDatatypes[type]->size;
|
||||||
goto complete_loop;
|
goto complete_loop;
|
||||||
}
|
}
|
||||||
|
source_base = pConvertor->pBaseBuf + pStack->disp;
|
||||||
pos_desc++; /* advance to the next data */
|
pos_desc++; /* advance to the next data */
|
||||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc, disp_desc );
|
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
complete_loop:
|
complete_loop:
|
||||||
@ -253,10 +265,11 @@ int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
|
|||||||
assert( iov_len_local >= 0 );
|
assert( iov_len_local >= 0 );
|
||||||
}
|
}
|
||||||
*max_data = total_packed;
|
*max_data = total_packed;
|
||||||
|
*out_size = iov_count;
|
||||||
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
|
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
|
||||||
/* I complete an element, next step I should go to the next one */
|
/* I complete an element, next step I should go to the next one */
|
||||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_BYTE, count_desc,
|
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_BYTE, count_desc,
|
||||||
source - source_base, pos_desc );
|
source_base - pStack->disp - pConvertor->pBaseBuf, pos_desc );
|
||||||
DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
||||||
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
|
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -44,45 +44,48 @@ static int ompi_unpack_debug = 0;
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
static inline void unpack_predefined_data( ompi_convertor_t* CONVERTOR, /* the convertor */
|
static inline void unpack_predefined_data( ompi_convertor_t* CONVERTOR, /* the convertor */
|
||||||
uint16_t TYPE, /* the basic type to be packed */
|
dt_elem_desc_t* ELEM, /* the element description */
|
||||||
uint32_t* COUNT, /* the number of elements */
|
uint32_t* COUNT, /* the number of elements */
|
||||||
long EXTENT, /* the extent in bytes of each element */
|
char** SOURCE, /* the source pointer */
|
||||||
char** SOURCE, /* the source pointer */
|
char** DESTINATION, /* the destination pointer */
|
||||||
char** DESTINATION, /* the destination pointer */
|
uint32_t* SPACE ) /* the space in the destination buffer */
|
||||||
uint32_t* SPACE ) /* the space in the destination buffer */
|
|
||||||
{
|
{
|
||||||
uint32_t _copy_count = *(COUNT), _copy_blength;
|
uint32_t _copy_count = *(COUNT), _copy_blength;
|
||||||
|
ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||||
|
char* _destination = (*DESTINATION) + _elem->disp;
|
||||||
|
|
||||||
if( (_copy_count * ompi_ddt_basicDatatypes[(TYPE)]->size) > *(SPACE) ) {
|
_copy_blength = ompi_ddt_basicDatatypes[_elem->common.type]->size;
|
||||||
_copy_count = *(SPACE) / ompi_ddt_basicDatatypes[(TYPE)]->size;
|
if( (_copy_count * _copy_blength) > *(SPACE) ) {
|
||||||
|
_copy_count = *(SPACE) / _copy_blength;
|
||||||
if( 0 == _copy_count ) return; /* nothing to do */
|
if( 0 == _copy_count ) return; /* nothing to do */
|
||||||
}
|
}
|
||||||
_copy_blength = _copy_count * ompi_ddt_basicDatatypes[(TYPE)]->size;
|
|
||||||
|
|
||||||
if( ompi_ddt_basicDatatypes[(TYPE)]->size == (uint32_t)(EXTENT) ) {
|
if( _copy_blength == (uint32_t)_elem->disp ) {
|
||||||
|
_copy_blength *= _copy_count;
|
||||||
/* the extent and the size of the basic datatype are equals */
|
/* the extent and the size of the basic datatype are equals */
|
||||||
OMPI_DDT_SAFEGUARD_POINTER( *(DESTINATION), _copy_blength, (CONVERTOR)->pBaseBuf,
|
OMPI_DDT_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %ld ) => space %d\n",
|
DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %ld ) => space %d\n",
|
||||||
*(DESTINATION), *(SOURCE), _copy_blength, *(SPACE) ); );
|
_destination, *(SOURCE), _copy_blength, *(SPACE) ); );
|
||||||
MEMCPY( *(DESTINATION), *(SOURCE), _copy_blength );
|
MEMCPY( _destination, *(SOURCE), _copy_blength );
|
||||||
*(SOURCE) += _copy_blength;
|
*(SOURCE) += _copy_blength;
|
||||||
*(DESTINATION) += _copy_blength;
|
_destination += _copy_blength;
|
||||||
} else {
|
} else {
|
||||||
uint32_t _i;
|
uint32_t _i;
|
||||||
for( _i = 0; _i < _copy_count; _i++ ) {
|
for( _i = 0; _i < _copy_count; _i++ ) {
|
||||||
OMPI_DDT_SAFEGUARD_POINTER( *(DESTINATION), _copy_blength, (CONVERTOR)->pBaseBuf,
|
OMPI_DDT_SAFEGUARD_POINTER( _destination, _copy_blength, (CONVERTOR)->pBaseBuf,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %ld ) => space %d\n",
|
DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %ld ) => space %d\n",
|
||||||
*(DESTINATION), *(SOURCE), _copy_blength,
|
_destination, *(SOURCE), _copy_blength, *(SPACE) - (_i * _copy_blength) ); );
|
||||||
*(SPACE) - (_i * ompi_ddt_basicDatatypes[(TYPE)]->size) ); );
|
MEMCPY( _destination, *(SOURCE), _copy_blength );
|
||||||
MEMCPY( *(DESTINATION), *(SOURCE), ompi_ddt_basicDatatypes[(TYPE)]->size );
|
*(SOURCE) += _copy_blength;
|
||||||
*(SOURCE) += ompi_ddt_basicDatatypes[(TYPE)]->size;
|
_destination += _elem->extent;
|
||||||
*(DESTINATION) += (EXTENT);
|
|
||||||
}
|
}
|
||||||
|
_copy_blength *= _copy_count;
|
||||||
}
|
}
|
||||||
*(SPACE) -= _copy_blength;
|
(*DESTINATION) = _destination - _elem->disp;
|
||||||
*(COUNT) -= _copy_count;
|
*(SPACE) -= _copy_blength;
|
||||||
|
*(COUNT) -= _copy_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void unpack_contiguous_loop( ompi_convertor_t* CONVERTOR,
|
static inline void unpack_contiguous_loop( ompi_convertor_t* CONVERTOR,
|
||||||
@ -92,38 +95,38 @@ static inline void unpack_contiguous_loop( ompi_convertor_t* CONVERTOR,
|
|||||||
char** DESTINATION,
|
char** DESTINATION,
|
||||||
uint32_t* SPACE )
|
uint32_t* SPACE )
|
||||||
{
|
{
|
||||||
ddt_loop_desc_t *loop = (ddt_loop_desc_t*)(ELEM);
|
ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
||||||
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)((ELEM) + (ELEM)->loop.items);
|
ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
||||||
|
char* _destination = (*DESTINATION) + _end_loop->first_elem_disp;
|
||||||
size_t _copy_loops = *(COUNT);
|
size_t _copy_loops = *(COUNT);
|
||||||
uint32_t _i;
|
uint32_t _i;
|
||||||
|
|
||||||
if( (_copy_loops * end_loop->size) > *(SPACE) )
|
if( (_copy_loops * _end_loop->size) > *(SPACE) )
|
||||||
_copy_loops = *(SPACE) / end_loop->size;
|
_copy_loops = *(SPACE) / _end_loop->size;
|
||||||
for( _i = 0; _i < _copy_loops; _i++ ) {
|
for( _i = 0; _i < _copy_loops; _i++ ) {
|
||||||
OMPI_DDT_SAFEGUARD_POINTER( *(DESTINATION),
|
OMPI_DDT_SAFEGUARD_POINTER( _destination, _end_loop->size, (CONVERTOR)->pBaseBuf,
|
||||||
end_loop->size, (CONVERTOR)->pBaseBuf,
|
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %ld )\n",
|
DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %ld )\n",
|
||||||
*(DESTINATION), *(SOURCE), end_loop->size ); );
|
_destination, *(SOURCE), _end_loop->size ); );
|
||||||
MEMCPY( *(DESTINATION), *(SOURCE), end_loop->size );
|
MEMCPY( _destination, *(SOURCE), _end_loop->size );
|
||||||
*(SOURCE) += end_loop->size;
|
*(SOURCE) += _end_loop->size;
|
||||||
*(DESTINATION) += loop->extent;
|
_destination += _loop->extent;
|
||||||
}
|
}
|
||||||
*(SPACE) -= _copy_loops * end_loop->size;
|
*(DESTINATION) = _destination - _end_loop->first_elem_disp;
|
||||||
*(COUNT) -= _copy_loops;
|
*(SPACE) -= _copy_loops * _end_loop->size;
|
||||||
|
*(COUNT) -= _copy_loops;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define UNPACK_PREDEFINED_DATATYPE( CONVERTOR, TYPE, COUNT, EXTENT, SOURCE, DESTINATION, SPACE ) \
|
#define UNPACK_PREDEFINED_DATATYPE( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \
|
||||||
unpack_predefined_data( (CONVERTOR), (TYPE), &(COUNT), (EXTENT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
unpack_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
||||||
|
|
||||||
#define UNPACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \
|
#define UNPACK_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, SOURCE, DESTINATION, SPACE ) \
|
||||||
unpack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
unpack_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(SOURCE), &(DESTINATION), &(SPACE) )
|
||||||
|
|
||||||
#define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER, DISPLACEMENT ) \
|
#define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \
|
||||||
do { \
|
do { \
|
||||||
(ELEMENT) = &((DESCRIPTION)[(POSITION)]); \
|
(ELEMENT) = &((DESCRIPTION)[(POSITION)]); \
|
||||||
(COUNTER) = (ELEMENT)->elem.count; \
|
(COUNTER) = (ELEMENT)->elem.count; \
|
||||||
(DISPLACEMENT) = (ELEMENT)->elem.disp; \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
int ompi_convertor_generic_simple_unpack( ompi_convertor_t* pConvertor,
|
int ompi_convertor_generic_simple_unpack( ompi_convertor_t* pConvertor,
|
||||||
@ -131,41 +134,44 @@ int ompi_convertor_generic_simple_unpack( ompi_convertor_t* pConvertor,
|
|||||||
size_t* max_data,
|
size_t* max_data,
|
||||||
int32_t* freeAfter )
|
int32_t* freeAfter )
|
||||||
{
|
{
|
||||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||||
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||||
uint32_t count_desc; /* the number of items already done in the actual pos_desc */
|
uint32_t count_desc; /* the number of items already done in the actual pos_desc */
|
||||||
uint16_t type; /* type at current position */
|
uint16_t type; /* type at current position */
|
||||||
long disp_desc = 0; /* compute displacement for truncated data */
|
size_t total_unpacked = 0; /* total size unpacked this time */
|
||||||
size_t total_unpack = 0; /* total size unpacked this time */
|
|
||||||
dt_elem_desc_t* description;
|
dt_elem_desc_t* description;
|
||||||
dt_elem_desc_t* pElem;
|
dt_elem_desc_t* pElem;
|
||||||
const ompi_datatype_t *pData = pConvertor->pDesc;
|
const ompi_datatype_t *pData = pConvertor->pDesc;
|
||||||
char *user_memory_base, *user_memory, *packed_buffer;
|
char *user_memory_base, *packed_buffer;
|
||||||
uint32_t iov_len_local, iov_count;
|
uint32_t iov_len_local, iov_count, required_space = 0;
|
||||||
|
|
||||||
DUMP( "ompi_convertor_generic_simple_unpack( %p, {%p, %d}, %d )\n", (void*)pConvertor,
|
DUMP( "ompi_convertor_generic_simple_unpack( %p, {%p, %d}, %d )\n", (void*)pConvertor,
|
||||||
iov[0].iov_base, iov[0].iov_len, *out_size );
|
iov[0].iov_base, iov[0].iov_len, *out_size );
|
||||||
|
|
||||||
description = pConvertor->use_desc->desc;
|
description = pConvertor->use_desc->desc;
|
||||||
|
|
||||||
|
/* For the first step we have to add both displacement to the source. After in the
|
||||||
|
* main while loop we will set back the source_base to the correct value. This is
|
||||||
|
* due to the fact that the convertor can stop in the middle of a data with a count
|
||||||
|
*/
|
||||||
|
user_memory_base = pConvertor->pBaseBuf;
|
||||||
pStack = pConvertor->pStack + pConvertor->stack_pos;
|
pStack = pConvertor->pStack + pConvertor->stack_pos;
|
||||||
pos_desc = pStack->index;
|
pos_desc = pStack->index;
|
||||||
disp_desc = pStack->disp;
|
user_memory_base += pStack->disp;
|
||||||
count_desc = pStack->count;
|
count_desc = pStack->count;
|
||||||
pStack--;
|
pStack--;
|
||||||
pConvertor->stack_pos--;
|
pConvertor->stack_pos--;
|
||||||
pElem = &(description[pos_desc]);
|
pElem = &(description[pos_desc]);
|
||||||
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
|
user_memory_base += pStack->disp;
|
||||||
user_memory = user_memory_base + disp_desc;
|
|
||||||
|
|
||||||
DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %d disp %ld\n"
|
DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %d disp %ld\n"
|
||||||
"stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
"stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
||||||
pos_desc, count_desc, disp_desc,
|
pos_desc, count_desc, user_memory_base - pConvertor->pBaseBuf,
|
||||||
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
|
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
|
||||||
|
|
||||||
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
||||||
if( pConvertor->flags & CONVERTOR_COMPLETED )
|
if( required_space > ((*max_data) - total_unpacked) )
|
||||||
break; /* do not unpack over the boundaries even if there are more iovecs */
|
break; /* do not pack over the boundaries even if there are more iovecs */
|
||||||
|
|
||||||
packed_buffer = iov[iov_count].iov_base;
|
packed_buffer = iov[iov_count].iov_base;
|
||||||
iov_len_local = iov[iov_count].iov_len;
|
iov_len_local = iov[iov_count].iov_len;
|
||||||
@ -175,6 +181,10 @@ int ompi_convertor_generic_simple_unpack( ompi_convertor_t* pConvertor,
|
|||||||
pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, iov_len_local ); );
|
pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, iov_len_local ); );
|
||||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||||
if( pConvertor->stack_pos == 0 ) {
|
if( pConvertor->stack_pos == 0 ) {
|
||||||
|
/* we lie about the size of the next element in order to
|
||||||
|
* make sure we exit the main loop.
|
||||||
|
*/
|
||||||
|
required_space = 0xffffffff;
|
||||||
pConvertor->flags |= CONVERTOR_COMPLETED;
|
pConvertor->flags |= CONVERTOR_COMPLETED;
|
||||||
goto complete_loop; /* completed */
|
goto complete_loop; /* completed */
|
||||||
}
|
}
|
||||||
@ -191,54 +201,57 @@ int ompi_convertor_generic_simple_unpack( ompi_convertor_t* pConvertor,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
|
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
|
||||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc, disp_desc );
|
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||||
DO_DEBUG( opal_output( 0, "unpack new_loop count %d stack_pos %d pos_desc %d disp %ld space %d\n",
|
DO_DEBUG( opal_output( 0, "unpack new_loop count %d stack_pos %d pos_desc %d disp %ld space %d\n",
|
||||||
pStack->count, pConvertor->stack_pos, pos_desc, disp_desc, iov_len_local ); );
|
pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, iov_len_local ); );
|
||||||
}
|
}
|
||||||
if( DT_LOOP == pElem->elem.common.type ) {
|
if( DT_LOOP == pElem->elem.common.type ) {
|
||||||
|
long local_disp = (long)user_memory_base;
|
||||||
if( pElem->loop.common.flags & DT_FLAG_CONTIGUOUS ) {
|
if( pElem->loop.common.flags & DT_FLAG_CONTIGUOUS ) {
|
||||||
user_memory = user_memory_base + disp_desc;
|
|
||||||
UNPACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
|
UNPACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
|
||||||
packed_buffer, user_memory, iov_len_local );
|
packed_buffer, user_memory_base, iov_len_local );
|
||||||
if( 0 == count_desc ) { /* completed */
|
if( 0 == count_desc ) { /* completed */
|
||||||
pos_desc += pElem->loop.items + 1;
|
pos_desc += pElem->loop.items + 1;
|
||||||
goto update_loop_description;
|
goto update_loop_description;
|
||||||
}
|
}
|
||||||
/* Save the stack with the correct last_count value. */
|
/* Save the stack with the correct last_count value. */
|
||||||
}
|
}
|
||||||
|
local_disp = (long)user_memory_base - local_disp;
|
||||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_LOOP, count_desc,
|
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_LOOP, count_desc,
|
||||||
pStack->disp, pos_desc + pElem->elem.disp + 1);
|
pStack->disp + local_disp, pos_desc + pElem->elem.disp + 1);
|
||||||
pos_desc++;
|
pos_desc++;
|
||||||
update_loop_description:
|
update_loop_description: /* update the current state */
|
||||||
/* update the current state */
|
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
|
||||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc, disp_desc );
|
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||||
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
|
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
while( pElem->elem.common.flags & DT_FLAG_DATA ) {
|
while( pElem->elem.common.flags & DT_FLAG_DATA ) {
|
||||||
/* now here we have a basic datatype */
|
/* now here we have a basic datatype */
|
||||||
type = pElem->elem.common.type;
|
UNPACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
|
||||||
user_memory = user_memory_base + disp_desc;
|
packed_buffer, user_memory_base, iov_len_local );
|
||||||
UNPACK_PREDEFINED_DATATYPE( pConvertor, type, count_desc, pElem->elem.extent,
|
|
||||||
packed_buffer, user_memory, iov_len_local );
|
|
||||||
if( 0 != count_desc ) { /* completed */
|
if( 0 != count_desc ) { /* completed */
|
||||||
|
type = pElem->elem.common.type;
|
||||||
|
required_space = ompi_ddt_basicDatatypes[type]->size;
|
||||||
goto complete_loop;
|
goto complete_loop;
|
||||||
}
|
}
|
||||||
|
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
|
||||||
pos_desc++; /* advance to the next data */
|
pos_desc++; /* advance to the next data */
|
||||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc, disp_desc );
|
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
complete_loop:
|
complete_loop:
|
||||||
iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */
|
iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */
|
||||||
total_unpack += iov[iov_count].iov_len;
|
total_unpacked += iov[iov_count].iov_len;
|
||||||
pConvertor->bConverted += iov[iov_count].iov_len; /* update the already converted bytes */
|
pConvertor->bConverted += iov[iov_count].iov_len; /* update the already converted bytes */
|
||||||
assert( iov_len_local >= 0 );
|
assert( iov_len_local >= 0 );
|
||||||
}
|
}
|
||||||
*max_data = total_unpack;
|
*max_data = total_unpacked;
|
||||||
|
*out_size = iov_count;
|
||||||
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
|
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
|
||||||
/* I complete an element, next step I should go to the next one */
|
/* I complete an element, next step I should go to the next one */
|
||||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_BYTE, count_desc,
|
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_BYTE, count_desc,
|
||||||
user_memory - user_memory_base, pos_desc );
|
user_memory_base - pStack->disp - pConvertor->pBaseBuf, pos_desc );
|
||||||
DO_DEBUG( opal_output( 0, "unpack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
DO_DEBUG( opal_output( 0, "unpack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
||||||
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
|
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
|
||||||
return 0;
|
return 0;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user