Clean and sync the pack and unpack functions.
- optimize handling of contiguous with gaps datatypes. - fixes a performance issue for all datatypes with a count of 1. - optimize the pack/unpack of contiguous with gaps datatype. - optimize the case of blocklen == 1 Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
0a00b02e48
Коммит
012a004806
@ -31,8 +31,8 @@
|
|||||||
#endif /* OPAL_ENABLE_DEBUG */
|
#endif /* OPAL_ENABLE_DEBUG */
|
||||||
|
|
||||||
/* Take a new iovec (base + len) and try to merge it with what we already
|
/* Take a new iovec (base + len) and try to merge it with what we already
|
||||||
* have. If we succeed return 0 and move forward, if not save it into a new
|
* have. If we succeed return 0 and move forward, otherwise save it into a new
|
||||||
* iovec location. If we need to go to a new position and we reach the end
|
* iovec location. If we need to advance position and we reach the end
|
||||||
* of the iovec array, return 1 to signal we did not save the last iovec.
|
* of the iovec array, return 1 to signal we did not save the last iovec.
|
||||||
*/
|
*/
|
||||||
static inline int
|
static inline int
|
||||||
@ -46,7 +46,7 @@ opal_convertor_merge_iov( struct iovec* iov, uint32_t* iov_count,
|
|||||||
return 0;
|
return 0;
|
||||||
} /* cannot merge, move to the next position */
|
} /* cannot merge, move to the next position */
|
||||||
*idx = *idx + 1;
|
*idx = *idx + 1;
|
||||||
if( *idx == *iov_count ) return 1; /* do not overwrite outside the iove array boundaries */
|
if( *idx == *iov_count ) return 1; /* do not overwrite outside the iovec array boundaries */
|
||||||
}
|
}
|
||||||
iov[*idx].iov_base = base;
|
iov[*idx].iov_base = base;
|
||||||
iov[*idx].iov_len = len;
|
iov[*idx].iov_len = len;
|
||||||
|
@ -51,11 +51,9 @@ static inline void _predefined_data( const dt_elem_desc_t* ELEM,
|
|||||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||||
unsigned char* _source = (SOURCE) + _elem->disp;
|
unsigned char* _source = (SOURCE) + _elem->disp;
|
||||||
unsigned char* _destination = (DESTINATION) + _elem->disp;
|
unsigned char* _destination = (DESTINATION) + _elem->disp;
|
||||||
size_t total_count = _elem->count * _elem->blocklen;
|
size_t do_now = _elem->count, do_now_bytes;
|
||||||
size_t do_now, do_now_bytes;
|
|
||||||
|
|
||||||
assert( (COUNT) == total_count);
|
assert( (COUNT) == (do_now * _elem->blocklen));
|
||||||
assert( total_count <= ((*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size) );
|
|
||||||
|
|
||||||
/* We don't need a prologue and epilogue here as we are __always__ working
|
/* We don't need a prologue and epilogue here as we are __always__ working
|
||||||
* with full copies of the data description.
|
* with full copies of the data description.
|
||||||
@ -64,21 +62,19 @@ static inline void _predefined_data( const dt_elem_desc_t* ELEM,
|
|||||||
/**
|
/**
|
||||||
* Compute how many full blocklen we need to do and do them.
|
* Compute how many full blocklen we need to do and do them.
|
||||||
*/
|
*/
|
||||||
do_now = _elem->count;
|
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
if( 0 != do_now ) {
|
assert( (do_now * do_now_bytes) <= (*SPACE) );
|
||||||
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
|
||||||
for(size_t _i = 0; _i < do_now; _i++ ) {
|
for(size_t _i = 0; _i < do_now; _i++ ) {
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, do_now_bytes, (SOURCE_BASE),
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, do_now_bytes, (SOURCE_BASE),
|
||||||
(DATATYPE), (TOTAL_COUNT) );
|
(DATATYPE), (TOTAL_COUNT) );
|
||||||
DO_DEBUG( opal_output( 0, "copy %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n",
|
DO_DEBUG( opal_output( 0, "copy %s( %p, %p, %" PRIsize_t " ) => space %" PRIsize_t "\n",
|
||||||
STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, do_now_bytes, *(SPACE) ); );
|
STRINGIFY(MEM_OP_NAME), (void*)_destination, (void*)_source, do_now_bytes, *(SPACE) - _i * do_now_bytes ); );
|
||||||
MEM_OP( _destination, _source, do_now_bytes );
|
MEM_OP( _destination, _source, do_now_bytes );
|
||||||
_destination += _elem->extent;
|
_destination += _elem->extent;
|
||||||
_source += _elem->extent;
|
_source += _elem->extent;
|
||||||
*(SPACE) -= do_now_bytes;
|
|
||||||
}
|
|
||||||
(COUNT) -= total_count;
|
|
||||||
}
|
}
|
||||||
|
*(SPACE) -= (do_now_bytes * do_now);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void _contiguous_loop( const dt_elem_desc_t* ELEM,
|
static inline void _contiguous_loop( const dt_elem_desc_t* ELEM,
|
||||||
|
@ -224,7 +224,6 @@ int32_t opal_datatype_init( void )
|
|||||||
OPAL_DATATYPE_FLAG_CONTIGUOUS |
|
OPAL_DATATYPE_FLAG_CONTIGUOUS |
|
||||||
OPAL_DATATYPE_FLAG_NO_GAPS;
|
OPAL_DATATYPE_FLAG_NO_GAPS;
|
||||||
datatype->desc.desc[0].elem.common.type = i;
|
datatype->desc.desc[0].elem.common.type = i;
|
||||||
/* datatype->desc.desc[0].elem.blocklen XXX not set at the moment, it will be needed later */
|
|
||||||
datatype->desc.desc[0].elem.count = 1;
|
datatype->desc.desc[0].elem.count = 1;
|
||||||
datatype->desc.desc[0].elem.blocklen = 1;
|
datatype->desc.desc[0].elem.blocklen = 1;
|
||||||
datatype->desc.desc[0].elem.disp = 0;
|
datatype->desc.desc[0].elem.disp = 0;
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||||
@ -53,8 +53,6 @@
|
|||||||
#endif /* defined(CHECKSUM) */
|
#endif /* defined(CHECKSUM) */
|
||||||
|
|
||||||
|
|
||||||
#define IOVEC_MEM_LIMIT 8192
|
|
||||||
|
|
||||||
/* the contig versions do not use the stack. They can easily retrieve
|
/* the contig versions do not use the stack. They can easily retrieve
|
||||||
* the status with just the information from pConvertor->bConverted.
|
* the status with just the information from pConvertor->bConverted.
|
||||||
*/
|
*/
|
||||||
@ -68,9 +66,8 @@ opal_pack_homogeneous_contig_function( opal_convertor_t* pConv,
|
|||||||
unsigned char *source_base = NULL;
|
unsigned char *source_base = NULL;
|
||||||
uint32_t iov_count;
|
uint32_t iov_count;
|
||||||
size_t length = pConv->local_size - pConv->bConverted, initial_amount = pConv->bConverted;
|
size_t length = pConv->local_size - pConv->bConverted, initial_amount = pConv->bConverted;
|
||||||
ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;
|
|
||||||
|
|
||||||
source_base = (pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp);
|
source_base = (pConv->pBaseBuf + pConv->pDesc->true_lb + pStack[0].disp + pStack[1].disp);
|
||||||
|
|
||||||
/* There are some optimizations that can be done if the upper level
|
/* There are some optimizations that can be done if the upper level
|
||||||
* does not provide a buffer.
|
* does not provide a buffer.
|
||||||
@ -111,155 +108,116 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
|
|||||||
uint32_t* out_size,
|
uint32_t* out_size,
|
||||||
size_t* max_data )
|
size_t* max_data )
|
||||||
{
|
{
|
||||||
|
size_t remaining, length, initial_bytes_converted = pConv->bConverted;
|
||||||
const opal_datatype_t* pData = pConv->pDesc;
|
const opal_datatype_t* pData = pConv->pDesc;
|
||||||
dt_stack_t* stack = pConv->pStack;
|
dt_stack_t* stack = pConv->pStack;
|
||||||
|
ptrdiff_t extent = pData->ub - pData->lb;
|
||||||
unsigned char *user_memory, *packed_buffer;
|
unsigned char *user_memory, *packed_buffer;
|
||||||
uint32_t iov_count, index;
|
uint32_t idx;
|
||||||
size_t i;
|
size_t i;
|
||||||
size_t bConverted, remaining, length, initial_bytes_converted = pConv->bConverted;
|
|
||||||
ptrdiff_t extent= pData->ub - pData->lb;
|
|
||||||
ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;
|
|
||||||
|
|
||||||
|
/* The memory layout is contiguous with gaps in the beginning and at the end. The datatype true_lb
|
||||||
|
* is the initial displacement, the size is the length of the contiguous area, and the extent represents
|
||||||
|
* how much we should jump between elements.
|
||||||
|
*/
|
||||||
assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((ptrdiff_t)pData->size != extent) );
|
assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((ptrdiff_t)pData->size != extent) );
|
||||||
DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n",
|
DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n",
|
||||||
(void*)pConv->pBaseBuf, *out_size ); );
|
(void*)pConv->pBaseBuf, *out_size ); );
|
||||||
if( stack[1].type != opal_datatype_uint1.id ) {
|
if( stack[1].type != opal_datatype_uint1.id ) {
|
||||||
stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size;
|
stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size;
|
||||||
stack[1].type = opal_datatype_uint1.id;
|
stack[1].type = opal_datatype_uint1.id;
|
||||||
|
}
|
||||||
|
/* We can provide directly the pointers in the user buffers (like the convertor_raw) */
|
||||||
|
if( NULL == iov[0].iov_base ) {
|
||||||
|
user_memory = pConv->pBaseBuf + pData->true_lb;
|
||||||
|
|
||||||
|
for( idx = 0; (idx < (*out_size)) && stack[0].count; idx++ ) {
|
||||||
|
iov[idx].iov_base = user_memory + stack[0].disp + stack[1].disp;
|
||||||
|
iov[idx].iov_len = stack[1].count;
|
||||||
|
COMPUTE_CSUM( iov[idx].iov_base, iov[idx].iov_len, pConv );
|
||||||
|
|
||||||
|
pConv->bConverted += stack[1].count;
|
||||||
|
|
||||||
|
stack[0].disp += extent;
|
||||||
|
stack[0].count--;
|
||||||
|
stack[1].disp = 0;
|
||||||
|
stack[1].count = pData->size; /* we might need this to update the partial
|
||||||
|
* length for the first iteration */
|
||||||
|
}
|
||||||
|
goto update_status_and_return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* There are some optimizations that can be done if the upper level
|
for( idx = 0; idx < (*out_size); idx++ ) {
|
||||||
* does not provide a buffer.
|
|
||||||
*/
|
|
||||||
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
|
||||||
/* Limit the amount of packed data to the data left over on this convertor */
|
/* Limit the amount of packed data to the data left over on this convertor */
|
||||||
remaining = pConv->local_size - pConv->bConverted;
|
remaining = pConv->local_size - pConv->bConverted;
|
||||||
if( 0 == remaining ) break; /* we're done this time */
|
if( 0 == remaining ) break; /* we're done this time */
|
||||||
if( remaining > iov[iov_count].iov_len )
|
if( remaining > iov[idx].iov_len )
|
||||||
remaining = iov[iov_count].iov_len;
|
remaining = iov[idx].iov_len;
|
||||||
packed_buffer = (unsigned char *)iov[iov_count].iov_base;
|
packed_buffer = (unsigned char *)iov[idx].iov_base;
|
||||||
bConverted = remaining; /* how much will get unpacked this time */
|
pConv->bConverted += remaining;
|
||||||
user_memory = pConv->pBaseBuf + initial_displ + stack[0].disp + stack[1].disp;
|
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;
|
||||||
i = pConv->count - stack[0].count; /* how many we already packed */
|
|
||||||
assert(i == (pConv->bConverted / pData->size));
|
|
||||||
|
|
||||||
if( packed_buffer == NULL ) {
|
DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( user_memory %p, packed_buffer %p length %" PRIsize_t "\n",
|
||||||
/* special case for small data. We avoid allocating memory if we
|
(void*)user_memory, (void*)packed_buffer, remaining ); );
|
||||||
* can fill the iovec directly with the address of the remaining
|
|
||||||
* data.
|
length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last pack */
|
||||||
*/
|
/* data left from last round and enough space in the buffer */
|
||||||
if( stack->count < (size_t)((*out_size) - iov_count) ) {
|
if( (pData->size != length) && (length <= remaining)) {
|
||||||
stack[1].count = pData->size - (pConv->bConverted % pData->size);
|
/* copy the partial left-over from the previous round */
|
||||||
for( index = iov_count; i < pConv->count; i++, index++ ) {
|
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf,
|
||||||
iov[index].iov_base = (IOVBASE_TYPE *) user_memory;
|
pData, pConv->count );
|
||||||
iov[index].iov_len = stack[1].count;
|
DO_DEBUG( opal_output( 0, "pack dest %p src %p length %" PRIsize_t " [prologue]\n",
|
||||||
stack[0].disp += extent;
|
(void*)user_memory, (void*)packed_buffer, length ); );
|
||||||
pConv->bConverted += stack[1].count;
|
MEMCPY_CSUM( packed_buffer, user_memory, length, pConv );
|
||||||
stack[1].disp = 0; /* reset it for the next round */
|
packed_buffer += length;
|
||||||
stack[1].count = pData->size;
|
remaining -= length;
|
||||||
user_memory = pConv->pBaseBuf + initial_displ + stack[0].disp;
|
stack[1].count -= length;
|
||||||
COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv );
|
stack[1].disp += length; /* just in case, we overwrite this below */
|
||||||
}
|
if( 0 == stack[1].count) { /* one completed element */
|
||||||
*out_size = iov_count + index;
|
stack[0].count--;
|
||||||
*max_data = (pConv->bConverted - initial_bytes_converted);
|
stack[0].disp += extent;
|
||||||
pConv->flags |= CONVERTOR_COMPLETED;
|
if( 0 == stack[0].count ) /* not yet done */
|
||||||
return 1; /* we're done */
|
break;
|
||||||
}
|
stack[1].count = pData->size;
|
||||||
/* now special case for big contiguous data with gaps around */
|
stack[1].disp = 0;
|
||||||
if( pData->size >= IOVEC_MEM_LIMIT ) {
|
|
||||||
/* as we dont have to copy any data, we can simply fill the iovecs
|
|
||||||
* with data from the user data description.
|
|
||||||
*/
|
|
||||||
for( index = iov_count; (i < pConv->count) && (index < (*out_size));
|
|
||||||
i++, index++ ) {
|
|
||||||
if( remaining < pData->size ) {
|
|
||||||
iov[index].iov_base = (IOVBASE_TYPE *) user_memory;
|
|
||||||
iov[index].iov_len = remaining;
|
|
||||||
remaining = 0;
|
|
||||||
COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv );
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
iov[index].iov_base = (IOVBASE_TYPE *) user_memory;
|
|
||||||
iov[index].iov_len = pData->size;
|
|
||||||
user_memory += extent;
|
|
||||||
COMPUTE_CSUM( iov[index].iov_base, (size_t)iov[index].iov_len, pConv );
|
|
||||||
}
|
|
||||||
remaining -= iov[index].iov_len;
|
|
||||||
pConv->bConverted += iov[index].iov_len;
|
|
||||||
}
|
|
||||||
*out_size = index;
|
|
||||||
*max_data = (pConv->bConverted - initial_bytes_converted);
|
|
||||||
if( pConv->bConverted == pConv->local_size ) {
|
|
||||||
pConv->flags |= CONVERTOR_COMPLETED;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
for( i = 0; pData->size <= remaining; i++ ) {
|
||||||
DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n",
|
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf,
|
||||||
(void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); );
|
pData, pConv->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "pack dest %p src %p length %" PRIsize_t " [%" PRIsize_t "/%" PRIsize_t "\n",
|
||||||
|
(void*)user_memory, (void*)packed_buffer, pData->size, remaining, iov[idx].iov_len ); );
|
||||||
|
MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv );
|
||||||
|
packed_buffer += pData->size;
|
||||||
|
user_memory += extent;
|
||||||
|
remaining -= pData->size;
|
||||||
|
}
|
||||||
|
stack[0].count -= i; /* the entire datatype copied above */
|
||||||
|
stack[0].disp += (i * extent);
|
||||||
|
|
||||||
length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last pack */
|
/* Copy the last bits */
|
||||||
/* data left from last round and enough space in the buffer */
|
if( 0 != remaining ) {
|
||||||
if( (0 != length) && (length <= remaining)) {
|
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf,
|
||||||
/* copy the partial left-over from the previous round */
|
pData, pConv->count );
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf,
|
DO_DEBUG( opal_output( 0, "4. pack dest %p src %p length %" PRIsize_t "\n",
|
||||||
pData, pConv->count );
|
(void*)user_memory, (void*)packed_buffer, remaining ); );
|
||||||
DO_DEBUG( opal_output( 0, "2. pack dest %p src %p length %lu\n",
|
MEMCPY_CSUM( packed_buffer, user_memory, remaining, pConv );
|
||||||
(void*)user_memory, (void*)packed_buffer, (unsigned long)length ); );
|
stack[1].count -= remaining;
|
||||||
MEMCPY_CSUM( packed_buffer, user_memory, length, pConv );
|
stack[1].disp += remaining; /* keep the += in case we are copying less that the datatype size */
|
||||||
packed_buffer += length;
|
|
||||||
user_memory += (extent - pData->size + length);
|
|
||||||
remaining -= length;
|
|
||||||
stack[1].count -= length;
|
|
||||||
if( 0 == stack[1].count) { /* one completed element */
|
|
||||||
stack[0].count--;
|
|
||||||
stack[0].disp += extent;
|
|
||||||
if( 0 != stack[0].count ) { /* not yet done */
|
|
||||||
stack[1].count = pData->size;
|
|
||||||
stack[1].disp = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for( i = 0; pData->size <= remaining; i++ ) {
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf,
|
|
||||||
pData, pConv->count );
|
|
||||||
DO_DEBUG( opal_output( 0, "3. pack dest %p src %p length %lu\n",
|
|
||||||
(void*)user_memory, (void*)packed_buffer, (unsigned long)pData->size ); );
|
|
||||||
MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv );
|
|
||||||
packed_buffer += pData->size;
|
|
||||||
user_memory += extent;
|
|
||||||
remaining -= pData->size;
|
|
||||||
}
|
|
||||||
stack[0].count -= i; /* the filled up and the entire types */
|
|
||||||
stack[0].disp += (i * extent);
|
|
||||||
stack[1].disp += remaining;
|
|
||||||
/* Copy the last bits */
|
|
||||||
if( 0 != remaining ) {
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf,
|
|
||||||
pData, pConv->count );
|
|
||||||
DO_DEBUG( opal_output( 0, "4. pack dest %p src %p length %lu\n",
|
|
||||||
(void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); );
|
|
||||||
MEMCPY_CSUM( packed_buffer, user_memory, remaining, pConv );
|
|
||||||
user_memory += remaining;
|
|
||||||
stack[1].count -= remaining;
|
|
||||||
}
|
|
||||||
if( 0 == stack[1].count ) { /* prepare for the next element */
|
if( 0 == stack[1].count ) { /* prepare for the next element */
|
||||||
stack[1].count = pData->size;
|
stack[1].count = pData->size;
|
||||||
stack[1].disp = 0;
|
stack[1].disp = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pConv->bConverted += bConverted;
|
|
||||||
}
|
}
|
||||||
*out_size = iov_count;
|
|
||||||
*max_data = (pConv->bConverted - initial_bytes_converted);
|
update_status_and_return:
|
||||||
if( pConv->bConverted == pConv->local_size ) {
|
*out_size = idx;
|
||||||
pConv->flags |= CONVERTOR_COMPLETED;
|
*max_data = pConv->bConverted - initial_bytes_converted;
|
||||||
return 1;
|
if( pConv->bConverted == pConv->local_size ) pConv->flags |= CONVERTOR_COMPLETED;
|
||||||
}
|
return !!(pConv->flags & CONVERTOR_COMPLETED); /* done or not */
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The pack/unpack functions need a cleanup. I have to create a proper interface to access
|
/* The pack/unpack functions need a cleanup. I have to create a proper interface to access
|
||||||
|
@ -35,82 +35,90 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
|
|||||||
size_t* SPACE )
|
size_t* SPACE )
|
||||||
{
|
{
|
||||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||||
size_t total_count = _elem->count * _elem->blocklen;
|
|
||||||
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
|
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
size_t do_now, do_now_bytes;
|
size_t do_now, do_now_bytes;
|
||||||
|
size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
unsigned char* _memory = (*memory) + _elem->disp;
|
unsigned char* _memory = (*memory) + _elem->disp;
|
||||||
|
unsigned char* _packed = *packed;
|
||||||
|
|
||||||
assert( *(COUNT) <= _elem->count * _elem->blocklen);
|
assert( *(COUNT) <= _elem->count * _elem->blocklen);
|
||||||
|
|
||||||
if( cando_count > *(COUNT) )
|
if( cando_count > *(COUNT) )
|
||||||
cando_count = *(COUNT);
|
cando_count = *(COUNT);
|
||||||
|
|
||||||
/**
|
if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */
|
||||||
* First check if we already did something on this element ?
|
*(COUNT) -= cando_count;
|
||||||
*/
|
for(; cando_count > 0; cando_count--) {
|
||||||
do_now = (total_count - *(COUNT)); /* done elements */
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
if( 0 != do_now ) {
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
do_now = do_now % _elem->blocklen; /* partial blocklen? */
|
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
|
||||||
|
(void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
|
||||||
if( 0 != do_now ) {
|
MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) );
|
||||||
size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */
|
_packed += blocklen_bytes;
|
||||||
do_now = (left_in_block > cando_count ) ? cando_count : left_in_block;
|
_memory += _elem->extent;
|
||||||
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
|
||||||
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
|
||||||
DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n",
|
|
||||||
(void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
|
||||||
MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) );
|
|
||||||
_memory = (*memory) + _elem->disp + (ptrdiff_t)do_now_bytes;
|
|
||||||
/* compensate if we just completed a blocklen */
|
|
||||||
if( do_now == left_in_block )
|
|
||||||
_memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size);
|
|
||||||
*(packed) += do_now_bytes;
|
|
||||||
*(SPACE) -= do_now_bytes;
|
|
||||||
*(COUNT) -= do_now;
|
|
||||||
cando_count -= do_now;
|
|
||||||
}
|
}
|
||||||
|
goto update_and_return;
|
||||||
|
}
|
||||||
|
blocklen_bytes *= _elem->blocklen;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* First check if we already did something on this element ? The COUNT is the number
|
||||||
|
* of remaining predefined types in the current elem, not how many predefined types
|
||||||
|
* should be manipulated in the current call (this number is instead reflected on the
|
||||||
|
* SPACE).
|
||||||
|
*/
|
||||||
|
do_now = *(COUNT) % _elem->blocklen; /* any partial elements ? */
|
||||||
|
/* preemptively update the COUNT value we will return. */
|
||||||
|
*(COUNT) -= cando_count;
|
||||||
|
if( 0 != do_now ) {
|
||||||
|
size_t left_in_block = do_now; /* left in the current blocklen */
|
||||||
|
do_now = (do_now > cando_count ) ? cando_count : do_now;
|
||||||
|
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "pack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n",
|
||||||
|
_packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
||||||
|
MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) );
|
||||||
|
_memory += (ptrdiff_t)do_now_bytes;
|
||||||
|
/* compensate if we just completed a blocklen */
|
||||||
|
if( do_now == left_in_block )
|
||||||
|
_memory += _elem->extent - blocklen_bytes;
|
||||||
|
_packed += do_now_bytes;
|
||||||
|
cando_count -= do_now;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/* Do as many full blocklen as possible */
|
||||||
* Compute how many full blocklen we need to do and do them.
|
for(size_t _i = 0; _elem->blocklen <= cando_count; _i++ ) {
|
||||||
*/
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
do_now = cando_count / _elem->blocklen;
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
if( 0 != do_now ) {
|
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
|
||||||
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
(void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
|
||||||
for(size_t _i = 0; _i < do_now; _i++ ) {
|
MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) );
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
_packed += blocklen_bytes;
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
_memory += _elem->extent;
|
||||||
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
|
cando_count -= _elem->blocklen;
|
||||||
(void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); );
|
|
||||||
MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) );
|
|
||||||
*(packed) += do_now_bytes;
|
|
||||||
_memory += _elem->extent;
|
|
||||||
*(SPACE) -= do_now_bytes;
|
|
||||||
*(COUNT) -= _elem->blocklen;
|
|
||||||
cando_count -= _elem->blocklen;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* As an epilog do anything left from the last blocklen.
|
* As an epilog do anything left from the last blocklen.
|
||||||
*/
|
*/
|
||||||
do_now = cando_count;
|
if( 0 != cando_count ) {
|
||||||
if( 0 != do_now ) {
|
assert( cando_count < _elem->blocklen );
|
||||||
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
|
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
|
||||||
(void*)*(packed), (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
(void*)_packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
||||||
MEMCPY_CSUM( *(packed), _memory, do_now_bytes, (CONVERTOR) );
|
MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) );
|
||||||
_memory += do_now_bytes;
|
_memory += do_now_bytes;
|
||||||
*(packed) += do_now_bytes;
|
_packed += do_now_bytes;
|
||||||
*(SPACE) -= do_now_bytes;
|
|
||||||
*(COUNT) -= do_now;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
update_and_return:
|
||||||
*(memory) = _memory - _elem->disp;
|
*(memory) = _memory - _elem->disp;
|
||||||
|
*(SPACE) -= (_packed - *packed);
|
||||||
|
*(packed) = _packed;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
||||||
|
@ -49,10 +49,24 @@
|
|||||||
* - the DT_CONTIGUOUS flag for the type OPAL_DATATYPE_END_LOOP is meaningless.
|
* - the DT_CONTIGUOUS flag for the type OPAL_DATATYPE_END_LOOP is meaningless.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
position_single_block(opal_convertor_t* CONVERTOR,
|
||||||
|
unsigned char** mem, ptrdiff_t mem_update,
|
||||||
|
size_t* space, size_t space_update,
|
||||||
|
size_t* cnt, size_t cnt_update)
|
||||||
|
{
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( *mem, mem_update, (CONVERTOR)->pBaseBuf,
|
||||||
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [prolog]\n",
|
||||||
|
(void*)*mem, (unsigned long)space_update, (unsigned long)(*space) ); );
|
||||||
|
*mem += mem_update;
|
||||||
|
*space -= space_update;
|
||||||
|
*cnt -= cnt_update;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Advance the current position in the convertor based using the
|
* Advance the convertor's position. Update the pointer and the remaining space
|
||||||
* current element and a left-over counter. Update the head pointer
|
* accordingly.
|
||||||
* and the leftover byte space.
|
|
||||||
*/
|
*/
|
||||||
static inline void
|
static inline void
|
||||||
position_predefined_data( opal_convertor_t* CONVERTOR,
|
position_predefined_data( opal_convertor_t* CONVERTOR,
|
||||||
@ -64,7 +78,7 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
|
|||||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||||
size_t total_count = _elem->count * _elem->blocklen;
|
size_t total_count = _elem->count * _elem->blocklen;
|
||||||
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
|
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
size_t do_now, do_now_bytes;
|
size_t do_now, do_now_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
unsigned char* _memory = (*POINTER) + _elem->disp;
|
unsigned char* _memory = (*POINTER) + _elem->disp;
|
||||||
|
|
||||||
assert( *(COUNT) <= _elem->count * _elem->blocklen);
|
assert( *(COUNT) <= _elem->count * _elem->blocklen);
|
||||||
@ -72,6 +86,15 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
|
|||||||
if( cando_count > *(COUNT) )
|
if( cando_count > *(COUNT) )
|
||||||
cando_count = *(COUNT);
|
cando_count = *(COUNT);
|
||||||
|
|
||||||
|
if( 1 == _elem->blocklen ) {
|
||||||
|
DO_DEBUG( opal_output( 0, "position( %p, %" PRIsize_t " ) x (count %" PRIsize_t ", extent %ld) => space %lu [prolog]\n",
|
||||||
|
(void*)_memory, (unsigned long)do_now_bytes, cando_count, _elem->extent, (unsigned long)(*SPACE) ); );
|
||||||
|
_memory += cando_count * _elem->extent;
|
||||||
|
*SPACE -= cando_count * do_now_bytes;
|
||||||
|
*COUNT -= cando_count;
|
||||||
|
goto update_and_return;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* First check if we already did something on this element ?
|
* First check if we already did something on this element ?
|
||||||
*/
|
*/
|
||||||
@ -84,16 +107,12 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
|
|||||||
do_now = (left_in_block > cando_count ) ? cando_count : left_in_block;
|
do_now = (left_in_block > cando_count ) ? cando_count : left_in_block;
|
||||||
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
position_single_block( CONVERTOR, &_memory, do_now_bytes,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
SPACE, do_now_bytes, COUNT, do_now );
|
||||||
DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [prolog]\n",
|
|
||||||
(void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
|
||||||
_memory = *(POINTER) + _elem->disp + (ptrdiff_t)do_now_bytes;
|
|
||||||
/* compensate if we just completed a blocklen */
|
/* compensate if we just completed a blocklen */
|
||||||
if( do_now == left_in_block )
|
if( do_now == left_in_block )
|
||||||
_memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size);
|
_memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size);
|
||||||
*(SPACE) -= do_now_bytes;
|
|
||||||
*(COUNT) -= do_now;
|
|
||||||
cando_count -= do_now;
|
cando_count -= do_now;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -105,13 +124,8 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
|
|||||||
if( 0 != do_now ) {
|
if( 0 != do_now ) {
|
||||||
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
for(size_t _i = 0; _i < do_now; _i++ ) {
|
for(size_t _i = 0; _i < do_now; _i++ ) {
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
position_single_block( CONVERTOR, &_memory, _elem->extent,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
SPACE, do_now_bytes, COUNT, _elem->blocklen );
|
||||||
DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu\n",
|
|
||||||
(void*)_memory, (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); );
|
|
||||||
_memory += _elem->extent;
|
|
||||||
*(SPACE) -= do_now_bytes;
|
|
||||||
*(COUNT) -= _elem->blocklen;
|
|
||||||
cando_count -= _elem->blocklen;
|
cando_count -= _elem->blocklen;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -122,15 +136,11 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
|
|||||||
do_now = cando_count;
|
do_now = cando_count;
|
||||||
if( 0 != do_now ) {
|
if( 0 != do_now ) {
|
||||||
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
position_single_block( CONVERTOR, &_memory, do_now_bytes,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
SPACE, do_now_bytes, COUNT, do_now );
|
||||||
DO_DEBUG( opal_output( 0, "position( %p, %lu ) => space %lu [epilog]\n",
|
|
||||||
(void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
|
||||||
_memory += do_now_bytes;
|
|
||||||
*(SPACE) -= do_now_bytes;
|
|
||||||
*(COUNT) -= do_now;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
update_and_return:
|
||||||
*(POINTER) = _memory - _elem->disp;
|
*(POINTER) = _memory - _elem->disp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2017 The University of Tennessee and The University
|
* Copyright (c) 2004-2019 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||||
@ -70,98 +70,82 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
|
|||||||
{
|
{
|
||||||
const opal_datatype_t *pData = pConv->pDesc;
|
const opal_datatype_t *pData = pConv->pDesc;
|
||||||
unsigned char *user_memory, *packed_buffer;
|
unsigned char *user_memory, *packed_buffer;
|
||||||
uint32_t iov_count, i;
|
uint32_t iov_idx, i;
|
||||||
size_t bConverted, remaining, length, initial_bytes_converted = pConv->bConverted;
|
size_t remaining, initial_bytes_converted = pConv->bConverted;
|
||||||
dt_stack_t* stack = pConv->pStack;
|
dt_stack_t* stack = pConv->pStack;
|
||||||
ptrdiff_t extent = pData->ub - pData->lb;
|
ptrdiff_t extent = pData->ub - pData->lb;
|
||||||
ptrdiff_t initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;
|
|
||||||
|
|
||||||
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n",
|
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov count %d )\n",
|
||||||
(void*)pConv->pBaseBuf, *out_size ); );
|
(void*)pConv->pBaseBuf, *out_size ); );
|
||||||
if( stack[1].type != opal_datatype_uint1.id ) {
|
if( stack[1].type != opal_datatype_uint1.id ) {
|
||||||
stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size;
|
stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size;
|
||||||
stack[1].type = opal_datatype_uint1.id;
|
stack[1].type = opal_datatype_uint1.id;
|
||||||
}
|
}
|
||||||
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
|
||||||
remaining = pConv->local_size - pConv->bConverted;
|
|
||||||
if( 0 == remaining ) break; /* we're done this time */
|
|
||||||
if( remaining > iov[iov_count].iov_len )
|
|
||||||
remaining = iov[iov_count].iov_len;
|
|
||||||
packed_buffer = (unsigned char*)iov[iov_count].iov_base;
|
|
||||||
bConverted = remaining; /* how much will get unpacked this time */
|
|
||||||
user_memory = pConv->pBaseBuf + initial_displ;
|
|
||||||
|
|
||||||
if( (ptrdiff_t)pData->size == extent ) {
|
if( (ptrdiff_t)pData->size == extent ) {
|
||||||
user_memory += pConv->bConverted;
|
for( iov_idx = 0; iov_idx < (*out_size); iov_idx++ ) {
|
||||||
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n",
|
remaining = pConv->local_size - pConv->bConverted;
|
||||||
(void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); );
|
if( 0 == remaining ) break; /* we're done this time */
|
||||||
|
if( remaining > iov[iov_idx].iov_len )
|
||||||
|
remaining = iov[iov_idx].iov_len;
|
||||||
|
|
||||||
|
packed_buffer = (unsigned char*)iov[iov_idx].iov_base;
|
||||||
|
user_memory = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
|
||||||
|
|
||||||
/* contiguous data or basic datatype with count */
|
/* contiguous data or basic datatype with count */
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining,
|
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining,
|
||||||
pConv->pBaseBuf, pData, pConv->count );
|
pConv->pBaseBuf, pData, pConv->count );
|
||||||
DO_DEBUG( opal_output( 0, "1. unpack contig dest %p src %p length %lu\n",
|
DO_DEBUG( opal_output( 0, "unpack contig [%d] dest %p src %p length %" PRIsize_t "\n",
|
||||||
(void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); );
|
iov_idx, (void*)user_memory, (void*)packed_buffer, remaining ); );
|
||||||
MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv );
|
MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv );
|
||||||
} else {
|
pConv->bConverted += remaining; /* how much will get unpacked this time */
|
||||||
user_memory += stack[0].disp + stack[1].disp;
|
}
|
||||||
|
} else {
|
||||||
|
for( iov_idx = 0; iov_idx < (*out_size); iov_idx++ ) {
|
||||||
|
remaining = pConv->local_size - pConv->bConverted;
|
||||||
|
if( 0 == remaining ) break; /* we're done this time */
|
||||||
|
if( remaining > iov[iov_idx].iov_len )
|
||||||
|
remaining = iov[iov_idx].iov_len;
|
||||||
|
|
||||||
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n",
|
packed_buffer = (unsigned char*)iov[iov_idx].iov_base;
|
||||||
(void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); );
|
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;
|
||||||
|
pConv->bConverted += remaining; /* how much will get unpacked this time */
|
||||||
|
|
||||||
length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last unpack */
|
for( i = 0; stack[1].count <= remaining; i++ ) { /* partial or full data */
|
||||||
/* complete the last copy */
|
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, stack[1].count, pConv->pBaseBuf,
|
||||||
if( (0 != length) && (length <= remaining) ) {
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf,
|
|
||||||
pData, pConv->count );
|
pData, pConv->count );
|
||||||
DO_DEBUG( opal_output( 0, "2. unpack dest %p src %p length %lu\n",
|
DO_DEBUG( opal_output( 0, "unpack gaps [%d] dest %p src %p length %" PRIsize_t " [%d]\n",
|
||||||
(void*)user_memory, (void*)packed_buffer, (unsigned long)length ); );
|
iov_idx, (void*)user_memory, (void*)packed_buffer, stack[1].count, i ); );
|
||||||
MEMCPY_CSUM( user_memory, packed_buffer, length, pConv );
|
MEMCPY_CSUM( user_memory, packed_buffer, stack[1].count, pConv );
|
||||||
packed_buffer += length;
|
|
||||||
user_memory += (extent - (pData->size - length));
|
packed_buffer += stack[1].count;
|
||||||
remaining -= length;
|
remaining -= stack[1].count;
|
||||||
stack[1].count -= length;
|
|
||||||
if( 0 == stack[1].count) { /* one completed element */
|
stack[0].count--;
|
||||||
stack[0].count--;
|
stack[0].disp += extent;
|
||||||
stack[0].disp += extent;
|
stack[1].count = pData->size;
|
||||||
if( 0 != stack[0].count ) { /* not yet done */
|
stack[1].disp = 0;
|
||||||
stack[1].count = pData->size;
|
|
||||||
stack[1].disp = 0;
|
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp;
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
for( i = 0; pData->size <= remaining; i++ ) {
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf,
|
/* Copy the last bits */
|
||||||
pData, pConv->count );
|
|
||||||
DO_DEBUG( opal_output( 0, "3. unpack dest %p src %p length %lu\n",
|
|
||||||
(void*)user_memory, (void*)packed_buffer, (unsigned long)pData->size ); );
|
|
||||||
MEMCPY_CSUM( user_memory, packed_buffer, pData->size, pConv );
|
|
||||||
packed_buffer += pData->size;
|
|
||||||
user_memory += extent;
|
|
||||||
remaining -= pData->size;
|
|
||||||
}
|
|
||||||
stack[0].count -= i;
|
|
||||||
stack[0].disp += (i * extent);
|
|
||||||
stack[1].disp += remaining;
|
|
||||||
/* copy the last bits */
|
|
||||||
if( 0 != remaining ) {
|
if( 0 != remaining ) {
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf,
|
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf,
|
||||||
pData, pConv->count );
|
pData, pConv->count );
|
||||||
DO_DEBUG( opal_output( 0, "4. unpack dest %p src %p length %lu\n",
|
DO_DEBUG( opal_output( 0, "unpack gaps [%d] dest %p src %p length %" PRIsize_t " [epilog]\n",
|
||||||
(void*)user_memory, (void*)packed_buffer, (unsigned long)remaining ); );
|
iov_idx, (void*)user_memory, (void*)packed_buffer, remaining ); );
|
||||||
MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv );
|
MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv );
|
||||||
user_memory += remaining;
|
|
||||||
stack[1].count -= remaining;
|
stack[1].count -= remaining;
|
||||||
|
stack[1].disp += remaining; /* keep the += in case we are copying less than the datatype size */
|
||||||
|
assert( stack[1].count );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pConv->bConverted += bConverted;
|
|
||||||
}
|
}
|
||||||
*out_size = iov_count; /* we only reach this line after the for loop successfully completes */
|
*out_size = iov_idx; /* we only reach this line after the for loop successfully completes */
|
||||||
*max_data = (pConv->bConverted - initial_bytes_converted);
|
*max_data = pConv->bConverted - initial_bytes_converted;
|
||||||
if( pConv->bConverted == pConv->local_size ) {
|
if( pConv->bConverted == pConv->local_size ) pConv->flags |= CONVERTOR_COMPLETED;
|
||||||
pConv->flags |= CONVERTOR_COMPLETED;
|
return !!(pConv->flags & CONVERTOR_COMPLETED); /* done or not */
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -179,7 +163,7 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
|
|||||||
static inline void
|
static inline void
|
||||||
opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem,
|
opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem,
|
||||||
unsigned char* partial_data,
|
unsigned char* partial_data,
|
||||||
ptrdiff_t start_position, ptrdiff_t length,
|
ptrdiff_t start_position, size_t length,
|
||||||
unsigned char** user_buffer )
|
unsigned char** user_buffer )
|
||||||
{
|
{
|
||||||
char unused_byte = 0x7F, saved_data[16];
|
char unused_byte = 0x7F, saved_data[16];
|
||||||
@ -195,7 +179,7 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle
|
|||||||
|
|
||||||
/* Find a byte that is not used in the partial buffer */
|
/* Find a byte that is not used in the partial buffer */
|
||||||
find_unused_byte:
|
find_unused_byte:
|
||||||
for(ptrdiff_t i = 0; i < length; i++ ) {
|
for(size_t i = 0; i < length; i++ ) {
|
||||||
if( unused_byte == partial_data[i] ) {
|
if( unused_byte == partial_data[i] ) {
|
||||||
unused_byte--;
|
unused_byte--;
|
||||||
goto find_unused_byte;
|
goto find_unused_byte;
|
||||||
@ -306,7 +290,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor,
|
|||||||
COMPUTE_CSUM( iov_ptr, missing_length, pConvertor );
|
COMPUTE_CSUM( iov_ptr, missing_length, pConvertor );
|
||||||
opal_unpack_partial_datatype( pConvertor, pElem,
|
opal_unpack_partial_datatype( pConvertor, pElem,
|
||||||
iov_ptr,
|
iov_ptr,
|
||||||
pConvertor->partial_length, element_length - pConvertor->partial_length,
|
pConvertor->partial_length, (size_t)(element_length - pConvertor->partial_length),
|
||||||
&conv_ptr );
|
&conv_ptr );
|
||||||
--count_desc;
|
--count_desc;
|
||||||
if( 0 == count_desc ) {
|
if( 0 == count_desc ) {
|
||||||
|
@ -35,82 +35,90 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR,
|
|||||||
size_t* SPACE )
|
size_t* SPACE )
|
||||||
{
|
{
|
||||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||||
size_t total_count = _elem->count * _elem->blocklen;
|
|
||||||
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
|
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
size_t do_now, do_now_bytes;
|
size_t do_now, do_now_bytes;
|
||||||
|
size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
unsigned char* _memory = (*memory) + _elem->disp;
|
unsigned char* _memory = (*memory) + _elem->disp;
|
||||||
|
unsigned char* _packed = *packed;
|
||||||
|
|
||||||
assert( *(COUNT) <= _elem->count * _elem->blocklen);
|
assert( *(COUNT) <= (_elem->count * _elem->blocklen));
|
||||||
|
|
||||||
if( cando_count > *(COUNT) )
|
if( cando_count > *(COUNT) )
|
||||||
cando_count = *(COUNT);
|
cando_count = *(COUNT);
|
||||||
|
|
||||||
/**
|
if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */
|
||||||
* First check if we already did something on this element ?
|
*(COUNT) -= cando_count;
|
||||||
*/
|
for(; cando_count > 0; cando_count--) {
|
||||||
do_now = (total_count - *(COUNT)); /* done elements */
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
if( 0 != do_now ) {
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
do_now = do_now % _elem->blocklen; /* partial blocklen? */
|
DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n",
|
||||||
|
(void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
|
||||||
if( 0 != do_now ) {
|
MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) );
|
||||||
size_t left_in_block = _elem->blocklen - do_now; /* left in the current blocklen */
|
_packed += blocklen_bytes;
|
||||||
do_now = (left_in_block > cando_count ) ? cando_count : left_in_block;
|
_memory += _elem->extent;
|
||||||
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
|
||||||
|
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
|
||||||
DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n",
|
|
||||||
(void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
|
||||||
MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) );
|
|
||||||
_memory = (*memory) + _elem->disp + (ptrdiff_t)do_now_bytes;
|
|
||||||
/* compensate if we just completed a blocklen */
|
|
||||||
if( do_now == left_in_block )
|
|
||||||
_memory += _elem->extent - (_elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size);
|
|
||||||
*(packed) += do_now_bytes;
|
|
||||||
*(SPACE) -= do_now_bytes;
|
|
||||||
*(COUNT) -= do_now;
|
|
||||||
cando_count -= do_now;
|
|
||||||
}
|
}
|
||||||
|
goto update_and_return;
|
||||||
|
}
|
||||||
|
blocklen_bytes *= _elem->blocklen;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* First check if we already did something on this element ? The COUNT is the number
|
||||||
|
* of remaining predefined types in the current elem, not how many predefined types
|
||||||
|
* should be manipulated in the current call (this number is instead reflected on the
|
||||||
|
* SPACE).
|
||||||
|
*/
|
||||||
|
do_now = *(COUNT) % _elem->blocklen; /* any partial elements ? */
|
||||||
|
/* preemptively update the number of COUNT we will return. */
|
||||||
|
*(COUNT) -= cando_count;
|
||||||
|
if( 0 != do_now ) {
|
||||||
|
size_t left_in_block = do_now; /* left in the current blocklen */
|
||||||
|
do_now = (do_now > cando_count ) ? cando_count : do_now;
|
||||||
|
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
|
|
||||||
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
|
DO_DEBUG( opal_output( 0, "unpack 1. memcpy( %p, %p, %lu ) => space %lu [prolog]\n",
|
||||||
|
(void*)_memory, (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
||||||
|
MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) );
|
||||||
|
_memory += (ptrdiff_t)do_now_bytes;
|
||||||
|
/* compensate if we just completed a blocklen */
|
||||||
|
if( do_now == left_in_block )
|
||||||
|
_memory += _elem->extent - blocklen_bytes;
|
||||||
|
_packed += do_now_bytes;
|
||||||
|
cando_count -= do_now;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/* Do as many full blocklen as possible */
|
||||||
* Compute how many full blocklen we need to do and do them.
|
for(size_t _i = 0; _elem->blocklen <= cando_count; _i++ ) {
|
||||||
*/
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
do_now = cando_count / _elem->blocklen;
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
if( 0 != do_now ) {
|
DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n",
|
||||||
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
(void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
|
||||||
for(size_t _i = 0; _i < do_now; _i++ ) {
|
MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) );
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
_packed += blocklen_bytes;
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
_memory += _elem->extent;
|
||||||
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
|
cando_count -= _elem->blocklen;
|
||||||
(void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)*(SPACE) ); );
|
|
||||||
MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) );
|
|
||||||
*(packed) += do_now_bytes;
|
|
||||||
_memory += _elem->extent;
|
|
||||||
*(SPACE) -= do_now_bytes;
|
|
||||||
*(COUNT) -= _elem->blocklen;
|
|
||||||
cando_count -= _elem->blocklen;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* As an epilog do anything left from the last blocklen.
|
* As an epilog do anything left from the last blocklen.
|
||||||
*/
|
*/
|
||||||
do_now = cando_count;
|
if( 0 != cando_count ) {
|
||||||
if( 0 != do_now ) {
|
assert( cando_count < _elem->blocklen );
|
||||||
do_now_bytes = do_now * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
|
||||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||||
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
|
DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
|
||||||
(void*)_memory, (void*)*(packed), (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
|
(void*)_memory, (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
|
||||||
MEMCPY_CSUM( _memory, *(packed), do_now_bytes, (CONVERTOR) );
|
MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) );
|
||||||
_memory += do_now_bytes;
|
_memory += do_now_bytes;
|
||||||
*(packed) += do_now_bytes;
|
_packed += do_now_bytes;
|
||||||
*(SPACE) -= do_now_bytes;
|
|
||||||
*(COUNT) -= do_now;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
update_and_return:
|
||||||
*(memory) = _memory - _elem->disp;
|
*(memory) = _memory - _elem->disp;
|
||||||
|
*(SPACE) -= (_packed - *packed);
|
||||||
|
*(packed) = _packed;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void unpack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
static inline void unpack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user