Refactor the datatype engine to reduce the critical path for the most
trivial (and thus most used) datatype usages. Make the gaps_contiguous pack and unpack functions similar.
Этот коммит содержится в:
родитель
8ee501350b
Коммит
e640673372
@ -362,13 +362,12 @@ static inline int opal_convertor_create_stack_with_pos_contig( opal_convertor_t*
|
||||
if( OPAL_LIKELY(0 == count) ) {
|
||||
pStack[1].type = pElems->elem.common.type;
|
||||
pStack[1].count = pElems->elem.count;
|
||||
pStack[1].disp = 0;
|
||||
} else {
|
||||
pStack[1].type = OPAL_DATATYPE_UINT1;
|
||||
pStack[1].count = pData->size - count;
|
||||
pStack[1].disp = count;
|
||||
}
|
||||
pStack[1].index = 0; /* useless */
|
||||
pStack[1].disp = count;
|
||||
pStack[1].index = 0; /* useless */
|
||||
|
||||
pConvertor->bConverted = starting_point;
|
||||
pConvertor->stack_pos = 1;
|
||||
@ -400,13 +399,16 @@ int opal_convertor_create_stack_at_begining( opal_convertor_t* convertor,
|
||||
pStack[0].index = -1;
|
||||
pStack[0].count = convertor->count;
|
||||
pStack[0].disp = 0;
|
||||
pStack[0].type = OPAL_DATATYPE_LOOP;
|
||||
|
||||
pStack[1].index = 0;
|
||||
pStack[1].disp = 0;
|
||||
if( pElems[0].elem.common.type == OPAL_DATATYPE_LOOP ) {
|
||||
pStack[1].count = pElems[0].loop.loops;
|
||||
pStack[1].type = OPAL_DATATYPE_LOOP;
|
||||
} else {
|
||||
pStack[1].count = pElems[0].elem.count;
|
||||
pStack[1].type = pElems[0].elem.common.type;
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
@ -108,49 +108,55 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
|
||||
size_t* max_data )
|
||||
{
|
||||
const opal_datatype_t* pData = pConv->pDesc;
|
||||
dt_stack_t* pStack = pConv->pStack;
|
||||
dt_stack_t* stack = pConv->pStack;
|
||||
unsigned char *user_memory, *packed_buffer;
|
||||
uint32_t i, index, iov_count;
|
||||
size_t max_allowed, total_bytes_converted = 0;
|
||||
OPAL_PTRDIFF_TYPE extent;
|
||||
size_t bConverted, remaining, length, initial_bytes_converted = pConv->bConverted;
|
||||
OPAL_PTRDIFF_TYPE extent= pData->ub - pData->lb;
|
||||
OPAL_PTRDIFF_TYPE initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;
|
||||
|
||||
extent = pData->ub - pData->lb;
|
||||
assert( (pData->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && ((OPAL_PTRDIFF_TYPE)pData->size != extent) );
|
||||
|
||||
/* Limit the amount of packed data to the data left over on this convertor */
|
||||
max_allowed = pConv->local_size - pConv->bConverted;
|
||||
if( max_allowed > (*max_data) )
|
||||
max_allowed = (*max_data);
|
||||
|
||||
i = (uint32_t)(pConv->bConverted / pData->size); /* how many we already pack */
|
||||
DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n",
|
||||
pConv->pBaseBuf, *out_size ); );
|
||||
if( stack[1].type != opal_datatype_uint1.id ) {
|
||||
stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size;
|
||||
stack[1].type = opal_datatype_uint1.id;
|
||||
}
|
||||
|
||||
/* There are some optimizations that can be done if the upper level
|
||||
* does not provide a buffer.
|
||||
*/
|
||||
user_memory = pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp;
|
||||
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
||||
if( 0 == max_allowed ) break; /* we're done this time */
|
||||
if( iov[iov_count].iov_base == NULL ) {
|
||||
/* Limit the amount of packed data to the data left over on this convertor */
|
||||
remaining = pConv->local_size - pConv->bConverted;
|
||||
if( 0 == remaining ) break; /* we're done this time */
|
||||
if( remaining > (uint32_t)iov[iov_count].iov_len )
|
||||
remaining = iov[iov_count].iov_len;
|
||||
packed_buffer = (unsigned char *)iov[iov_count].iov_base;
|
||||
bConverted = remaining; /* how much will get unpacked this time */
|
||||
user_memory = pConv->pBaseBuf + initial_displ + stack[0].disp + stack[1].disp;
|
||||
i = pConv->count - stack[0].count; /* how many we already packed */
|
||||
assert(i == ((uint32_t)(pConv->bConverted / pData->size)));
|
||||
|
||||
if( packed_buffer == NULL ) {
|
||||
/* special case for small data. We avoid allocating memory if we
|
||||
* can fill the iovec directly with the address of the remaining
|
||||
* data.
|
||||
*/
|
||||
if( (uint32_t)pStack->count < ((*out_size) - iov_count) ) {
|
||||
pStack[1].count = pData->size - (pConv->bConverted % pData->size);
|
||||
if( (uint32_t)stack->count < ((*out_size) - iov_count) ) {
|
||||
stack[1].count = pData->size - (pConv->bConverted % pData->size);
|
||||
for( index = iov_count; i < pConv->count; i++, index++ ) {
|
||||
iov[index].iov_base = (IOVBASE_TYPE *) user_memory;
|
||||
iov[index].iov_len = pStack[1].count;
|
||||
pStack[0].disp += extent;
|
||||
total_bytes_converted += pStack[1].count;
|
||||
pStack[1].disp = 0; /* reset it for the next round */
|
||||
pStack[1].count = pData->size;
|
||||
user_memory = pConv->pBaseBuf + initial_displ + pStack[0].disp;
|
||||
iov[index].iov_len = stack[1].count;
|
||||
stack[0].disp += extent;
|
||||
pConv->bConverted += stack[1].count;
|
||||
stack[1].disp = 0; /* reset it for the next round */
|
||||
stack[1].count = pData->size;
|
||||
user_memory = pConv->pBaseBuf + initial_displ + stack[0].disp;
|
||||
COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv );
|
||||
}
|
||||
*out_size = iov_count + index;
|
||||
pConv->bConverted += total_bytes_converted;
|
||||
*max_data = total_bytes_converted;
|
||||
*max_data = (pConv->bConverted - initial_bytes_converted);
|
||||
pConv->flags |= CONVERTOR_COMPLETED;
|
||||
return 1; /* we're done */
|
||||
}
|
||||
@ -161,10 +167,10 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
|
||||
*/
|
||||
for( index = iov_count; (i < pConv->count) && (index < (*out_size));
|
||||
i++, index++ ) {
|
||||
if( max_allowed < pData->size ) {
|
||||
if( remaining < pData->size ) {
|
||||
iov[index].iov_base = (IOVBASE_TYPE *) user_memory;
|
||||
iov[index].iov_len = max_allowed;
|
||||
max_allowed = 0;
|
||||
iov[index].iov_len = remaining;
|
||||
remaining = 0;
|
||||
COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv );
|
||||
break;
|
||||
} else {
|
||||
@ -173,12 +179,11 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
|
||||
user_memory += extent;
|
||||
COMPUTE_CSUM( iov[index].iov_base, (size_t)iov[index].iov_len, pConv );
|
||||
}
|
||||
max_allowed -= iov[index].iov_len;
|
||||
total_bytes_converted += iov[index].iov_len;
|
||||
remaining -= iov[index].iov_len;
|
||||
pConv->bConverted += iov[index].iov_len;
|
||||
}
|
||||
*out_size = index;
|
||||
*max_data = total_bytes_converted;
|
||||
pConv->bConverted += total_bytes_converted;
|
||||
*max_data = (pConv->bConverted - initial_bytes_converted);
|
||||
if( pConv->bConverted == pConv->local_size ) {
|
||||
pConv->flags |= CONVERTOR_COMPLETED;
|
||||
return 1;
|
||||
@ -188,52 +193,63 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
|
||||
}
|
||||
|
||||
{
|
||||
uint32_t counter;
|
||||
size_t done;
|
||||
DO_DEBUG( opal_output( 0, "pack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n",
|
||||
user_memory, packed_buffer, (unsigned long)remaining ); );
|
||||
|
||||
packed_buffer = (unsigned char *) iov[iov_count].iov_base;
|
||||
done = pConv->bConverted - i * pData->size; /* partial data from last pack */
|
||||
length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last pack */
|
||||
/* data left from last round and enough space in the buffer */
|
||||
if( (done + max_allowed) >= pData->size ) {
|
||||
if( (0 != length) && (length <= remaining)) {
|
||||
/* copy the partial left-over from the previous round */
|
||||
done = pData->size - done;
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, done, pConv->pBaseBuf, pData, pConv->count );
|
||||
MEMCPY_CSUM( packed_buffer, user_memory, done, pConv );
|
||||
packed_buffer += done;
|
||||
max_allowed -= done;
|
||||
total_bytes_converted += done;
|
||||
user_memory += (extent - pData->size + done);
|
||||
|
||||
/* copy entire types */
|
||||
counter = (uint32_t)(max_allowed / pData->size);
|
||||
if( counter > pConv->count ) counter = pConv->count;
|
||||
for( i = 0; i < counter; i++ ) {
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf, pData, pConv->count );
|
||||
MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv );
|
||||
packed_buffer+= pData->size;
|
||||
user_memory += extent;
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf,
|
||||
pData, pConv->count );
|
||||
DO_DEBUG( opal_output( 0, "2. pack dest %p src %p length %lu\n",
|
||||
user_memory, packed_buffer, (unsigned long)length ); );
|
||||
MEMCPY_CSUM( packed_buffer, user_memory, length, pConv );
|
||||
packed_buffer += length;
|
||||
user_memory += (extent - pData->size + length);
|
||||
remaining -= length;
|
||||
stack[1].count -= length;
|
||||
if( 0 == stack[1].count) { /* one completed element */
|
||||
stack[0].count--;
|
||||
stack[0].disp += extent;
|
||||
if( 0 != stack[0].count ) { /* not yet done */
|
||||
stack[1].count = pData->size;
|
||||
stack[1].disp = 0;
|
||||
}
|
||||
}
|
||||
done = (counter * pData->size);
|
||||
max_allowed -= done;
|
||||
total_bytes_converted += done;
|
||||
}
|
||||
/* If there is anything pending ... */
|
||||
if( 0 != max_allowed ) {
|
||||
done = max_allowed;
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, done, pConv->pBaseBuf, pData, pConv->count );
|
||||
MEMCPY_CSUM( packed_buffer, user_memory, done, pConv );
|
||||
packed_buffer += done;
|
||||
max_allowed = 0;
|
||||
total_bytes_converted += done;
|
||||
user_memory += done;
|
||||
for( i = 0; pData->size <= remaining; i++ ) {
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf,
|
||||
pData, pConv->count );
|
||||
DO_DEBUG( opal_output( 0, "3. pack dest %p src %p length %lu\n",
|
||||
user_memory, packed_buffer, (unsigned long)pData->size ); );
|
||||
MEMCPY_CSUM( packed_buffer, user_memory, pData->size, pConv );
|
||||
packed_buffer += pData->size;
|
||||
user_memory += extent;
|
||||
remaining -= pData->size;
|
||||
}
|
||||
stack[0].count -= i; /* the filled up and the entire types */
|
||||
stack[0].disp += (i * extent);
|
||||
stack[1].disp += remaining;
|
||||
/* Copy the last bits */
|
||||
if( 0 != remaining ) {
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf,
|
||||
pData, pConv->count );
|
||||
DO_DEBUG( opal_output( 0, "4. pack dest %p src %p length %lu\n",
|
||||
user_memory, packed_buffer, (unsigned long)remaining ); );
|
||||
MEMCPY_CSUM( packed_buffer, user_memory, remaining, pConv );
|
||||
user_memory += remaining;
|
||||
stack[1].count -= remaining;
|
||||
}
|
||||
if( 0 == stack[1].count ) { /* prepare for the next element */
|
||||
stack[1].count = pData->size;
|
||||
stack[1].disp = 0;
|
||||
}
|
||||
}
|
||||
pConv->bConverted += bConverted;
|
||||
}
|
||||
pStack[0].disp = (intptr_t)user_memory - (intptr_t)pConv->pBaseBuf - initial_displ;
|
||||
pStack[1].disp = max_allowed;
|
||||
*max_data = total_bytes_converted;
|
||||
pConv->bConverted += total_bytes_converted;
|
||||
*out_size = iov_count;
|
||||
*max_data = (pConv->bConverted - initial_bytes_converted);
|
||||
if( pConv->bConverted == pConv->local_size ) {
|
||||
pConv->flags |= CONVERTOR_COMPLETED;
|
||||
return 1;
|
||||
@ -371,7 +387,7 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor,
|
||||
return 1;
|
||||
}
|
||||
/* Save the global position for the next round */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_INT8, count_desc,
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc,
|
||||
conv_ptr - pConvertor->pBaseBuf );
|
||||
DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); );
|
||||
|
@ -27,7 +27,7 @@
|
||||
#endif
|
||||
|
||||
static inline void pack_predefined_data( opal_convertor_t* CONVERTOR,
|
||||
dt_elem_desc_t* ELEM,
|
||||
const dt_elem_desc_t* ELEM,
|
||||
uint32_t* COUNT,
|
||||
unsigned char** SOURCE,
|
||||
unsigned char** DESTINATION,
|
||||
@ -35,7 +35,7 @@ static inline void pack_predefined_data( opal_convertor_t* CONVERTOR,
|
||||
{
|
||||
uint32_t _copy_count = *(COUNT);
|
||||
size_t _copy_blength;
|
||||
ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||
unsigned char* _source = (*SOURCE) + _elem->disp;
|
||||
|
||||
_copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||
@ -73,14 +73,14 @@ static inline void pack_predefined_data( opal_convertor_t* CONVERTOR,
|
||||
}
|
||||
|
||||
static inline void pack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
||||
dt_elem_desc_t* ELEM,
|
||||
const dt_elem_desc_t* ELEM,
|
||||
uint32_t* COUNT,
|
||||
unsigned char** SOURCE,
|
||||
unsigned char** DESTINATION,
|
||||
size_t* SPACE )
|
||||
{
|
||||
ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
||||
ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
||||
const ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
||||
const ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
||||
unsigned char* _source = (*SOURCE) + _end_loop->first_elem_disp;
|
||||
uint32_t _copy_loops = *(COUNT);
|
||||
uint32_t _i;
|
||||
|
@ -109,9 +109,8 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||
uint32_t count_desc; /* the number of items already done in the actual pos_desc */
|
||||
uint16_t type; /* type at current position */
|
||||
dt_elem_desc_t* description = pConvertor->use_desc->desc;
|
||||
dt_elem_desc_t* pElem;
|
||||
dt_elem_desc_t* pElem; /* current position */
|
||||
unsigned char *base_pointer = pConvertor->pBaseBuf;
|
||||
size_t iov_len_local;
|
||||
OPAL_PTRDIFF_TYPE extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb;
|
||||
@ -133,8 +132,8 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
||||
(unsigned long)pConvertor->bConverted, (unsigned long)*position, (unsigned long)pConvertor->pDesc->size,
|
||||
(unsigned long)iov_len_local, count_desc ); );
|
||||
/* Update all the stack including the last one */
|
||||
for( type = 0; type <= pConvertor->stack_pos; type++ )
|
||||
pStack[type].disp += count_desc * extent;
|
||||
for( pos_desc = 0; pos_desc <= pConvertor->stack_pos; pos_desc++ )
|
||||
pStack[pos_desc].disp += count_desc * extent;
|
||||
pConvertor->bConverted += count_desc * pConvertor->pDesc->size;
|
||||
iov_len_local = *position - pConvertor->bConverted;
|
||||
pStack[0].count -= count_desc;
|
||||
@ -228,7 +227,6 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
||||
POSITION_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
|
||||
base_pointer, iov_len_local );
|
||||
if( 0 != count_desc ) { /* completed */
|
||||
type = pElem->elem.common.type;
|
||||
pConvertor->partial_length = (uint32_t)iov_len_local;
|
||||
goto complete_loop;
|
||||
}
|
||||
@ -245,7 +243,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
||||
|
||||
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
|
||||
/* I complete an element, next step I should go to the next one */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc,
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc,
|
||||
base_pointer - pConvertor->pBaseBuf );
|
||||
DO_DEBUG( opal_output( 0, "position save stack stack_pos %d pos_desc %d count_desc %d disp %llx\n",
|
||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); );
|
||||
|
@ -76,11 +76,16 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
|
||||
|
||||
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n",
|
||||
pConv->pBaseBuf, *out_size ); );
|
||||
if( stack[1].type != opal_datatype_uint1.id ) {
|
||||
stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size;
|
||||
stack[1].type = opal_datatype_uint1.id;
|
||||
}
|
||||
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
||||
packed_buffer = (unsigned char*)iov[iov_count].iov_base;
|
||||
remaining = pConv->local_size - pConv->bConverted;
|
||||
if( 0 == remaining ) break; /* we're done this time */
|
||||
if( remaining > (uint32_t)iov[iov_count].iov_len )
|
||||
remaining = iov[iov_count].iov_len;
|
||||
packed_buffer = (unsigned char*)iov[iov_count].iov_base;
|
||||
bConverted = remaining; /* how much will get unpacked this time */
|
||||
user_memory = pConv->pBaseBuf + initial_displ;
|
||||
|
||||
@ -91,7 +96,7 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
|
||||
|
||||
/* contiguous data or basic datatype with count */
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining,
|
||||
pConv->pBaseBuf, pData, pConv->count );
|
||||
pConv->pBaseBuf, pData, pConv->count );
|
||||
DO_DEBUG( opal_output( 0, "1. unpack contig dest %p src %p length %lu\n",
|
||||
user_memory, packed_buffer, (unsigned long)remaining ); );
|
||||
MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv );
|
||||
@ -101,26 +106,30 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
|
||||
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %lu\n",
|
||||
user_memory, packed_buffer, (unsigned long)remaining ); );
|
||||
|
||||
length = pConv->bConverted / pData->size; /* already done */
|
||||
length = pConv->bConverted - length * pData->size; /* how much of the last data we convert */
|
||||
|
||||
length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last unpack */
|
||||
/* complete the last copy */
|
||||
if( length != 0 ) {
|
||||
length = pData->size - length;
|
||||
if( length <= remaining ) {
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf,
|
||||
pData, pConv->count );
|
||||
DO_DEBUG( opal_output( 0, "2. unpack dest %p src %p length %lu\n",
|
||||
user_memory, packed_buffer, (unsigned long)length ); );
|
||||
MEMCPY_CSUM( user_memory, packed_buffer, length, pConv );
|
||||
packed_buffer += length;
|
||||
user_memory += (extent - (pData->size - length));
|
||||
remaining -= length;
|
||||
if( (0 != length) && (length <= remaining) ) {
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf,
|
||||
pData, pConv->count );
|
||||
DO_DEBUG( opal_output( 0, "2. unpack dest %p src %p length %lu\n",
|
||||
user_memory, packed_buffer, (unsigned long)length ); );
|
||||
MEMCPY_CSUM( user_memory, packed_buffer, length, pConv );
|
||||
packed_buffer += length;
|
||||
user_memory += (extent - (pData->size - length));
|
||||
remaining -= length;
|
||||
stack[1].count -= length;
|
||||
if( 0 == stack[1].count) { /* one completed element */
|
||||
stack[0].count--;
|
||||
stack[0].disp += extent;
|
||||
if( 0 != stack[0].count ) { /* not yet done */
|
||||
stack[1].count = pData->size;
|
||||
stack[1].disp = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
for( i = 0; pData->size <= remaining; i++ ) {
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf,
|
||||
pData, pConv->count );
|
||||
pData, pConv->count );
|
||||
DO_DEBUG( opal_output( 0, "3. unpack dest %p src %p length %lu\n",
|
||||
user_memory, packed_buffer, (unsigned long)pData->size ); );
|
||||
MEMCPY_CSUM( user_memory, packed_buffer, pData->size, pConv );
|
||||
@ -128,16 +137,18 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
|
||||
user_memory += extent;
|
||||
remaining -= pData->size;
|
||||
}
|
||||
stack[0].disp = (intptr_t)user_memory - (intptr_t)pConv->pBaseBuf - initial_displ;
|
||||
stack[1].disp = remaining;
|
||||
stack[0].count -= i;
|
||||
stack[0].disp += (i * extent);
|
||||
stack[1].disp += remaining;
|
||||
/* copy the last bits */
|
||||
if( remaining != 0 ) {
|
||||
if( 0 != remaining ) {
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf,
|
||||
pData, pConv->count );
|
||||
pData, pConv->count );
|
||||
DO_DEBUG( opal_output( 0, "4. unpack dest %p src %p length %lu\n",
|
||||
user_memory, packed_buffer, (unsigned long)remaining ); );
|
||||
MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv );
|
||||
user_memory += remaining;
|
||||
stack[1].count -= remaining;
|
||||
}
|
||||
}
|
||||
pConv->bConverted += bConverted;
|
||||
@ -400,7 +411,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor,
|
||||
return 1;
|
||||
}
|
||||
/* Save the global position for the next round */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc,
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc,
|
||||
conv_ptr - pConvertor->pBaseBuf );
|
||||
DO_DEBUG( opal_output( 0, "unpack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); );
|
||||
@ -564,7 +575,7 @@ opal_unpack_general_function( opal_convertor_t* pConvertor,
|
||||
return 1;
|
||||
}
|
||||
/* Save the global position for the next round */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc,
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc,
|
||||
conv_ptr - pConvertor->pBaseBuf );
|
||||
DO_DEBUG( opal_output( 0, "unpack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); );
|
||||
|
@ -24,16 +24,17 @@
|
||||
CONVERTOR->cbmemcpy( (DST), (SRC), (BLENGTH), (CONVERTOR) )
|
||||
#endif
|
||||
|
||||
static inline void unpack_predefined_data( opal_convertor_t* CONVERTOR, /* the convertor */
|
||||
dt_elem_desc_t* ELEM, /* the element description */
|
||||
uint32_t* COUNT, /* the number of elements */
|
||||
unsigned char** SOURCE, /* the source pointer */
|
||||
unsigned char** DESTINATION, /* the destination pointer */
|
||||
size_t* SPACE ) /* the space in the destination buffer */
|
||||
static inline void
|
||||
unpack_predefined_data( opal_convertor_t* CONVERTOR, /* the convertor */
|
||||
const dt_elem_desc_t* ELEM, /* the element description */
|
||||
uint32_t* COUNT, /* the number of elements */
|
||||
unsigned char** SOURCE, /* the source pointer */
|
||||
unsigned char** DESTINATION, /* the destination pointer */
|
||||
size_t* SPACE ) /* the space in the destination buffer */
|
||||
{
|
||||
uint32_t _copy_count = *(COUNT);
|
||||
size_t _copy_blength;
|
||||
ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||
unsigned char* _destination = (*DESTINATION) + _elem->disp;
|
||||
|
||||
_copy_blength = opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||
@ -71,14 +72,14 @@ static inline void unpack_predefined_data( opal_convertor_t* CONVERTOR, /* the c
|
||||
}
|
||||
|
||||
static inline void unpack_contiguous_loop( opal_convertor_t* CONVERTOR,
|
||||
dt_elem_desc_t* ELEM,
|
||||
const dt_elem_desc_t* ELEM,
|
||||
uint32_t* COUNT,
|
||||
unsigned char** SOURCE,
|
||||
unsigned char** DESTINATION,
|
||||
size_t* SPACE )
|
||||
{
|
||||
ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
||||
ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
||||
const ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
||||
const ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
||||
unsigned char* _destination = (*DESTINATION) + _end_loop->first_elem_disp;
|
||||
uint32_t _copy_loops = *(COUNT);
|
||||
uint32_t _i;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user