Always build support for heterogeneous environments (this is needed to
provide external32 support). Add a pack function that allows send-side conversion (needed on little-endian machines in order to pack in the external32 format).
Этот коммит содержится в:
родитель
639f4b1086
Коммит
cf2bb20bac
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -449,16 +449,17 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
|
||||
|
||||
|
||||
/**
|
||||
* Compute the remote size.
|
||||
* Compute the remote size. If necessary remove the homogeneous flag
|
||||
* and redirect the convertor description toward the non-optimized
|
||||
* datatype representation.
|
||||
*/
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \
|
||||
{ \
|
||||
if( OPAL_UNLIKELY(0 != (bdt_mask)) ) { \
|
||||
opal_convertor_master_t* master; \
|
||||
int i; \
|
||||
uint32_t mask = datatype->bdt_used; \
|
||||
convertor->flags ^= CONVERTOR_HOMOGENEOUS; \
|
||||
convertor->flags &= (~CONVERTOR_HOMOGENEOUS); \
|
||||
master = convertor->master; \
|
||||
convertor->remote_size = 0; \
|
||||
for( i = OPAL_DATATYPE_FIRST_TYPE; mask && (i < OPAL_DATATYPE_MAX_PREDEFINED); i++ ) { \
|
||||
@ -472,13 +473,6 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
|
||||
convertor->use_desc = &(datatype->desc); \
|
||||
} \
|
||||
}
|
||||
#else
|
||||
#define OPAL_CONVERTOR_COMPUTE_REMOTE_SIZE(convertor, datatype, bdt_mask) \
|
||||
{ \
|
||||
assert(0 == (bdt_mask)); \
|
||||
(void)bdt_mask; /* silence compiler warning */ \
|
||||
}
|
||||
#endif /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
|
||||
|
||||
/**
|
||||
* This macro will initialize a convertor based on a previously created
|
||||
@ -511,16 +505,13 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
|
||||
convertor->flags |= (CONVERTOR_NO_OP | CONVERTOR_HOMOGENEOUS); \
|
||||
convertor->pDesc = (opal_datatype_t*)datatype; \
|
||||
convertor->bConverted = 0; \
|
||||
/* By default consider the optimized description */ \
|
||||
convertor->use_desc = &(datatype->opt_desc); \
|
||||
\
|
||||
convertor->remote_size = convertor->local_size; \
|
||||
if( OPAL_LIKELY(convertor->remoteArch == opal_local_arch) ) { \
|
||||
if( (convertor->flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_NO_GAPS)) == OPAL_DATATYPE_FLAG_NO_GAPS ) { \
|
||||
return OPAL_SUCCESS; \
|
||||
} \
|
||||
if( ((convertor->flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_CONTIGUOUS)) \
|
||||
== OPAL_DATATYPE_FLAG_CONTIGUOUS) && (1 == count) ) { \
|
||||
if( !(convertor->flags & CONVERTOR_WITH_CHECKSUM) && \
|
||||
((convertor->flags & OPAL_DATATYPE_FLAG_NO_GAPS) || \
|
||||
((convertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) && (1 == count))) ) { \
|
||||
return OPAL_SUCCESS; \
|
||||
} \
|
||||
} \
|
||||
@ -532,8 +523,9 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
|
||||
/* For predefined datatypes (contiguous) do nothing more */ \
|
||||
/* if checksum is enabled then always continue */ \
|
||||
if( ((convertor->flags & (CONVERTOR_WITH_CHECKSUM | OPAL_DATATYPE_FLAG_NO_GAPS)) \
|
||||
== OPAL_DATATYPE_FLAG_NO_GAPS) && \
|
||||
(convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) { \
|
||||
== OPAL_DATATYPE_FLAG_NO_GAPS) && \
|
||||
((convertor->flags & (CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) == \
|
||||
(CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS)) ) { \
|
||||
return OPAL_SUCCESS; \
|
||||
} \
|
||||
convertor->flags &= ~CONVERTOR_NO_OP; \
|
||||
@ -566,26 +558,24 @@ int32_t opal_convertor_prepare_for_recv( opal_convertor_t* convertor,
|
||||
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );
|
||||
|
||||
if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
|
||||
convertor->fAdvance = opal_unpack_general_checksum;
|
||||
} else
|
||||
#endif
|
||||
if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
convertor->fAdvance = opal_unpack_homogeneous_contig_checksum;
|
||||
} else {
|
||||
convertor->fAdvance = opal_generic_simple_unpack_checksum;
|
||||
if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
convertor->fAdvance = opal_unpack_homogeneous_contig_checksum;
|
||||
} else {
|
||||
convertor->fAdvance = opal_generic_simple_unpack_checksum;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
|
||||
convertor->fAdvance = opal_unpack_general;
|
||||
} else
|
||||
#endif
|
||||
if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
convertor->fAdvance = opal_unpack_homogeneous_contig;
|
||||
} else {
|
||||
convertor->fAdvance = opal_generic_simple_unpack;
|
||||
if( convertor->pDesc->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
convertor->fAdvance = opal_unpack_homogeneous_contig;
|
||||
} else {
|
||||
convertor->fAdvance = opal_generic_simple_unpack;
|
||||
}
|
||||
}
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
@ -605,24 +595,32 @@ int32_t opal_convertor_prepare_for_send( opal_convertor_t* convertor,
|
||||
OPAL_CONVERTOR_PREPARE( convertor, datatype, count, pUserBuf );
|
||||
|
||||
if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {
|
||||
if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size)
|
||||
|| (1 >= convertor->count) )
|
||||
convertor->fAdvance = opal_pack_homogeneous_contig_checksum;
|
||||
else
|
||||
convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps_checksum;
|
||||
if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
|
||||
convertor->fAdvance = opal_pack_general_checksum;
|
||||
} else {
|
||||
convertor->fAdvance = opal_generic_simple_pack_checksum;
|
||||
if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size)
|
||||
|| (1 >= convertor->count) )
|
||||
convertor->fAdvance = opal_pack_homogeneous_contig_checksum;
|
||||
else
|
||||
convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps_checksum;
|
||||
} else {
|
||||
convertor->fAdvance = opal_generic_simple_pack_checksum;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size)
|
||||
|| (1 >= convertor->count) )
|
||||
convertor->fAdvance = opal_pack_homogeneous_contig;
|
||||
else
|
||||
convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps;
|
||||
if( !(convertor->flags & CONVERTOR_HOMOGENEOUS) ) {
|
||||
convertor->fAdvance = opal_pack_general;
|
||||
} else {
|
||||
convertor->fAdvance = opal_generic_simple_pack;
|
||||
if( datatype->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
if( ((datatype->ub - datatype->lb) == (OPAL_PTRDIFF_TYPE)datatype->size)
|
||||
|| (1 >= convertor->count) )
|
||||
convertor->fAdvance = opal_pack_homogeneous_contig;
|
||||
else
|
||||
convertor->fAdvance = opal_pack_homogeneous_contig_with_gaps;
|
||||
} else {
|
||||
convertor->fAdvance = opal_generic_simple_pack;
|
||||
}
|
||||
}
|
||||
}
|
||||
return OPAL_SUCCESS;
|
||||
@ -678,15 +676,33 @@ int opal_convertor_clone( const opal_convertor_t* source,
|
||||
|
||||
/**
 * Dump the state of a convertor on opal_output stream 0.
 *
 * The output contains the convertor counters and sizes, the remote and
 * local architecture identifiers, one keyword per flag that is set in
 * convertor->flags, the datatype description (via opal_datatype_dump) and,
 * when the stack looks initialized, the current stack representation.
 *
 * Only opal_output is used: the earlier printf-based header and the
 * unconditional stack dump duplicated this information and bypassed the
 * opal_output stream.
 *
 * @param convertor  the convertor to dump (must not be NULL, it is
 *                   dereferenced unconditionally).
 */
void opal_convertor_dump( opal_convertor_t* convertor )
{
    /* The size-like fields are explicitly cast to unsigned long, so they are
     * printed with %lu to match the argument type. */
    opal_output( 0, "Convertor %p count %d stack position %d bConverted %lu\n"
                    "\tlocal_size %lu remote_size %lu flags %X stack_size %d pending_length %d\n"
                    "\tremote_arch %u local_arch %u\n",
                 (void*)convertor,
                 convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted,
                 (unsigned long)convertor->local_size, (unsigned long)convertor->remote_size,
                 convertor->flags, convertor->stack_size, convertor->partial_length,
                 convertor->remoteArch, opal_local_arch );

    /* One keyword per flag set on the convertor. */
    if( convertor->flags & CONVERTOR_RECV ) {
        opal_output( 0, "unpack ");
    }
    if( convertor->flags & CONVERTOR_SEND ) {
        opal_output( 0, "pack ");
    }
    if( convertor->flags & CONVERTOR_SEND_CONVERSION ) {
        opal_output( 0, "conversion ");
    }
    if( convertor->flags & CONVERTOR_HOMOGENEOUS ) {
        opal_output( 0, "homogeneous " );
    } else {
        opal_output( 0, "heterogeneous ");
    }
    if( convertor->flags & CONVERTOR_NO_OP ) {
        opal_output( 0, "no_op ");
    }
    if( convertor->flags & CONVERTOR_WITH_CHECKSUM ) {
        opal_output( 0, "checksum ");
    }
    if( convertor->flags & CONVERTOR_CUDA ) {
        opal_output( 0, "CUDA ");
    }
    if( convertor->flags & CONVERTOR_CUDA_ASYNC ) {
        opal_output( 0, "CUDA Async ");
    }
    if( convertor->flags & CONVERTOR_COMPLETED ) {
        opal_output( 0, "COMPLETED ");
    }

    opal_datatype_dump( convertor->pDesc );

    /* Skip the stack dump when the stack is at position 0 and its index lies
     * beyond the datatype description, i.e. when the convertor is not
     * completely initialized. */
    if( !((0 == convertor->stack_pos) &&
          ((size_t)convertor->pStack[convertor->stack_pos].index > convertor->pDesc->desc.length)) ) {
        /* only if the convertor is completely initialized */
        opal_output( 0, "Actual stack representation\n" );
        opal_datatype_dump_stack( convertor->pStack, convertor->stack_pos,
                                  convertor->pDesc->desc.desc, convertor->pDesc->name );
    }
}
|
||||
|
||||
|
||||
|
@ -175,9 +175,7 @@ static inline int opal_convertor_cleanup( opal_convertor_t* convertor )
|
||||
*/
|
||||
static inline int32_t opal_convertor_need_buffers( const opal_convertor_t* pConvertor )
|
||||
{
|
||||
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
|
||||
if (OPAL_UNLIKELY(0 == (pConvertor->flags & CONVERTOR_HOMOGENEOUS))) return 1;
|
||||
#endif
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
if( pConvertor->flags & (CONVERTOR_CUDA | CONVERTOR_CUDA_UNIFIED)) return 1;
|
||||
#endif
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -42,10 +42,12 @@
|
||||
#define opal_pack_homogeneous_contig_function opal_pack_homogeneous_contig_checksum
|
||||
#define opal_pack_homogeneous_contig_with_gaps_function opal_pack_homogeneous_contig_with_gaps_checksum
|
||||
#define opal_generic_simple_pack_function opal_generic_simple_pack_checksum
|
||||
#define opal_pack_general_function opal_pack_general_checksum
|
||||
#else
|
||||
#define opal_pack_homogeneous_contig_function opal_pack_homogeneous_contig
|
||||
#define opal_pack_homogeneous_contig_with_gaps_function opal_pack_homogeneous_contig_with_gaps
|
||||
#define opal_generic_simple_pack_function opal_generic_simple_pack
|
||||
#define opal_pack_general_function opal_pack_general
|
||||
#endif /* defined(CHECKSUM) */
|
||||
|
||||
|
||||
@ -393,3 +395,199 @@ opal_generic_simple_pack_function( opal_convertor_t* pConvertor,
|
||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); );
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remember that the first item in the stack (ie. position 0) is the number
|
||||
* of times the datatype is involved in the operation (ie. the count argument
|
||||
* in the MPI_ call).
|
||||
*/
|
||||
/* Pack data from the user buffer described by the convertor into the supplied
|
||||
* output buffers (the iovec entries), converting it to the remote representation.
|
||||
* return OPAL_SUCCESS if everything went OK and if there is still room before the complete
|
||||
* conversion of the data (need additional call with other output buffers )
|
||||
* 1 if everything went fine and the data was completely converted
|
||||
* -1 something wrong occurs.
|
||||
*/
|
||||
|
||||
static inline void
|
||||
pack_predefined_heterogeneous( opal_convertor_t* CONVERTOR,
|
||||
const dt_elem_desc_t* ELEM,
|
||||
uint32_t* COUNT,
|
||||
unsigned char** SOURCE,
|
||||
unsigned char** DESTINATION,
|
||||
size_t* SPACE )
|
||||
{
|
||||
uint32_t _count = *(COUNT);
|
||||
size_t _r_blength, _l_blength;
|
||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||
unsigned char* _source = (*SOURCE) + _elem->disp;
|
||||
const opal_convertor_master_t* master = (CONVERTOR)->master;
|
||||
OPAL_PTRDIFF_TYPE advance;
|
||||
|
||||
_r_blength = master->remote_sizes[_elem->common.type];
|
||||
_l_blength = opal_datatype_basicDatatypes[_elem->common.type]->size;
|
||||
if( (_count * _r_blength) > *(SPACE) ) {
|
||||
_count = (uint32_t)(*(SPACE) / _r_blength);
|
||||
if( 0 == _count ) return; /* nothing to do */
|
||||
}
|
||||
|
||||
OPAL_DATATYPE_SAFEGUARD_POINTER( _source, (_count * _elem->extent), (CONVERTOR)->pBaseBuf,
|
||||
(CONVERTOR)->pDesc, (CONVERTOR)->count );
|
||||
DO_DEBUG( opal_output( 0, "pack [l %s r %s] memcpy( %p, %p, %lu ) => space %lu\n",
|
||||
((OPAL_PTRDIFF_TYPE)_l_blength == _elem->extent) ? "cont" : "----",
|
||||
((OPAL_PTRDIFF_TYPE)_r_blength == _elem->extent) ? "cont" : "----",
|
||||
*(DESTINATION), _source, (unsigned long)_r_blength,
|
||||
(unsigned long)(*(SPACE)) ); );
|
||||
master->pFunctions[_elem->common.type]( CONVERTOR, _count,
|
||||
_source, *SPACE, _elem->extent,
|
||||
*DESTINATION, *SPACE, _r_blength,
|
||||
&advance );
|
||||
_r_blength *= _count; /* update the remote length to encompass all the elements */
|
||||
*(SOURCE) += _count * _elem->extent;
|
||||
*(DESTINATION) += _r_blength;
|
||||
*(SPACE) -= _r_blength;
|
||||
*(COUNT) -= _count;
|
||||
}
|
||||
|
||||
int32_t
|
||||
opal_pack_general_function( opal_convertor_t* pConvertor,
|
||||
struct iovec* iov, uint32_t* out_size,
|
||||
size_t* max_data )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||
uint32_t count_desc; /* the number of items already done in the actual pos_desc */
|
||||
size_t total_packed = 0; /* total amount packed this time */
|
||||
dt_elem_desc_t* description;
|
||||
dt_elem_desc_t* pElem;
|
||||
const opal_datatype_t *pData = pConvertor->pDesc;
|
||||
unsigned char *conv_ptr, *iov_ptr;
|
||||
size_t iov_len_local;
|
||||
uint32_t iov_count;
|
||||
int type, rc;
|
||||
const opal_convertor_master_t* master = pConvertor->master;
|
||||
ptrdiff_t advance;
|
||||
|
||||
DO_DEBUG( opal_output( 0, "opal_convertor_general_pack( %p:%p, {%p, %lu}, %d )\n",
|
||||
(void*)pConvertor, (void*)pConvertor->pBaseBuf,
|
||||
iov[0].iov_base, (unsigned long)iov[0].iov_len, *out_size ); );
|
||||
|
||||
description = pConvertor->use_desc->desc;
|
||||
|
||||
/* For the first step we have to add both displacement to the source. After in the
|
||||
* main while loop we will set back the conv_ptr to the correct value. This is
|
||||
* due to the fact that the convertor can stop in the middle of a data with a count
|
||||
*/
|
||||
pStack = pConvertor->pStack + pConvertor->stack_pos;
|
||||
pos_desc = pStack->index;
|
||||
conv_ptr = pConvertor->pBaseBuf + pStack->disp;
|
||||
count_desc = (uint32_t)pStack->count;
|
||||
pStack--;
|
||||
pConvertor->stack_pos--;
|
||||
pElem = &(description[pos_desc]);
|
||||
|
||||
DO_DEBUG( opal_output( 0, "pack start pos_desc %d count_desc %d disp %ld\n"
|
||||
"stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
||||
pos_desc, count_desc, (long)(conv_ptr - pConvertor->pBaseBuf),
|
||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); );
|
||||
|
||||
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
||||
iov_ptr = (unsigned char *) iov[iov_count].iov_base;
|
||||
iov_len_local = iov[iov_count].iov_len;
|
||||
while( 1 ) {
|
||||
while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
|
||||
type = description[pos_desc].elem.common.type;
|
||||
/* now here we have a basic datatype */
|
||||
DO_DEBUG( opal_output( 0, "pack (%p:%ld, %d, %ld) -> (%p, %ld) type %s\n",
|
||||
pConvertor->pBaseBuf, conv_ptr + pElem->elem.disp - pConvertor->pBaseBuf,
|
||||
count_desc, description[pos_desc].elem.extent,
|
||||
iov_ptr, iov_len_local,
|
||||
opal_datatype_basicDatatypes[type]->name ); );
|
||||
|
||||
pack_predefined_heterogeneous( pConvertor, pElem, &count_desc,
|
||||
&conv_ptr, &iov_ptr, &iov_len_local);
|
||||
#if 0
|
||||
PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
|
||||
conv_ptr, iov_ptr, iov_len_local );
|
||||
#endif
|
||||
if( 0 == count_desc ) { /* completed */
|
||||
conv_ptr = pConvertor->pBaseBuf + pStack->disp;
|
||||
pos_desc++; /* advance to the next data */
|
||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||
continue;
|
||||
}
|
||||
goto complete_loop;
|
||||
}
|
||||
if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
|
||||
DO_DEBUG( opal_output( 0, "pack end_loop count %d stack_pos %d"
|
||||
" pos_desc %d disp %ld space %lu\n",
|
||||
(int)pStack->count, pConvertor->stack_pos,
|
||||
pos_desc, (long)pStack->disp, (unsigned long)iov_len_local ); );
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
if( 0 == pConvertor->stack_pos ) {
|
||||
/* we lie about the size of the next element in order to
|
||||
* make sure we exit the main loop.
|
||||
*/
|
||||
*out_size = iov_count;
|
||||
goto complete_loop; /* completed */
|
||||
}
|
||||
pConvertor->stack_pos--;
|
||||
pStack--;
|
||||
pos_desc++;
|
||||
} else {
|
||||
pos_desc = pStack->index + 1;
|
||||
if( pStack->index == -1 ) {
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
} else {
|
||||
assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type );
|
||||
pStack->disp += description[pStack->index].loop.extent;
|
||||
}
|
||||
}
|
||||
conv_ptr = pConvertor->pBaseBuf + pStack->disp;
|
||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||
DO_DEBUG( opal_output( 0, "pack new_loop count %d stack_pos %d pos_desc %d count_desc %d disp %ld space %lu\n",
|
||||
(int)pStack->count, pConvertor->stack_pos, pos_desc,
|
||||
count_desc, (long)pStack->disp, (unsigned long)iov_len_local ); );
|
||||
}
|
||||
if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) {
|
||||
OPAL_PTRDIFF_TYPE local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr;
|
||||
#if 0
|
||||
if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
|
||||
PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
|
||||
conv_ptr, iov_ptr, iov_len_local );
|
||||
if( 0 == count_desc ) { /* completed */
|
||||
pos_desc += pElem->loop.items + 1;
|
||||
goto update_loop_description;
|
||||
}
|
||||
/* Save the stack with the correct last_count value. */
|
||||
}
|
||||
#endif /* in a heterogeneous environment we can't handle the contiguous loops */
|
||||
local_disp = (OPAL_PTRDIFF_TYPE)conv_ptr - local_disp;
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_LOOP, count_desc,
|
||||
pStack->disp + local_disp);
|
||||
pos_desc++;
|
||||
update_loop_description: /* update the current state */
|
||||
conv_ptr = pConvertor->pBaseBuf + pStack->disp;
|
||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
complete_loop:
|
||||
iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */
|
||||
total_packed += iov[iov_count].iov_len;
|
||||
}
|
||||
*max_data = total_packed;
|
||||
pConvertor->bConverted += total_packed; /* update the already converted bytes */
|
||||
*out_size = iov_count;
|
||||
if( pConvertor->bConverted == pConvertor->local_size ) {
|
||||
pConvertor->flags |= CONVERTOR_COMPLETED;
|
||||
return 1;
|
||||
}
|
||||
/* Save the global position for the next round */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem->elem.common.type, count_desc,
|
||||
conv_ptr - pConvertor->pBaseBuf );
|
||||
DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
|
||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (long)pStack->disp ); );
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2016 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
@ -24,6 +24,14 @@ BEGIN_C_DECLS
|
||||
*/
|
||||
|
||||
OPAL_DECLSPEC int32_t
|
||||
opal_pack_general( opal_convertor_t* pConvertor,
|
||||
struct iovec* iov, uint32_t* out_size,
|
||||
size_t* max_data );
|
||||
OPAL_DECLSPEC int32_t
|
||||
opal_pack_general_checksum( opal_convertor_t* pConvertor,
|
||||
struct iovec* iov, uint32_t* out_size,
|
||||
size_t* max_data );
|
||||
OPAL_DECLSPEC int32_t
|
||||
opal_unpack_general( opal_convertor_t* pConvertor,
|
||||
struct iovec* iov, uint32_t* out_size,
|
||||
size_t* max_data );
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user