1
1

Get rid of the storage in the convertor. It wasn't working as expected in all

the cases. Instead, replace it with a better solution, which works even for
fragments received out of order. However, this solution works only for the
modes currently supported in ompi (homogeneous & heterogeneous with endianness).

The method is tricky. We rely on 2 partial unpacks. First we find a byte
value that does not occur in the data to unpack, and we pad the data with this
byte. Once we have the full expected length, we unpack the data, and all the
bytes in the unpacked form which do not match the unused byte are copied into
the user buffer. This way we reconstruct the unpacked data in 2 steps, once
for the beginning and once for the end.

This commit was SVN r10270.
Этот коммит содержится в:
George Bosilca 2006-06-08 23:35:07 +00:00
родитель 958a2b0863
Коммит 272ef9f412
4 изменённых файлов: 73 добавлений и 62 удалений

Просмотреть файл

@ -39,7 +39,7 @@ static void ompi_convertor_construct( ompi_convertor_t* convertor )
{
convertor->pStack = convertor->static_stack;
convertor->stack_size = DT_STATIC_STACK_SIZE;
convertor->storage.length = 0;
convertor->partial_length = 0;
}
static void ompi_convertor_destruct( ompi_convertor_t* convertor )
@ -318,7 +318,7 @@ int ompi_convertor_create_stack_at_begining( ompi_convertor_t* convertor,
dt_elem_desc_t* pElems;
convertor->stack_pos = 1;
convertor->storage.length = 0;
convertor->partial_length = 0;
convertor->bConverted = 0;
/* Fill the first position on the stack. This one correspond to the
* last fake DT_END_LOOP that we add to the data representation and
@ -573,7 +573,7 @@ void ompi_convertor_dump( ompi_convertor_t* convertor )
convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted );
printf( "\tlocal_size %ld remote_size %ld flags %X stack_size %d pending_length %d\n",
(unsigned long)convertor->local_size, (unsigned long)convertor->remote_size,
convertor->flags, convertor->stack_size, convertor->storage.length );
convertor->flags, convertor->stack_size, convertor->partial_length );
ompi_ddt_dump( convertor->pDesc );
printf( "Actual stack representation\n" );
ompi_ddt_dump_stack( convertor->pStack, convertor->stack_pos,

Просмотреть файл

@ -75,10 +75,6 @@ typedef struct dt_stack {
/**
*
*/
typedef struct {
char data[16];
uint32_t length;
} ompi_convertor_storage_t;
#define DT_STATIC_STACK_SIZE 5
@ -101,10 +97,10 @@ struct ompi_convertor_t {
/* All others fields get modified for every call to pack/unpack functions */
uint32_t stack_pos; /**< the actual position on the stack */
size_t bConverted; /**< # of bytes already converted */
uint32_t partial_length; /**< amount of data left over from the last unpack */
uint32_t checksum; /**< checksum computed by pack/unpack operation */
uint32_t csum_ui1; /**< partial checksum computed by pack/unpack operation */
uint32_t csum_ui2; /**< partial checksum computed by pack/unpack operation */
ompi_convertor_storage_t storage; /**< pending data from the last conversion */
dt_stack_t static_stack[DT_STATIC_STACK_SIZE]; /**< local stack for small datatypes */
};
OBJ_CLASS_DECLARATION( ompi_convertor_t );
@ -123,47 +119,6 @@ ompi_convertor_get_checksum( ompi_convertor_t* convertor )
return convertor->checksum;
}
/**
* Export the partially converted data to an outside entity.
*/
static inline int32_t ompi_convertor_export_storage( const ompi_convertor_t* convertor,
ompi_convertor_storage_t* storage )
{
/* The storage has a meaning only for receive side. */
assert( convertor->flags & CONVERTOR_RECV );
storage->length = convertor->storage.length;
assert( storage->length < 16 ); /* that's the maximum data length */
if( 0 != convertor->storage.length ) {
memcpy( storage->data, convertor->storage.data, storage->length );
}
return storage->length;
}
/**
* Import partially unpacked data back in the convertor, in order to use it
* on the next unpack operation.
*/
static inline int32_t ompi_convertor_import_storage( ompi_convertor_t* convertor,
const ompi_convertor_storage_t* storage )
{
/* The storage has a meaning only for receive side. */
assert( convertor->flags & CONVERTOR_RECV );
convertor->storage.length = storage->length;
assert( storage->length < 16 ); /* that's the maximum data length */
if( 0 != storage->length ) {
memcpy( convertor->storage.data, storage->data, storage->length );
}
return storage->length;
}
/**
* Reset the pending data attached to the convertor by reseting the length.
*/
static inline void ompi_convertor_reset_storage( ompi_convertor_t* convertor )
{
convertor->storage.length = 0;
}
/*
*
*/

Просмотреть файл

@ -253,6 +253,47 @@ ompi_unpack_homogeneous_contig_function( ompi_convertor_t* pConv,
return 0;
}
/**
 * Unpack a fragment of one predefined element into the user buffer.
 *
 * Only the bytes partial_data[start_position .. end_position) are treated as
 * real packed data. The remaining positions of the element are padded with a
 * sentinel byte value that does not occur in that range, the padded element
 * is unpacked into a local scratch buffer, and every unpacked byte that
 * differs from the sentinel is stored at the same index in user_buffer.
 * Bytes of user_buffer matching sentinel positions are left untouched.
 *
 * @param pConvertor     convertor handed through to UNPACK_PREDEFINED_DATATYPE
 * @param pElem          element description; its elem.common.type selects the
 *                       basic datatype whose size is the element length
 * @param partial_data   buffer holding the fragment; it is modified in place
 *                       (padding is written outside the valid range), so it
 *                       must provide room for the full element length
 * @param start_position index of the first valid byte in partial_data
 * @param end_position   index one past the last valid byte in partial_data
 * @param user_buffer    destination of the unpacked bytes
 *
 * @return the value of count_desc after the unpack macro has been invoked
 *         (count_desc starts at 1).
 *
 * NOTE(review): the scratch buffer is 16 bytes and nothing here checks that
 * the element size fits in it -- confirm that no basic datatype is larger.
 * NOTE(review): the sentinel test presumably relies on the unpack only
 * copying or reordering bytes; a conversion that changes byte values could
 * produce the sentinel from real data -- confirm against the supported modes.
 */
static inline uint32_t
ompi_unpack_partial_datatype( ompi_convertor_t* pConvertor, dt_elem_desc_t* pElem,
char* partial_data,
uint32_t start_position, uint32_t end_position,
char* user_buffer )
{
/* Candidate sentinel; decremented until it differs from every valid byte */
char unused_byte = 0x7F;
char unpacked_buffer[16], *unpacked_data = unpacked_buffer;
uint32_t i, count_desc = 1;
/* Full size in bytes of the basic datatype being unpacked */
uint32_t data_length = ompi_ddt_basicDatatypes[pElem->elem.common.type]->size;
/* First find a byte that is not used in the partial buffer. Whenever the
 * current candidate is found among the valid bytes, try the next lower
 * value and restart the scan from the beginning of the valid range.
 */
find_unused_byte:
for( i = start_position; i < end_position; i++ ) {
if( unused_byte == partial_data[i] ) {
unused_byte--;
goto find_unused_byte;
}
}
/* Fill the rest of the buffer (before and after the valid range) with the
 * unused byte
 */
for( i = 0; i < start_position; i++ )
partial_data[i] = unused_byte;
for( i = end_position; i < data_length; i++ )
partial_data[i] = unused_byte;
/* Then unpack it into the local scratch buffer.
 * NOTE(review): UNPACK_PREDEFINED_DATATYPE is a macro defined elsewhere. If
 * it updates its arguments (count_desc, partial_data, unpacked_data,
 * data_length), the copy loop below operates on the updated values --
 * verify against the macro definition.
 */
UNPACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
partial_data, unpacked_data, data_length );
/* For every occurrence of something different than the unused byte
 * move it from the unpacked buffer into the user memory.
 */
for( i = 0; i < data_length; i++ ) {
if( unused_byte != unpacked_data[i] ) {
user_buffer[i] = unpacked_data[i];
}
}
return count_desc;
}
/* The pack/unpack functions need a cleanup. I have to create a proper interface to access
* all basic functionalities, hence using them as basic blocks for all conversion functions.
*
@ -307,20 +348,23 @@ ompi_generic_simple_unpack_function( ompi_convertor_t* pConvertor,
packed_buffer = iov[iov_count].iov_base;
iov_len_local = iov[iov_count].iov_len;
if( 0 != pConvertor->storage.length ) {
if( 0 != pConvertor->partial_length ) {
char partial_buffer[16], *partial_data = partial_buffer;
uint32_t element_length = ompi_ddt_basicDatatypes[pElem->elem.common.type]->size;
uint32_t missing_length = element_length - pConvertor->storage.length;
uint32_t missing_length = element_length - pConvertor->partial_length;
uint32_t count;
assert( pElem->elem.common.flags & DT_FLAG_DATA );
#if defined(CHECKSUM)
pConvertor->checksum -= OPAL_CSUM(pConvertor->storage.data, pConvertor->storage.length);
pConvertor->checksum -= OPAL_CSUM(user_memory_base, pConvertor->partial_length);
#endif
memcpy(pConvertor->storage.data + pConvertor->storage.length, packed_buffer, missing_length);
packed_buffer = pConvertor->storage.data;
DO_DEBUG( opal_output( 0, "unpack pending from the last unpack %d out of %d bytes\n",
pConvertor->storage.length, ompi_ddt_basicDatatypes[pElem->elem.common.type]->size ); );
UNPACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
packed_buffer, user_memory_base, element_length );
memcpy( partial_data + pConvertor->partial_length, packed_buffer, missing_length);
count = ompi_unpack_partial_datatype( pConvertor, pElem,
partial_data,
pConvertor->partial_length, element_length,
user_memory_base );
count_desc -= count;
if( 0 == count_desc ) {
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
pos_desc++; /* advance to the next data */
@ -330,7 +374,7 @@ ompi_generic_simple_unpack_function( ompi_convertor_t* pConvertor,
packed_buffer = (char*)iov[iov_count].iov_base + missing_length;
iov_len_local -= missing_length;
pConvertor->bConverted += element_length;
pConvertor->storage.length = 0; /* nothing more inside */
pConvertor->partial_length = 0; /* nothing more inside */
}
while( 1 ) {
while( pElem->elem.common.flags & DT_FLAG_DATA ) {
@ -346,13 +390,21 @@ ompi_generic_simple_unpack_function( ompi_convertor_t* pConvertor,
type = pElem->elem.common.type;
assert( type < DT_MAX_PREDEFINED );
if( 0 != iov_len_local ) {
char partial_data[16];
/* We have some partial data here. Let's copy it into the convertor
* and keep it hot until the next round.
*/
assert( iov_len_local < ompi_ddt_basicDatatypes[type]->size );
MEMCPY_CSUM( pConvertor->storage.data, packed_buffer, iov_len_local, pConvertor );
DO_DEBUG( opal_output( 0, "Saving %d bytes for the next call\n", iov_len_local ); );
pConvertor->storage.length = iov_len_local;
MEMCPY_CSUM( partial_data, packed_buffer, iov_len_local, pConvertor );
/*opal_output( 0, "Saving %d bytes for the next call at address %p\n",
iov_len_local, user_memory_base );*/
ompi_unpack_partial_datatype( pConvertor, pElem,
partial_data, 0, iov_len_local,
user_memory_base );
pConvertor->partial_length = iov_len_local;
iov_len_local = 0;
}
goto complete_loop;

Просмотреть файл

@ -200,6 +200,10 @@ int ompi_convertor_generic_simple_position( ompi_convertor_t* pConvertor,
}
}
complete_loop:
if( 0 != pConvertor->partial_length )
assert( iov_len_local == pConvertor->partial_length );
pConvertor->partial_length = iov_len_local;
(*position) -= iov_len_local;
pConvertor->bConverted = *position; /* update the already converted bytes */