Correctly deal with partially converted datatypes, both during the unpack
and during the positioning. Fixes trac:4610.

This commit was SVN r31904.

The following Trac tickets were found above:
Ticket 4610 --> https://svn.open-mpi.org/trac/ompi/ticket/4610
Этот коммит содержится в:
родитель
843bc4f024
Коммит
6290f6cc58
@ -418,17 +418,19 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
|
||||
int32_t rc;
|
||||
|
||||
/**
|
||||
* If we plan to rollback the convertor then first we have to set it
|
||||
* at the beginning.
|
||||
* create_stack_with_pos_contig always sets the position relative to the ZERO
|
||||
* position, so there is no need for special handling. In all other cases,
|
||||
* if we plan to rollback the convertor then first we have to reset it at
|
||||
* the beginning.
|
||||
*/
|
||||
if( (0 == (*position)) || ((*position) < convertor->bConverted) ) {
|
||||
rc = opal_convertor_create_stack_at_begining( convertor, opal_datatype_local_sizes );
|
||||
if( 0 == (*position) ) return rc;
|
||||
}
|
||||
if( OPAL_LIKELY(convertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) {
|
||||
rc = opal_convertor_create_stack_with_pos_contig( convertor, (*position),
|
||||
opal_datatype_local_sizes );
|
||||
} else {
|
||||
if( (0 == (*position)) || ((*position) < convertor->bConverted) ) {
|
||||
rc = opal_convertor_create_stack_at_begining( convertor, opal_datatype_local_sizes );
|
||||
if( 0 == (*position) ) return rc;
|
||||
}
|
||||
rc = opal_convertor_generic_simple_position( convertor, position );
|
||||
/**
|
||||
* If we have a non-contiguous send convertor, don't allow it to move in the middle
|
||||
|
@ -3,7 +3,7 @@
|
||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||
@ -12,6 +12,8 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -115,10 +117,11 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
||||
OPAL_PTRDIFF_TYPE extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb;
|
||||
|
||||
DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position );
|
||||
assert(*position > pConvertor->bConverted);
|
||||
|
||||
/* We don't want to have to parse the datatype multiple times. What we are interested in
|
||||
* here is to compute the number of completed datatypes that we can move forward, update
|
||||
* the the counters and finally compute the position taking in account only the remaining
|
||||
* the counters and finally compute the position taking into account only the remaining
|
||||
* elements. The only problem is that we have to modify all the elements on the stack.
|
||||
*/
|
||||
iov_len_local = *position - pConvertor->bConverted;
|
||||
@ -129,11 +132,8 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
||||
" iov_len_local %lu count_desc %d\n",
|
||||
(unsigned long)pConvertor->bConverted, (unsigned long)*position, (unsigned long)pConvertor->pDesc->size,
|
||||
(unsigned long)iov_len_local, count_desc ); );
|
||||
/**
|
||||
* Update all the stack except the last one which is supposed to be for
|
||||
* the last partial element description.
|
||||
*/
|
||||
for( type = 0; type < pConvertor->stack_pos; type++ )
|
||||
/* Update all the stack including the last one */
|
||||
for( type = 0; type <= pConvertor->stack_pos; type++ )
|
||||
pStack[type].disp += count_desc * extent;
|
||||
pConvertor->bConverted += count_desc * pConvertor->pDesc->size;
|
||||
iov_len_local = *position - pConvertor->bConverted;
|
||||
@ -149,13 +149,27 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
||||
pStack--;
|
||||
pConvertor->stack_pos--;
|
||||
pElem = &(description[pos_desc]);
|
||||
base_pointer += pStack->disp;
|
||||
|
||||
DO_DEBUG( opal_output( 0, "position start pos_desc %d count_desc %d disp %llx\n"
|
||||
"stack_pos %d pos_desc %d count_desc %d disp %llx\n",
|
||||
pos_desc, count_desc, (unsigned long long)(base_pointer - pConvertor->pBaseBuf),
|
||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); );
|
||||
|
||||
/* Last data has been only partially converted. Compute the relative position */
|
||||
if( 0 != pConvertor->partial_length ) {
|
||||
size_t element_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size;
|
||||
size_t missing_length = element_length - pConvertor->partial_length;
|
||||
if( missing_length >= iov_len_local ) {
|
||||
pConvertor->partial_length = (pConvertor->partial_length + iov_len_local) % element_length;
|
||||
pConvertor->bConverted += iov_len_local;
|
||||
assert(pConvertor->partial_length < element_length);
|
||||
return 0;
|
||||
}
|
||||
pConvertor->partial_length = (pConvertor->partial_length + missing_length) % element_length;
|
||||
assert(pConvertor->partial_length == 0);
|
||||
pConvertor->bConverted += missing_length;
|
||||
iov_len_local -= missing_length;
|
||||
count_desc--;
|
||||
}
|
||||
while( 1 ) {
|
||||
if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
|
||||
DO_DEBUG( opal_output( 0, "position end_loop count %d stack_pos %d pos_desc %d disp %llx space %lu\n",
|
||||
@ -232,7 +246,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
||||
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
|
||||
/* I complete an element, next step I should go to the next one */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc,
|
||||
base_pointer - pStack->disp - pConvertor->pBaseBuf );
|
||||
base_pointer - pConvertor->pBaseBuf );
|
||||
DO_DEBUG( opal_output( 0, "position save stack stack_pos %d pos_desc %d count_desc %d disp %llx\n",
|
||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); );
|
||||
return 0;
|
||||
|
@ -166,23 +166,22 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
|
||||
static inline uint32_t
|
||||
opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem,
|
||||
unsigned char* partial_data,
|
||||
OPAL_PTRDIFF_TYPE start_position, OPAL_PTRDIFF_TYPE end_position,
|
||||
OPAL_PTRDIFF_TYPE start_position, OPAL_PTRDIFF_TYPE length,
|
||||
unsigned char** user_buffer )
|
||||
{
|
||||
char unused_byte = 0x7F, saved_data[16];
|
||||
unsigned char temporary[16], *temporary_buffer = temporary;
|
||||
unsigned char* real_data = *user_buffer + pElem->elem.disp;
|
||||
uint32_t i, length, count_desc = 1;
|
||||
unsigned char* user_data = *user_buffer + pElem->elem.disp;
|
||||
uint32_t i, count_desc = 1;
|
||||
size_t data_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size;
|
||||
|
||||
DO_DEBUG( opal_output( 0, "unpack partial data start %lu end %lu data_length %lu user %p\n"
|
||||
"\tbConverted %lu total_length %lu count %d\n",
|
||||
(unsigned long)start_position, (unsigned long)end_position, (unsigned long)data_length, *user_buffer,
|
||||
(unsigned long)start_position, (unsigned long)start_position + length, (unsigned long)data_length, *user_buffer,
|
||||
(unsigned long)pConvertor->bConverted, (unsigned long)pConvertor->local_size, pConvertor->count ); );
|
||||
|
||||
/* Find a byte that is not used in the partial buffer */
|
||||
find_unused_byte:
|
||||
length = (uint32_t)(end_position - start_position);
|
||||
for( i = 0; i < length; i++ ) {
|
||||
if( unused_byte == partial_data[i] ) {
|
||||
unused_byte--;
|
||||
@ -192,17 +191,16 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle
|
||||
|
||||
/* Copy and fill the rest of the buffer with the unused byte */
|
||||
memset( temporary, unused_byte, data_length );
|
||||
MEMCPY( temporary + start_position, partial_data, (end_position - start_position) );
|
||||
MEMCPY( temporary + start_position, partial_data, length );
|
||||
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* In the case where the data is being unpacked from device
|
||||
* memory, need to use the special host to device memory copy.
|
||||
* Note this code path was only seen on large receives of
|
||||
* noncontiguous data via buffered sends. */
|
||||
pConvertor->cbmemcpy(saved_data, real_data, data_length, pConvertor );
|
||||
/* In the case where the data is being unpacked from device memory, need to
|
||||
* use the special host to device memory copy. Note this code path was only
|
||||
* seen on large receives of noncontiguous data via buffered sends. */
|
||||
pConvertor->cbmemcpy(saved_data, user_data, data_length, pConvertor );
|
||||
#else
|
||||
/* Save the content of the user memory */
|
||||
MEMCPY( saved_data, real_data, data_length );
|
||||
MEMCPY( saved_data, user_data, data_length );
|
||||
#endif
|
||||
|
||||
/* Then unpack the data into the user memory */
|
||||
@ -216,22 +214,22 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle
|
||||
* buffer back into the user memory.
|
||||
*/
|
||||
#if OPAL_CUDA_SUPPORT
|
||||
/* Need to copy the modified real_data again so we can see which
|
||||
/* Need to copy the modified user_data again so we can see which
|
||||
* bytes need to be converted back to their original values. Note
|
||||
* this code path was only seen on large receives of noncontiguous
|
||||
* data via buffered sends. */
|
||||
{
|
||||
char resaved_data[16];
|
||||
pConvertor->cbmemcpy(resaved_data, real_data, data_length, pConvertor );
|
||||
pConvertor->cbmemcpy(resaved_data, user_data, data_length, pConvertor );
|
||||
for( i = 0; i < data_length; i++ ) {
|
||||
if( unused_byte == resaved_data[i] )
|
||||
pConvertor->cbmemcpy(&real_data[i], &saved_data[i], 1, pConvertor);
|
||||
pConvertor->cbmemcpy(&user_data[i], &saved_data[i], 1, pConvertor);
|
||||
}
|
||||
}
|
||||
#else
|
||||
for( i = 0; i < data_length; i++ ) {
|
||||
if( unused_byte == real_data[i] )
|
||||
real_data[i] = saved_data[i];
|
||||
if( unused_byte == user_data[i] )
|
||||
user_data[i] = saved_data[i];
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
@ -296,7 +294,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor,
|
||||
COMPUTE_CSUM( iov_ptr, missing_length, pConvertor );
|
||||
opal_unpack_partial_datatype( pConvertor, pElem,
|
||||
iov_ptr,
|
||||
pConvertor->partial_length, element_length,
|
||||
pConvertor->partial_length, element_length - pConvertor->partial_length,
|
||||
&conv_ptr );
|
||||
--count_desc;
|
||||
if( 0 == count_desc ) {
|
||||
@ -304,7 +302,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor,
|
||||
pos_desc++; /* advance to the next data */
|
||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||
}
|
||||
iov_ptr += missing_length;
|
||||
iov_ptr += missing_length;
|
||||
iov_len_local -= missing_length;
|
||||
pConvertor->partial_length = 0; /* nothing more inside */
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user