Correctly deal with partially converted datatypes both during the unpack and during the positioning.

Fixes trac:4610.

This commit was SVN r31904.

The following Trac tickets were found above:
  Ticket 4610 --> https://svn.open-mpi.org/trac/ompi/ticket/4610
Этот коммит содержится в:
родитель
843bc4f024
Коммит
6290f6cc58
@ -418,17 +418,19 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
|
|||||||
int32_t rc;
|
int32_t rc;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If we plan to rollback the convertor then first we have to set it
|
* create_stack_with_pos_contig always sets the position relative to the ZERO
|
||||||
* at the beginning.
|
* position, so there is no need for special handling. In all other cases,
|
||||||
|
* if we plan to rollback the convertor then first we have to reset it at
|
||||||
|
* the beginning.
|
||||||
*/
|
*/
|
||||||
if( (0 == (*position)) || ((*position) < convertor->bConverted) ) {
|
|
||||||
rc = opal_convertor_create_stack_at_begining( convertor, opal_datatype_local_sizes );
|
|
||||||
if( 0 == (*position) ) return rc;
|
|
||||||
}
|
|
||||||
if( OPAL_LIKELY(convertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) {
|
if( OPAL_LIKELY(convertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) {
|
||||||
rc = opal_convertor_create_stack_with_pos_contig( convertor, (*position),
|
rc = opal_convertor_create_stack_with_pos_contig( convertor, (*position),
|
||||||
opal_datatype_local_sizes );
|
opal_datatype_local_sizes );
|
||||||
} else {
|
} else {
|
||||||
|
if( (0 == (*position)) || ((*position) < convertor->bConverted) ) {
|
||||||
|
rc = opal_convertor_create_stack_at_begining( convertor, opal_datatype_local_sizes );
|
||||||
|
if( 0 == (*position) ) return rc;
|
||||||
|
}
|
||||||
rc = opal_convertor_generic_simple_position( convertor, position );
|
rc = opal_convertor_generic_simple_position( convertor, position );
|
||||||
/**
|
/**
|
||||||
* If we have a non-contiguous send convertor don't allow it to move in the middle
|
* If we have a non-contiguous send convertor don't allow it to move in the middle
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||||
* University Research and Technology
|
* University Research and Technology
|
||||||
* Corporation. All rights reserved.
|
* Corporation. All rights reserved.
|
||||||
* Copyright (c) 2004-2009 The University of Tennessee and The University
|
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||||
* of Tennessee Research Foundation. All rights
|
* of Tennessee Research Foundation. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
|
||||||
@ -12,6 +12,8 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
|
||||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||||
|
* Copyright (c) 2014 Research Organization for Information Science
|
||||||
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -115,10 +117,11 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
|||||||
OPAL_PTRDIFF_TYPE extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb;
|
OPAL_PTRDIFF_TYPE extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb;
|
||||||
|
|
||||||
DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position );
|
DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position );
|
||||||
|
assert(*position > pConvertor->bConverted);
|
||||||
|
|
||||||
/* We don't want to have to parse the datatype multiple times. What we are interested in
|
/* We don't want to have to parse the datatype multiple times. What we are interested in
|
||||||
* here is to compute the number of completed datatypes that we can move forward, update
|
* here is to compute the number of completed datatypes that we can move forward, update
|
||||||
* the the counters and finally compute the position taking in account only the remaining
|
* the counters and finally compute the position taking in account only the remaining
|
||||||
* elements. The only problem is that we have to modify all the elements on the stack.
|
* elements. The only problem is that we have to modify all the elements on the stack.
|
||||||
*/
|
*/
|
||||||
iov_len_local = *position - pConvertor->bConverted;
|
iov_len_local = *position - pConvertor->bConverted;
|
||||||
@ -129,11 +132,8 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
|||||||
" iov_len_local %lu count_desc %d\n",
|
" iov_len_local %lu count_desc %d\n",
|
||||||
(unsigned long)pConvertor->bConverted, (unsigned long)*position, (unsigned long)pConvertor->pDesc->size,
|
(unsigned long)pConvertor->bConverted, (unsigned long)*position, (unsigned long)pConvertor->pDesc->size,
|
||||||
(unsigned long)iov_len_local, count_desc ); );
|
(unsigned long)iov_len_local, count_desc ); );
|
||||||
/**
|
/* Update all the stack including the last one */
|
||||||
* Update all the stack except the last one which is supposed to be for
|
for( type = 0; type <= pConvertor->stack_pos; type++ )
|
||||||
* the last partial element description.
|
|
||||||
*/
|
|
||||||
for( type = 0; type < pConvertor->stack_pos; type++ )
|
|
||||||
pStack[type].disp += count_desc * extent;
|
pStack[type].disp += count_desc * extent;
|
||||||
pConvertor->bConverted += count_desc * pConvertor->pDesc->size;
|
pConvertor->bConverted += count_desc * pConvertor->pDesc->size;
|
||||||
iov_len_local = *position - pConvertor->bConverted;
|
iov_len_local = *position - pConvertor->bConverted;
|
||||||
@ -149,13 +149,27 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
|||||||
pStack--;
|
pStack--;
|
||||||
pConvertor->stack_pos--;
|
pConvertor->stack_pos--;
|
||||||
pElem = &(description[pos_desc]);
|
pElem = &(description[pos_desc]);
|
||||||
base_pointer += pStack->disp;
|
|
||||||
|
|
||||||
DO_DEBUG( opal_output( 0, "position start pos_desc %d count_desc %d disp %llx\n"
|
DO_DEBUG( opal_output( 0, "position start pos_desc %d count_desc %d disp %llx\n"
|
||||||
"stack_pos %d pos_desc %d count_desc %d disp %llx\n",
|
"stack_pos %d pos_desc %d count_desc %d disp %llx\n",
|
||||||
pos_desc, count_desc, (unsigned long long)(base_pointer - pConvertor->pBaseBuf),
|
pos_desc, count_desc, (unsigned long long)(base_pointer - pConvertor->pBaseBuf),
|
||||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); );
|
pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); );
|
||||||
|
/* Last data has been only partially converted. Compute the relative position */
|
||||||
|
if( 0 != pConvertor->partial_length ) {
|
||||||
|
size_t element_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size;
|
||||||
|
size_t missing_length = element_length - pConvertor->partial_length;
|
||||||
|
if( missing_length >= iov_len_local ) {
|
||||||
|
pConvertor->partial_length = (pConvertor->partial_length + iov_len_local) % element_length;
|
||||||
|
pConvertor->bConverted += iov_len_local;
|
||||||
|
assert(pConvertor->partial_length < element_length);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
pConvertor->partial_length = (pConvertor->partial_length + missing_length) % element_length;
|
||||||
|
assert(pConvertor->partial_length == 0);
|
||||||
|
pConvertor->bConverted += missing_length;
|
||||||
|
iov_len_local -= missing_length;
|
||||||
|
count_desc--;
|
||||||
|
}
|
||||||
while( 1 ) {
|
while( 1 ) {
|
||||||
if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
|
if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
|
||||||
DO_DEBUG( opal_output( 0, "position end_loop count %d stack_pos %d pos_desc %d disp %llx space %lu\n",
|
DO_DEBUG( opal_output( 0, "position end_loop count %d stack_pos %d pos_desc %d disp %llx space %lu\n",
|
||||||
@ -232,7 +246,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
|
|||||||
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
|
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
|
||||||
/* I complete an element, next step I should go to the next one */
|
/* I complete an element, next step I should go to the next one */
|
||||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc,
|
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc,
|
||||||
base_pointer - pStack->disp - pConvertor->pBaseBuf );
|
base_pointer - pConvertor->pBaseBuf );
|
||||||
DO_DEBUG( opal_output( 0, "position save stack stack_pos %d pos_desc %d count_desc %d disp %llx\n",
|
DO_DEBUG( opal_output( 0, "position save stack stack_pos %d pos_desc %d count_desc %d disp %llx\n",
|
||||||
pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); );
|
pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); );
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -166,23 +166,22 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
|
|||||||
static inline uint32_t
|
static inline uint32_t
|
||||||
opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem,
|
opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem,
|
||||||
unsigned char* partial_data,
|
unsigned char* partial_data,
|
||||||
OPAL_PTRDIFF_TYPE start_position, OPAL_PTRDIFF_TYPE end_position,
|
OPAL_PTRDIFF_TYPE start_position, OPAL_PTRDIFF_TYPE length,
|
||||||
unsigned char** user_buffer )
|
unsigned char** user_buffer )
|
||||||
{
|
{
|
||||||
char unused_byte = 0x7F, saved_data[16];
|
char unused_byte = 0x7F, saved_data[16];
|
||||||
unsigned char temporary[16], *temporary_buffer = temporary;
|
unsigned char temporary[16], *temporary_buffer = temporary;
|
||||||
unsigned char* real_data = *user_buffer + pElem->elem.disp;
|
unsigned char* user_data = *user_buffer + pElem->elem.disp;
|
||||||
uint32_t i, length, count_desc = 1;
|
uint32_t i, count_desc = 1;
|
||||||
size_t data_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size;
|
size_t data_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size;
|
||||||
|
|
||||||
DO_DEBUG( opal_output( 0, "unpack partial data start %lu end %lu data_length %lu user %p\n"
|
DO_DEBUG( opal_output( 0, "unpack partial data start %lu end %lu data_length %lu user %p\n"
|
||||||
"\tbConverted %lu total_length %lu count %d\n",
|
"\tbConverted %lu total_length %lu count %d\n",
|
||||||
(unsigned long)start_position, (unsigned long)end_position, (unsigned long)data_length, *user_buffer,
|
(unsigned long)start_position, (unsigned long)start_position + length, (unsigned long)data_length, *user_buffer,
|
||||||
(unsigned long)pConvertor->bConverted, (unsigned long)pConvertor->local_size, pConvertor->count ); );
|
(unsigned long)pConvertor->bConverted, (unsigned long)pConvertor->local_size, pConvertor->count ); );
|
||||||
|
|
||||||
/* Find a byte that is not used in the partial buffer */
|
/* Find a byte that is not used in the partial buffer */
|
||||||
find_unused_byte:
|
find_unused_byte:
|
||||||
length = (uint32_t)(end_position - start_position);
|
|
||||||
for( i = 0; i < length; i++ ) {
|
for( i = 0; i < length; i++ ) {
|
||||||
if( unused_byte == partial_data[i] ) {
|
if( unused_byte == partial_data[i] ) {
|
||||||
unused_byte--;
|
unused_byte--;
|
||||||
@ -192,17 +191,16 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle
|
|||||||
|
|
||||||
/* Copy and fill the rest of the buffer with the unused byte */
|
/* Copy and fill the rest of the buffer with the unused byte */
|
||||||
memset( temporary, unused_byte, data_length );
|
memset( temporary, unused_byte, data_length );
|
||||||
MEMCPY( temporary + start_position, partial_data, (end_position - start_position) );
|
MEMCPY( temporary + start_position, partial_data, length );
|
||||||
|
|
||||||
#if OPAL_CUDA_SUPPORT
|
#if OPAL_CUDA_SUPPORT
|
||||||
/* In the case where the data is being unpacked from device
|
/* In the case where the data is being unpacked from device memory, need to
|
||||||
* memory, need to use the special host to device memory copy.
|
* use the special host to device memory copy. Note this code path was only
|
||||||
* Note this code path was only seen on large receives of
|
* seen on large receives of noncontiguous data via buffered sends. */
|
||||||
* noncontiguous data via buffered sends. */
|
pConvertor->cbmemcpy(saved_data, user_data, data_length, pConvertor );
|
||||||
pConvertor->cbmemcpy(saved_data, real_data, data_length, pConvertor );
|
|
||||||
#else
|
#else
|
||||||
/* Save the content of the user memory */
|
/* Save the content of the user memory */
|
||||||
MEMCPY( saved_data, real_data, data_length );
|
MEMCPY( saved_data, user_data, data_length );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Then unpack the data into the user memory */
|
/* Then unpack the data into the user memory */
|
||||||
@ -216,22 +214,22 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle
|
|||||||
* buffer back into the user memory.
|
* buffer back into the user memory.
|
||||||
*/
|
*/
|
||||||
#if OPAL_CUDA_SUPPORT
|
#if OPAL_CUDA_SUPPORT
|
||||||
/* Need to copy the modified real_data again so we can see which
|
/* Need to copy the modified user_data again so we can see which
|
||||||
* bytes need to be converted back to their original values. Note
|
* bytes need to be converted back to their original values. Note
|
||||||
* this code path was only seen on large receives of noncontiguous
|
* this code path was only seen on large receives of noncontiguous
|
||||||
* data via buffered sends. */
|
* data via buffered sends. */
|
||||||
{
|
{
|
||||||
char resaved_data[16];
|
char resaved_data[16];
|
||||||
pConvertor->cbmemcpy(resaved_data, real_data, data_length, pConvertor );
|
pConvertor->cbmemcpy(resaved_data, user_data, data_length, pConvertor );
|
||||||
for( i = 0; i < data_length; i++ ) {
|
for( i = 0; i < data_length; i++ ) {
|
||||||
if( unused_byte == resaved_data[i] )
|
if( unused_byte == resaved_data[i] )
|
||||||
pConvertor->cbmemcpy(&real_data[i], &saved_data[i], 1, pConvertor);
|
pConvertor->cbmemcpy(&user_data[i], &saved_data[i], 1, pConvertor);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
for( i = 0; i < data_length; i++ ) {
|
for( i = 0; i < data_length; i++ ) {
|
||||||
if( unused_byte == real_data[i] )
|
if( unused_byte == user_data[i] )
|
||||||
real_data[i] = saved_data[i];
|
user_data[i] = saved_data[i];
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
@ -296,7 +294,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor,
|
|||||||
COMPUTE_CSUM( iov_ptr, missing_length, pConvertor );
|
COMPUTE_CSUM( iov_ptr, missing_length, pConvertor );
|
||||||
opal_unpack_partial_datatype( pConvertor, pElem,
|
opal_unpack_partial_datatype( pConvertor, pElem,
|
||||||
iov_ptr,
|
iov_ptr,
|
||||||
pConvertor->partial_length, element_length,
|
pConvertor->partial_length, element_length - pConvertor->partial_length,
|
||||||
&conv_ptr );
|
&conv_ptr );
|
||||||
--count_desc;
|
--count_desc;
|
||||||
if( 0 == count_desc ) {
|
if( 0 == count_desc ) {
|
||||||
@ -304,7 +302,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor,
|
|||||||
pos_desc++; /* advance to the next data */
|
pos_desc++; /* advance to the next data */
|
||||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||||
}
|
}
|
||||||
iov_ptr += missing_length;
|
iov_ptr += missing_length;
|
||||||
iov_len_local -= missing_length;
|
iov_len_local -= missing_length;
|
||||||
pConvertor->partial_length = 0; /* nothing more inside */
|
pConvertor->partial_length = 0; /* nothing more inside */
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user