1
1

Correctly deal with partially converted datatypes, both during the unpack and during the positioning.

Fixes trac:4610.

This commit was SVN r31904.

The following Trac tickets were found above:
  Ticket 4610 --> https://svn.open-mpi.org/trac/ompi/ticket/4610
Этот коммит содержится в:
George Bosilca 2014-05-29 21:53:44 +00:00
родитель 843bc4f024
Коммит 6290f6cc58
3 изменённых файлов: 49 добавлений и 35 удалений

Просмотреть файл

@ -418,17 +418,19 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor,
int32_t rc; int32_t rc;
/** /**
* If we plan to rollback the convertor then first we have to set it * create_stack_with_pos_contig always set the position relative to the ZERO
* at the beginning. * position, so there is no need for special handling. In all other cases,
* if we plan to rollback the convertor then first we have to reset it at
* the beginning.
*/ */
if( (0 == (*position)) || ((*position) < convertor->bConverted) ) {
rc = opal_convertor_create_stack_at_begining( convertor, opal_datatype_local_sizes );
if( 0 == (*position) ) return rc;
}
if( OPAL_LIKELY(convertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) { if( OPAL_LIKELY(convertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) {
rc = opal_convertor_create_stack_with_pos_contig( convertor, (*position), rc = opal_convertor_create_stack_with_pos_contig( convertor, (*position),
opal_datatype_local_sizes ); opal_datatype_local_sizes );
} else { } else {
if( (0 == (*position)) || ((*position) < convertor->bConverted) ) {
rc = opal_convertor_create_stack_at_begining( convertor, opal_datatype_local_sizes );
if( 0 == (*position) ) return rc;
}
rc = opal_convertor_generic_simple_position( convertor, position ); rc = opal_convertor_generic_simple_position( convertor, position );
/** /**
* If we have a non-contigous send convertor don't allow it move in the middle * If we have a non-contigous send convertor don't allow it move in the middle

Просмотреть файл

@ -3,7 +3,7 @@
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
* University Research and Technology * University Research and Technology
* Corporation. All rights reserved. * Corporation. All rights reserved.
* Copyright (c) 2004-2009 The University of Tennessee and The University * Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights * of Tennessee Research Foundation. All rights
* reserved. * reserved.
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, * Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@ -12,6 +12,8 @@
* All rights reserved. * All rights reserved.
* Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -115,10 +117,11 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
OPAL_PTRDIFF_TYPE extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb; OPAL_PTRDIFF_TYPE extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb;
DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position ); DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position );
assert(*position > pConvertor->bConverted);
/* We dont want to have to parse the datatype multiple times. What we are interested in /* We dont want to have to parse the datatype multiple times. What we are interested in
* here is to compute the number of completed datatypes that we can move forward, update * here is to compute the number of completed datatypes that we can move forward, update
* the the counters and finally compute the position taking in account only the remaining * the counters and finally compute the position taking in account only the remaining
* elements. The only problem is that we have to modify all the elements on the stack. * elements. The only problem is that we have to modify all the elements on the stack.
*/ */
iov_len_local = *position - pConvertor->bConverted; iov_len_local = *position - pConvertor->bConverted;
@ -129,11 +132,8 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
" iov_len_local %lu count_desc %d\n", " iov_len_local %lu count_desc %d\n",
(unsigned long)pConvertor->bConverted, (unsigned long)*position, (unsigned long)pConvertor->pDesc->size, (unsigned long)pConvertor->bConverted, (unsigned long)*position, (unsigned long)pConvertor->pDesc->size,
(unsigned long)iov_len_local, count_desc ); ); (unsigned long)iov_len_local, count_desc ); );
/** /* Update all the stack including the last one */
* Update all the stack except the last one which is supposed to be for for( type = 0; type <= pConvertor->stack_pos; type++ )
* the last partial element description.
*/
for( type = 0; type < pConvertor->stack_pos; type++ )
pStack[type].disp += count_desc * extent; pStack[type].disp += count_desc * extent;
pConvertor->bConverted += count_desc * pConvertor->pDesc->size; pConvertor->bConverted += count_desc * pConvertor->pDesc->size;
iov_len_local = *position - pConvertor->bConverted; iov_len_local = *position - pConvertor->bConverted;
@ -149,13 +149,27 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
pStack--; pStack--;
pConvertor->stack_pos--; pConvertor->stack_pos--;
pElem = &(description[pos_desc]); pElem = &(description[pos_desc]);
base_pointer += pStack->disp;
DO_DEBUG( opal_output( 0, "position start pos_desc %d count_desc %d disp %llx\n" DO_DEBUG( opal_output( 0, "position start pos_desc %d count_desc %d disp %llx\n"
"stack_pos %d pos_desc %d count_desc %d disp %llx\n", "stack_pos %d pos_desc %d count_desc %d disp %llx\n",
pos_desc, count_desc, (unsigned long long)(base_pointer - pConvertor->pBaseBuf), pos_desc, count_desc, (unsigned long long)(base_pointer - pConvertor->pBaseBuf),
pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); ); pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); );
/* Last data has been only partially converted. Compute the relative position */
if( 0 != pConvertor->partial_length ) {
size_t element_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size;
size_t missing_length = element_length - pConvertor->partial_length;
if( missing_length >= iov_len_local ) {
pConvertor->partial_length = (pConvertor->partial_length + iov_len_local) % element_length;
pConvertor->bConverted += iov_len_local;
assert(pConvertor->partial_length < element_length);
return 0;
}
pConvertor->partial_length = (pConvertor->partial_length + missing_length) % element_length;
assert(pConvertor->partial_length == 0);
pConvertor->bConverted += missing_length;
iov_len_local -= missing_length;
count_desc--;
}
while( 1 ) { while( 1 ) {
if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
DO_DEBUG( opal_output( 0, "position end_loop count %d stack_pos %d pos_desc %d disp %llx space %lu\n", DO_DEBUG( opal_output( 0, "position end_loop count %d stack_pos %d pos_desc %d disp %llx space %lu\n",
@ -232,7 +246,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) { if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
/* I complete an element, next step I should go to the next one */ /* I complete an element, next step I should go to the next one */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc, PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc,
base_pointer - pStack->disp - pConvertor->pBaseBuf ); base_pointer - pConvertor->pBaseBuf );
DO_DEBUG( opal_output( 0, "position save stack stack_pos %d pos_desc %d count_desc %d disp %llx\n", DO_DEBUG( opal_output( 0, "position save stack stack_pos %d pos_desc %d count_desc %d disp %llx\n",
pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); ); pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); );
return 0; return 0;

Просмотреть файл

@ -166,23 +166,22 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
static inline uint32_t static inline uint32_t
opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem, opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem,
unsigned char* partial_data, unsigned char* partial_data,
OPAL_PTRDIFF_TYPE start_position, OPAL_PTRDIFF_TYPE end_position, OPAL_PTRDIFF_TYPE start_position, OPAL_PTRDIFF_TYPE length,
unsigned char** user_buffer ) unsigned char** user_buffer )
{ {
char unused_byte = 0x7F, saved_data[16]; char unused_byte = 0x7F, saved_data[16];
unsigned char temporary[16], *temporary_buffer = temporary; unsigned char temporary[16], *temporary_buffer = temporary;
unsigned char* real_data = *user_buffer + pElem->elem.disp; unsigned char* user_data = *user_buffer + pElem->elem.disp;
uint32_t i, length, count_desc = 1; uint32_t i, count_desc = 1;
size_t data_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; size_t data_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size;
DO_DEBUG( opal_output( 0, "unpack partial data start %lu end %lu data_length %lu user %p\n" DO_DEBUG( opal_output( 0, "unpack partial data start %lu end %lu data_length %lu user %p\n"
"\tbConverted %lu total_length %lu count %d\n", "\tbConverted %lu total_length %lu count %d\n",
(unsigned long)start_position, (unsigned long)end_position, (unsigned long)data_length, *user_buffer, (unsigned long)start_position, (unsigned long)start_position + length, (unsigned long)data_length, *user_buffer,
(unsigned long)pConvertor->bConverted, (unsigned long)pConvertor->local_size, pConvertor->count ); ); (unsigned long)pConvertor->bConverted, (unsigned long)pConvertor->local_size, pConvertor->count ); );
/* Find a byte that is not used in the partial buffer */ /* Find a byte that is not used in the partial buffer */
find_unused_byte: find_unused_byte:
length = (uint32_t)(end_position - start_position);
for( i = 0; i < length; i++ ) { for( i = 0; i < length; i++ ) {
if( unused_byte == partial_data[i] ) { if( unused_byte == partial_data[i] ) {
unused_byte--; unused_byte--;
@ -192,17 +191,16 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle
/* Copy and fill the rest of the buffer with the unused byte */ /* Copy and fill the rest of the buffer with the unused byte */
memset( temporary, unused_byte, data_length ); memset( temporary, unused_byte, data_length );
MEMCPY( temporary + start_position, partial_data, (end_position - start_position) ); MEMCPY( temporary + start_position, partial_data, length );
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
/* In the case where the data is being unpacked from device /* In the case where the data is being unpacked from device memory, need to
* memory, need to use the special host to device memory copy. * use the special host to device memory copy. Note this code path was only
* Note this code path was only seen on large receives of * seen on large receives of noncontiguous data via buffered sends. */
* noncontiguous data via buffered sends. */ pConvertor->cbmemcpy(saved_data, user_data, data_length, pConvertor );
pConvertor->cbmemcpy(saved_data, real_data, data_length, pConvertor );
#else #else
/* Save the content of the user memory */ /* Save the content of the user memory */
MEMCPY( saved_data, real_data, data_length ); MEMCPY( saved_data, user_data, data_length );
#endif #endif
/* Then unpack the data into the user memory */ /* Then unpack the data into the user memory */
@ -216,22 +214,22 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle
* buffer back into the user memory. * buffer back into the user memory.
*/ */
#if OPAL_CUDA_SUPPORT #if OPAL_CUDA_SUPPORT
/* Need to copy the modified real_data again so we can see which /* Need to copy the modified user_data again so we can see which
* bytes need to be converted back to their original values. Note * bytes need to be converted back to their original values. Note
* this code path was only seen on large receives of noncontiguous * this code path was only seen on large receives of noncontiguous
* data via buffered sends. */ * data via buffered sends. */
{ {
char resaved_data[16]; char resaved_data[16];
pConvertor->cbmemcpy(resaved_data, real_data, data_length, pConvertor ); pConvertor->cbmemcpy(resaved_data, user_data, data_length, pConvertor );
for( i = 0; i < data_length; i++ ) { for( i = 0; i < data_length; i++ ) {
if( unused_byte == resaved_data[i] ) if( unused_byte == resaved_data[i] )
pConvertor->cbmemcpy(&real_data[i], &saved_data[i], 1, pConvertor); pConvertor->cbmemcpy(&user_data[i], &saved_data[i], 1, pConvertor);
} }
} }
#else #else
for( i = 0; i < data_length; i++ ) { for( i = 0; i < data_length; i++ ) {
if( unused_byte == real_data[i] ) if( unused_byte == user_data[i] )
real_data[i] = saved_data[i]; user_data[i] = saved_data[i];
} }
#endif #endif
return 0; return 0;
@ -296,7 +294,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor,
COMPUTE_CSUM( iov_ptr, missing_length, pConvertor ); COMPUTE_CSUM( iov_ptr, missing_length, pConvertor );
opal_unpack_partial_datatype( pConvertor, pElem, opal_unpack_partial_datatype( pConvertor, pElem,
iov_ptr, iov_ptr,
pConvertor->partial_length, element_length, pConvertor->partial_length, element_length - pConvertor->partial_length,
&conv_ptr ); &conv_ptr );
--count_desc; --count_desc;
if( 0 == count_desc ) { if( 0 == count_desc ) {