diff --git a/opal/datatype/opal_convertor.c b/opal/datatype/opal_convertor.c index c4a35eb361..afe23eee83 100644 --- a/opal/datatype/opal_convertor.c +++ b/opal/datatype/opal_convertor.c @@ -418,17 +418,19 @@ int32_t opal_convertor_set_position_nocheck( opal_convertor_t* convertor, int32_t rc; /** - * If we plan to rollback the convertor then first we have to set it - * at the beginning. + * create_stack_with_pos_contig always sets the position relative to the ZERO + * position, so there is no need for special handling. In all other cases, + * if we plan to rollback the convertor then first we have to reset it at + * the beginning. */ - if( (0 == (*position)) || ((*position) < convertor->bConverted) ) { - rc = opal_convertor_create_stack_at_begining( convertor, opal_datatype_local_sizes ); - if( 0 == (*position) ) return rc; - } if( OPAL_LIKELY(convertor->flags & OPAL_DATATYPE_FLAG_CONTIGUOUS) ) { rc = opal_convertor_create_stack_with_pos_contig( convertor, (*position), opal_datatype_local_sizes ); } else { + if( (0 == (*position)) || ((*position) < convertor->bConverted) ) { + rc = opal_convertor_create_stack_at_begining( convertor, opal_datatype_local_sizes ); + if( 0 == (*position) ) return rc; + } rc = opal_convertor_generic_simple_position( convertor, position ); /** * If we have a non-contigous send convertor don't allow it move in the middle diff --git a/opal/datatype/opal_datatype_position.c b/opal/datatype/opal_datatype_position.c index dc0edc5983..81aab30854 100644 --- a/opal/datatype/opal_datatype_position.c +++ b/opal/datatype/opal_datatype_position.c @@ -3,7 +3,7 @@ * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2009 The University of Tennessee and The University + * Copyright (c) 2004-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart, @@ -12,6 +12,8 @@ * All rights reserved. * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -115,10 +117,11 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, OPAL_PTRDIFF_TYPE extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb; DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position ); + assert(*position > pConvertor->bConverted); /* We dont want to have to parse the datatype multiple times. What we are interested in * here is to compute the number of completed datatypes that we can move forward, update - * the the counters and finally compute the position taking in account only the remaining + * the counters and finally compute the position taking into account only the remaining * elements. The only problem is that we have to modify all the elements on the stack. */ iov_len_local = *position - pConvertor->bConverted; @@ -129,11 +132,8 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, " iov_len_local %lu count_desc %d\n", (unsigned long)pConvertor->bConverted, (unsigned long)*position, (unsigned long)pConvertor->pDesc->size, (unsigned long)iov_len_local, count_desc ); ); - /** - * Update all the stack except the last one which is supposed to be for - * the last partial element description. 
- */ - for( type = 0; type < pConvertor->stack_pos; type++ ) + /* Update all the stack including the last one */ + for( type = 0; type <= pConvertor->stack_pos; type++ ) pStack[type].disp += count_desc * extent; pConvertor->bConverted += count_desc * pConvertor->pDesc->size; iov_len_local = *position - pConvertor->bConverted; @@ -149,13 +149,27 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, pStack--; pConvertor->stack_pos--; pElem = &(description[pos_desc]); - base_pointer += pStack->disp; DO_DEBUG( opal_output( 0, "position start pos_desc %d count_desc %d disp %llx\n" "stack_pos %d pos_desc %d count_desc %d disp %llx\n", pos_desc, count_desc, (unsigned long long)(base_pointer - pConvertor->pBaseBuf), pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); ); - + /* Last data has been only partially converted. Compute the relative position */ + if( 0 != pConvertor->partial_length ) { + size_t element_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; + size_t missing_length = element_length - pConvertor->partial_length; + if( missing_length >= iov_len_local ) { + pConvertor->partial_length = (pConvertor->partial_length + iov_len_local) % element_length; + pConvertor->bConverted += iov_len_local; + assert(pConvertor->partial_length < element_length); + return 0; + } + pConvertor->partial_length = (pConvertor->partial_length + missing_length) % element_length; + assert(pConvertor->partial_length == 0); + pConvertor->bConverted += missing_length; + iov_len_local -= missing_length; + count_desc--; + } while( 1 ) { if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */ DO_DEBUG( opal_output( 0, "position end_loop count %d stack_pos %d pos_desc %d disp %llx space %lu\n", @@ -232,7 +246,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor, if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) { /* I complete an element, next step I 
should go to the next one */ PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, OPAL_DATATYPE_UINT1, count_desc, - base_pointer - pStack->disp - pConvertor->pBaseBuf ); + base_pointer - pConvertor->pBaseBuf ); DO_DEBUG( opal_output( 0, "position save stack stack_pos %d pos_desc %d count_desc %d disp %llx\n", pConvertor->stack_pos, pStack->index, (int)pStack->count, (unsigned long long)pStack->disp ); ); return 0; diff --git a/opal/datatype/opal_datatype_unpack.c b/opal/datatype/opal_datatype_unpack.c index 0259f2378b..0bd344f3ae 100644 --- a/opal/datatype/opal_datatype_unpack.c +++ b/opal/datatype/opal_datatype_unpack.c @@ -166,23 +166,22 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv, static inline uint32_t opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pElem, unsigned char* partial_data, - OPAL_PTRDIFF_TYPE start_position, OPAL_PTRDIFF_TYPE end_position, + OPAL_PTRDIFF_TYPE start_position, OPAL_PTRDIFF_TYPE length, unsigned char** user_buffer ) { char unused_byte = 0x7F, saved_data[16]; unsigned char temporary[16], *temporary_buffer = temporary; - unsigned char* real_data = *user_buffer + pElem->elem.disp; - uint32_t i, length, count_desc = 1; + unsigned char* user_data = *user_buffer + pElem->elem.disp; + uint32_t i, count_desc = 1; size_t data_length = opal_datatype_basicDatatypes[pElem->elem.common.type]->size; DO_DEBUG( opal_output( 0, "unpack partial data start %lu end %lu data_length %lu user %p\n" "\tbConverted %lu total_length %lu count %d\n", - (unsigned long)start_position, (unsigned long)end_position, (unsigned long)data_length, *user_buffer, + (unsigned long)start_position, (unsigned long)start_position + length, (unsigned long)data_length, *user_buffer, (unsigned long)pConvertor->bConverted, (unsigned long)pConvertor->local_size, pConvertor->count ); ); /* Find a byte that is not used in the partial buffer */ find_unused_byte: - length = (uint32_t)(end_position - start_position); for( i = 0; i < 
length; i++ ) { if( unused_byte == partial_data[i] ) { unused_byte--; @@ -192,17 +191,16 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle /* Copy and fill the rest of the buffer with the unused byte */ memset( temporary, unused_byte, data_length ); - MEMCPY( temporary + start_position, partial_data, (end_position - start_position) ); + MEMCPY( temporary + start_position, partial_data, length ); #if OPAL_CUDA_SUPPORT - /* In the case where the data is being unpacked from device - * memory, need to use the special host to device memory copy. - * Note this code path was only seen on large receives of - * noncontiguous data via buffered sends. */ - pConvertor->cbmemcpy(saved_data, real_data, data_length, pConvertor ); + /* In the case where the data is being unpacked from device memory, need to + * use the special host to device memory copy. Note this code path was only + * seen on large receives of noncontiguous data via buffered sends. */ + pConvertor->cbmemcpy(saved_data, user_data, data_length, pConvertor ); #else /* Save the content of the user memory */ - MEMCPY( saved_data, real_data, data_length ); + MEMCPY( saved_data, user_data, data_length ); #endif /* Then unpack the data into the user memory */ @@ -216,22 +214,22 @@ opal_unpack_partial_datatype( opal_convertor_t* pConvertor, dt_elem_desc_t* pEle * buffer back into the user memory. */ #if OPAL_CUDA_SUPPORT - /* Need to copy the modified real_data again so we can see which + /* Need to copy the modified user_data again so we can see which * bytes need to be converted back to their original values. Note * this code path was only seen on large receives of noncontiguous * data via buffered sends. 
*/ { char resaved_data[16]; - pConvertor->cbmemcpy(resaved_data, real_data, data_length, pConvertor ); + pConvertor->cbmemcpy(resaved_data, user_data, data_length, pConvertor ); for( i = 0; i < data_length; i++ ) { if( unused_byte == resaved_data[i] ) - pConvertor->cbmemcpy(&real_data[i], &saved_data[i], 1, pConvertor); + pConvertor->cbmemcpy(&user_data[i], &saved_data[i], 1, pConvertor); } } #else for( i = 0; i < data_length; i++ ) { - if( unused_byte == real_data[i] ) - real_data[i] = saved_data[i]; + if( unused_byte == user_data[i] ) + user_data[i] = saved_data[i]; } #endif return 0; @@ -296,7 +294,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, COMPUTE_CSUM( iov_ptr, missing_length, pConvertor ); opal_unpack_partial_datatype( pConvertor, pElem, iov_ptr, - pConvertor->partial_length, element_length, + pConvertor->partial_length, element_length - pConvertor->partial_length, &conv_ptr ); --count_desc; if( 0 == count_desc ) { @@ -304,7 +302,7 @@ opal_generic_simple_unpack_function( opal_convertor_t* pConvertor, pos_desc++; /* advance to the next data */ UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc ); } - iov_ptr += missing_length; + iov_ptr += missing_length; iov_len_local -= missing_length; pConvertor->partial_length = 0; /* nothing more inside */ }