diff --git a/ompi/datatype/convertor.h b/ompi/datatype/convertor.h index 0df262fcb5..37c9915463 100644 --- a/ompi/datatype/convertor.h +++ b/ompi/datatype/convertor.h @@ -92,6 +92,8 @@ struct ompi_convertor_t { /* All others fields get modified for every call to pack/unpack functions */ uint32_t stack_pos; /**< the actual position on the stack */ size_t bConverted; /**< # of bytes already converted */ + char pending[16]; /**< leftover packed bytes saved by the generic unpack when a call ends with partial data */ + uint32_t pending_length; /**< # of bytes currently stored in pending (0 when nothing was left over) */ dt_stack_t static_stack[DT_STATIC_STACK_SIZE]; /**< local stack for small datatypes */ }; OBJ_CLASS_DECLARATION( ompi_convertor_t ); diff --git a/ompi/datatype/dt_module.c b/ompi/datatype/dt_module.c index 28ccd9f248..1f5f75b9ed 100644 --- a/ompi/datatype/dt_module.c +++ b/ompi/datatype/dt_module.c @@ -21,6 +21,12 @@ #include "datatype/datatype.h" #include "datatype/datatype_internal.h" +#if OMPI_ENABLE_DEBUG +#include "mca/base/mca_base_param.h" +extern int32_t ompi_unpack_debug; +extern int32_t ompi_pack_debug; +#endif /* OMPI_ENABLE_DEBUG */ + /* by default the debuging is turned off */ int ompi_ddt_dfd = -1; @@ -535,6 +541,11 @@ int32_t ompi_ddt_init( void ) } } } + mca_base_param_reg_int_name( "datatype", "unpack_debug", "Non zero lead to output generated by the unpack functions", + false, false, 0, &ompi_unpack_debug ); + mca_base_param_reg_int_name( "datatype", "pack_debug", "Non zero lead to output generated by the pack functions", + false, false, 0, &ompi_pack_debug ); + #endif /* OMPI_ENABLE_DEBUG */ ompi_ddt_default_convertors_init(); diff --git a/ompi/datatype/new_pack.c b/ompi/datatype/new_pack.c index 59c81ab00f..0f72619520 100644 --- a/ompi/datatype/new_pack.c +++ b/ompi/datatype/new_pack.c @@ -28,9 +28,12 @@ #endif #include -static int ompi_pack_debug=0; - +#if OMPI_ENABLE_DEBUG +int32_t ompi_pack_debug = 0; #define DO_DEBUG(INST) if( ompi_pack_debug ) { INST } +#else +#define DO_DEBUG(INST) +#endif /* OMPI_ENABLE_DEBUG */ /* The 
pack/unpack functions need a cleanup. I have to create a proper interface to access * all basic functionalities, hence using them as basic blocks for all conversion functions. @@ -114,8 +117,8 @@ static inline void pack_contiguous_loop( ompi_convertor_t* CONVERTOR, for( _i = 0; _i < _copy_loops; _i++ ) { OMPI_DDT_SAFEGUARD_POINTER( _source, _end_loop->size, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %ld )\n", - *(DESTINATION), _source, _end_loop->size ); ); + DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %ld ) => space %ld\n", + *(DESTINATION), _source, _end_loop->size, *(SPACE) - _i * _end_loop->size ); ); MEMCPY( *(DESTINATION), _source, _end_loop->size ); *(DESTINATION) += _end_loop->size; _source += _loop->extent; @@ -150,8 +153,8 @@ int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor, char *source_base, *destination; uint32_t iov_len_local, iov_count, required_space = 0; - DUMP( "ompi_convertor_generic_simple_pack( %p, {%p, %d}, %d )\n", (void*)pConvertor, - iov[0].iov_base, iov[0].iov_len, *out_size ); + DO_DEBUG( opal_output( 0, "ompi_convertor_generic_simple_pack( %p, {%p, %d}, %d )\n", (void*)pConvertor, + iov[0].iov_base, iov[0].iov_len, *out_size ); ); description = pConvertor->use_desc->desc; diff --git a/ompi/datatype/new_unpack.c b/ompi/datatype/new_unpack.c index ba67030e8a..e35d938b3c 100644 --- a/ompi/datatype/new_unpack.c +++ b/ompi/datatype/new_unpack.c @@ -28,9 +28,12 @@ #endif #include -static int ompi_unpack_debug = 0; - +#if OMPI_ENABLE_DEBUG +int32_t ompi_unpack_debug = 0; #define DO_DEBUG(INST) if( ompi_unpack_debug ) { INST } +#else +#define DO_DEBUG(INST) +#endif /* OMPI_ENABLE_DEBUG: in non-debug builds DO_DEBUG(INST) expands to nothing. NOTE(review): the debug variant expands to a bare if-block, so an else written right after a DO_DEBUG use would attach to it. */ /* The pack/unpack functions need a cleanup. I have to create a proper interface to access * all basic functionalities, hence using them as basic blocks for all conversion functions. 
@@ -106,8 +109,8 @@ static inline void unpack_contiguous_loop( ompi_convertor_t* CONVERTOR, for( _i = 0; _i < _copy_loops; _i++ ) { OMPI_DDT_SAFEGUARD_POINTER( _destination, _end_loop->size, (CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count ); - DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %ld )\n", - _destination, *(SOURCE), _end_loop->size ); ); + DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %ld ) => space %ld\n", + _destination, *(SOURCE), _end_loop->size, *(SPACE) - _i * _end_loop->size ); ); MEMCPY( _destination, *(SOURCE), _end_loop->size ); *(SOURCE) += _end_loop->size; _destination += _loop->extent; @@ -145,8 +148,8 @@ int ompi_convertor_generic_simple_unpack( ompi_convertor_t* pConvertor, char *user_memory_base, *packed_buffer; uint32_t iov_len_local, iov_count, required_space = 0; - DUMP( "ompi_convertor_generic_simple_unpack( %p, {%p, %d}, %d )\n", (void*)pConvertor, - iov[0].iov_base, iov[0].iov_len, *out_size ); + DO_DEBUG( opal_output( 0, "ompi_convertor_generic_simple_unpack( %p, {%p, %d}, %d )\n", (void*)pConvertor, + iov[0].iov_base, iov[0].iov_len, *out_size ); ); description = pConvertor->use_desc->desc; @@ -245,6 +248,15 @@ int ompi_convertor_generic_simple_unpack( ompi_convertor_t* pConvertor, total_unpacked += iov[iov_count].iov_len; pConvertor->bConverted += iov[iov_count].iov_len; /* update the already converted bytes */ assert( iov_len_local >= 0 ); + if( 0 != iov_len_local ) { + /* We have some partial data here. Let's copy it into the convertor + * and keep it hot until the next round. NOTE(review): the 16-byte bound on the copy into pending is checked only by the assert below, which is compiled out when NDEBUG is defined - confirm the leftover can never exceed the pending buffer. + */ + assert( iov_len_local < 16 ); + memcpy( pConvertor->pending, packed_buffer, iov_len_local ); + DO_DEBUG( opal_output( 0, "Saving %d bytes for the next call\n", iov_len_local ); ); + } + pConvertor->pending_length = iov_len_local; } *max_data = total_unpacked; *out_size = iov_count;