The datatype engine went through a "slim-fast" treatment. That's only the first stage ...
This commit was SVN r8965.
Этот коммит содержится в:
родитель
0376dce258
Коммит
c9e83658dd
@ -90,13 +90,13 @@ inline int32_t ompi_convertor_pack( ompi_convertor_t* pConv,
|
||||
{
|
||||
pConv->checksum = 1;
|
||||
/* protect against over packing data */
|
||||
if( pConv->bConverted == (pConv->pDesc->size * pConv->count) ) {
|
||||
if( pConv->flags & CONVERTOR_COMPLETED ) {
|
||||
iov[0].iov_len = 0;
|
||||
*out_size = 0;
|
||||
*max_data = 0;
|
||||
return 1; /* nothing to do */
|
||||
}
|
||||
assert( pConv->bConverted < (pConv->pDesc->size * pConv->count) );
|
||||
assert( pConv->bConverted < pConv->local_size );
|
||||
|
||||
/* We dont allocate any memory. The packing function should allocate it
|
||||
* if it need. If it's possible to find iovec in the derived datatype
|
||||
@ -113,14 +113,14 @@ inline int32_t ompi_convertor_unpack( ompi_convertor_t* pConv,
|
||||
|
||||
pConv->checksum = 1;
|
||||
/* protect against over unpacking data */
|
||||
if( pConv->bConverted == (pData->size * pConv->count) ) {
|
||||
if( pConv->flags & CONVERTOR_COMPLETED ) {
|
||||
iov[0].iov_len = 0;
|
||||
out_size = 0;
|
||||
*max_data = 0;
|
||||
return 1; /* nothing to do */
|
||||
}
|
||||
|
||||
assert( pConv->bConverted < (pConv->pDesc->size * pConv->count) );
|
||||
assert( pConv->bConverted < pConv->local_size );
|
||||
return pConv->fAdvance( pConv, iov, out_size, max_data, freeAfter );
|
||||
}
|
||||
|
||||
@ -140,11 +140,7 @@ int ompi_convertor_create_stack_with_pos_contig( ompi_convertor_t* pConvertor,
|
||||
*/
|
||||
pElems = pConvertor->use_desc->desc;
|
||||
|
||||
if( pData->size == 0 ) { /* special case for empty datatypes */
|
||||
count = pConvertor->count;
|
||||
} else {
|
||||
count = starting_point / pData->size;
|
||||
}
|
||||
count = starting_point / pData->size;
|
||||
extent = pData->ub - pData->lb;
|
||||
|
||||
pStack[0].type = DT_LOOP; /* the first one is always the loop */
|
||||
@ -213,21 +209,17 @@ extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
|
||||
extern int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* convertor,
|
||||
int starting_point, const int* sizes );
|
||||
|
||||
inline int32_t ompi_convertor_set_position( ompi_convertor_t* convertor, size_t* position )
|
||||
inline int32_t ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor, size_t* position )
|
||||
{
|
||||
int32_t rc;
|
||||
|
||||
/*
|
||||
* If the convertor is already at the correct position we are happy
|
||||
* Do not allow the convertor to go outside the data boundaries. This test includes
|
||||
* the check for datatype with size zero as well as for convertors with a count of zero.
|
||||
*/
|
||||
if( (*position) == convertor->bConverted ) return OMPI_SUCCESS;
|
||||
|
||||
/*
|
||||
* Do not allow the convertor to go outside the data boundaries.
|
||||
*/
|
||||
if( (convertor->pDesc->size * convertor->count) <= *position ) {
|
||||
if( convertor->local_size <= *position) {
|
||||
convertor->flags |= CONVERTOR_COMPLETED;
|
||||
convertor->bConverted = convertor->pDesc->size * convertor->count;
|
||||
convertor->bConverted = convertor->local_size;
|
||||
*position = convertor->bConverted;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -302,6 +294,23 @@ inline int ompi_convertor_prepare( ompi_convertor_t* convertor,
|
||||
convertor->stack_size = DT_STATIC_STACK_SIZE;
|
||||
}
|
||||
|
||||
/* Compute the local and remote sizes */
|
||||
convertor->local_size = convertor->count * datatype->size;
|
||||
if( convertor->remoteArch == ompi_mpi_local_arch ) {
|
||||
convertor->remote_size = convertor->local_size;
|
||||
} else {
|
||||
int i;
|
||||
uint64_t bdt_mask = datatype->bdt_used >> DT_CHAR;
|
||||
convertor->remote_size = 0;
|
||||
for( i = DT_CHAR; bdt_mask != 0; i++, bdt_mask >>= 1 ) {
|
||||
if( bdt_mask & ((unsigned long long)1) ) {
|
||||
/* TODO replace with the remote size */
|
||||
convertor->remote_size += (datatype->btypes[i] * ompi_ddt_basicDatatypes[i]->size);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return ompi_convertor_create_stack_at_begining( convertor, ompi_ddt_local_sizes );
|
||||
}
|
||||
|
||||
@ -315,10 +324,9 @@ inline int ompi_convertor_prepare( ompi_convertor_t* convertor,
|
||||
* ready to use starting from the old position. If copy_stack is false then the convertor
|
||||
* is created with an empty stack (you have to use ompi_convertor_set_position before using it).
|
||||
*/
|
||||
inline int
|
||||
ompi_convertor_clone( const ompi_convertor_t* source,
|
||||
ompi_convertor_t* destination,
|
||||
int32_t copy_stack )
|
||||
int ompi_convertor_clone( const ompi_convertor_t* source,
|
||||
ompi_convertor_t* destination,
|
||||
int32_t copy_stack )
|
||||
{
|
||||
destination->remoteArch = source->remoteArch;
|
||||
destination->flags = source->flags | CONVERTOR_CLONE;
|
||||
@ -330,6 +338,8 @@ ompi_convertor_clone( const ompi_convertor_t* source,
|
||||
destination->memAlloc_fn = source->memAlloc_fn;
|
||||
destination->memAlloc_userdata = source->memAlloc_userdata;
|
||||
destination->pFunctions = source->pFunctions;
|
||||
destination->local_size = source->local_size;
|
||||
destination->remote_size = source->remote_size;
|
||||
/* create the stack */
|
||||
if( source->stack_size > DT_STATIC_STACK_SIZE ) {
|
||||
destination->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * source->stack_size );
|
||||
@ -350,52 +360,6 @@ ompi_convertor_clone( const ompi_convertor_t* source,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
inline int
|
||||
ompi_convertor_clone_with_position( const ompi_convertor_t* source,
|
||||
ompi_convertor_t* destination,
|
||||
int32_t copy_stack,
|
||||
size_t* position )
|
||||
{
|
||||
(void)ompi_convertor_clone( source, destination, copy_stack );
|
||||
return ompi_convertor_set_position( destination, position );
|
||||
}
|
||||
|
||||
/* Actually we suppose that we can only do receiver side conversion */
|
||||
int32_t ompi_convertor_get_packed_size( const ompi_convertor_t* pConv, size_t* pSize )
|
||||
{
|
||||
int32_t ddt_size = 0;
|
||||
|
||||
if( ompi_ddt_type_size( pConv->pDesc, &ddt_size ) != 0 )
|
||||
return OMPI_ERROR;
|
||||
/* actually *pSize contain the size of one instance of the data */
|
||||
*pSize = ddt_size * pConv->count;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t ompi_convertor_get_unpacked_size( const ompi_convertor_t* pConv, size_t* pSize )
|
||||
{
|
||||
int i;
|
||||
const ompi_datatype_t* pData = pConv->pDesc;
|
||||
|
||||
if( pConv->count == 0 ) {
|
||||
*pSize = 0;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
if( pConv->remoteArch == 0 ) { /* same architecture */
|
||||
*pSize = pData->size * pConv->count;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
*pSize = 0;
|
||||
for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {
|
||||
if( pData->bdt_used & (((unsigned long long)1)<<i) ) {
|
||||
/* TODO replace with the remote size */
|
||||
*pSize += (pData->btypes[i] * ompi_ddt_basicDatatypes[i]->size);
|
||||
}
|
||||
}
|
||||
*pSize *= pConv->count;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
void ompi_convertor_dump( ompi_convertor_t* convertor )
|
||||
{
|
||||
printf( "Convertor %p count %d stack position %d bConverted %ld\n", (void*)convertor,
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/include/constants.h"
|
||||
#include "ompi/datatype/datatype.h"
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
@ -33,9 +34,6 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
union dt_elem_desc;
|
||||
struct ompi_datatype_t;
|
||||
|
||||
/*
|
||||
* CONVERTOR SECTION
|
||||
*/
|
||||
@ -80,6 +78,8 @@ struct ompi_convertor_t {
|
||||
opal_object_t super; /**< basic superclass */
|
||||
uint32_t remoteArch; /**< the remote architecture */
|
||||
uint32_t flags; /**< the properties of this convertor */
|
||||
size_t local_size;
|
||||
size_t remote_size;
|
||||
const struct ompi_datatype_t* pDesc; /**< the datatype description associated with the convertor */
|
||||
const struct dt_type_desc* use_desc; /**< the version used by the convertor (normal or optimized) */
|
||||
uint32_t count; /**< the total number of full datatype elements */
|
||||
@ -146,13 +146,6 @@ OMPI_DECLSPEC ompi_convertor_t* ompi_convertor_create( int32_t remote_arch, int3
|
||||
*/
|
||||
OMPI_DECLSPEC int ompi_convertor_cleanup( ompi_convertor_t* convertor );
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
OMPI_DECLSPEC int32_t
|
||||
ompi_convertor_set_position( ompi_convertor_t* convertor,
|
||||
size_t* position );
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
@ -165,22 +158,31 @@ ompi_convertor_personalize( ompi_convertor_t* pConv, uint32_t flags,
|
||||
/*
|
||||
*
|
||||
*/
|
||||
OMPI_DECLSPEC int32_t
|
||||
ompi_convertor_need_buffers( ompi_convertor_t* pConvertor );
|
||||
static inline int32_t
|
||||
ompi_convertor_need_buffers( const ompi_convertor_t* pConvertor )
|
||||
{
|
||||
return ompi_ddt_is_contiguous_memory_layout( pConvertor->pDesc, pConvertor->count );
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
OMPI_DECLSPEC int32_t
|
||||
static inline void
|
||||
ompi_convertor_get_packed_size( const ompi_convertor_t* pConv,
|
||||
size_t* pSize );
|
||||
size_t* pSize )
|
||||
{
|
||||
*pSize = pConv->local_size;
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
OMPI_DECLSPEC int32_t
|
||||
static inline void
|
||||
ompi_convertor_get_unpacked_size( const ompi_convertor_t* pConv,
|
||||
size_t* pSize );
|
||||
size_t* pSize )
|
||||
{
|
||||
*pSize = pConv->remote_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is internal to the data type engine. It should not be called from
|
||||
@ -196,31 +198,62 @@ int ompi_convertor_prepare( ompi_convertor_t* convertor,
|
||||
*
|
||||
*/
|
||||
OMPI_DECLSPEC int32_t
|
||||
ompi_convertor_copy_and_prepare_for_send( const ompi_convertor_t* pSrcConv,
|
||||
const struct ompi_datatype_t* datatype,
|
||||
int32_t count,
|
||||
const void* pUserBuf,
|
||||
ompi_convertor_t* convertor );
|
||||
OMPI_DECLSPEC int32_t
|
||||
ompi_convertor_prepare_for_send( ompi_convertor_t* convertor,
|
||||
const struct ompi_datatype_t* datatype,
|
||||
int32_t count,
|
||||
const void* pUserBuf);
|
||||
static inline int32_t
|
||||
ompi_convertor_copy_and_prepare_for_send( const ompi_convertor_t* pSrcConv,
|
||||
const struct ompi_datatype_t* datatype,
|
||||
int32_t count,
|
||||
const void* pUserBuf,
|
||||
ompi_convertor_t* convertor )
|
||||
{
|
||||
convertor->remoteArch = pSrcConv->remoteArch;
|
||||
convertor->pFunctions = pSrcConv->pFunctions;
|
||||
convertor->flags = pSrcConv->flags & ~CONVERTOR_STATE_MASK;
|
||||
|
||||
return ompi_convertor_prepare_for_send( convertor, datatype, count, pUserBuf );
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
*/
|
||||
OMPI_DECLSPEC int32_t
|
||||
ompi_convertor_copy_and_prepare_for_recv( const ompi_convertor_t* pSrcConv,
|
||||
const struct ompi_datatype_t* datatype,
|
||||
int32_t count,
|
||||
const void* pUserBuf,
|
||||
ompi_convertor_t* convertor );
|
||||
OMPI_DECLSPEC int32_t
|
||||
ompi_convertor_prepare_for_recv( ompi_convertor_t* convertor,
|
||||
const struct ompi_datatype_t* datatype,
|
||||
int32_t count,
|
||||
const void* pUserBuf );
|
||||
static inline int32_t
|
||||
ompi_convertor_copy_and_prepare_for_recv( const ompi_convertor_t* pSrcConv,
|
||||
const struct ompi_datatype_t* datatype,
|
||||
int32_t count,
|
||||
const void* pUserBuf,
|
||||
ompi_convertor_t* convertor )
|
||||
{
|
||||
convertor->remoteArch = pSrcConv->remoteArch;
|
||||
convertor->pFunctions = pSrcConv->pFunctions;
|
||||
convertor->flags = pSrcConv->flags & ~CONVERTOR_STATE_MASK;
|
||||
|
||||
return ompi_convertor_prepare_for_recv( convertor, datatype, count, pUserBuf );
|
||||
}
|
||||
|
||||
/*
|
||||
* Upper level does not need to call the _nocheck function directly.
|
||||
*/
|
||||
OMPI_DECLSPEC inline int32_t
|
||||
ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor,
|
||||
size_t* position );
|
||||
static inline int32_t
|
||||
ompi_convertor_set_position( ompi_convertor_t* convertor,
|
||||
size_t* position )
|
||||
{
|
||||
/*
|
||||
* If the convertor is already at the correct position we are happy.
|
||||
*/
|
||||
if( (*position) == convertor->bConverted ) return OMPI_SUCCESS;
|
||||
return ompi_convertor_set_position_nocheck( convertor, position );
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
@ -229,11 +262,15 @@ OMPI_DECLSPEC int
|
||||
ompi_convertor_clone( const ompi_convertor_t* source,
|
||||
ompi_convertor_t* destination,
|
||||
int32_t copy_stack );
|
||||
OMPI_DECLSPEC int
|
||||
static inline int
|
||||
ompi_convertor_clone_with_position( const ompi_convertor_t* source,
|
||||
ompi_convertor_t* destination,
|
||||
int32_t copy_stack,
|
||||
size_t* position );
|
||||
size_t* position )
|
||||
{
|
||||
(void)ompi_convertor_clone( source, destination, copy_stack );
|
||||
return ompi_convertor_set_position( destination, position );
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
|
@ -186,7 +186,7 @@ static inline int32_t ompi_ddt_get_size( const ompi_datatype_t* pData, long* siz
|
||||
* This function returns true (1) if the datatype representation, depending on the count,
|
||||
* is contiguous in the memory. And false (0) otherwise.
|
||||
*/
|
||||
static inline int32_t ompi_ddt_is_contiguous_memory_layout( ompi_datatype_t* datatype, int32_t count )
|
||||
static inline int32_t ompi_ddt_is_contiguous_memory_layout( const ompi_datatype_t* datatype, int32_t count )
|
||||
{
|
||||
if( !(datatype->flags & DT_FLAG_CONTIGUOUS) ) return 0;
|
||||
if( count == 1 ) return 1; /* only one data ignore the gaps around */
|
||||
|
@ -82,7 +82,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
|
||||
if( iov[iov_count].iov_base == NULL ) {
|
||||
size_t length = iov[iov_count].iov_len;
|
||||
if( length <= 0 )
|
||||
length = pConvertor->count * pData->size - pConvertor->bConverted - bConverted;
|
||||
length = pConvertor->local_size - pConvertor->bConverted - bConverted;
|
||||
if( (*max_data) < length )
|
||||
length = *max_data;
|
||||
iov[iov_count].iov_base = pConvertor->memAlloc_fn( &length, pConvertor->memAlloc_userdata );
|
||||
@ -164,7 +164,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, type, count_desc,
|
||||
disp_desc, pos_desc );
|
||||
|
||||
return (pConvertor->bConverted == (pData->size * pConvertor->count));
|
||||
return (pConvertor->bConverted == pConvertor->local_size);
|
||||
}
|
||||
|
||||
/* We suppose here that we work with an already optimized version of the data
|
||||
@ -292,7 +292,7 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
|
||||
pConv->bConverted += bConverted; /* update the byte converted field in the convertor */
|
||||
iov[0].iov_len = bConverted; /* update the length in the iovec */
|
||||
*max_data = bConverted;
|
||||
return (pConv->bConverted == (pData->size * pConv->count));
|
||||
return (pConv->bConverted == pConv->local_size);
|
||||
}
|
||||
|
||||
#define IOVEC_MEM_LIMIT 8192
|
||||
@ -576,9 +576,9 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
|
||||
*max_data = bConverted; /* update the length in the iovec */
|
||||
if( ((*out_size) == iov_pos) || (iov[iov_pos].iov_base == NULL) ) *out_size = iov_pos;
|
||||
else *out_size = iov_pos + 1;
|
||||
assert( pConv->bConverted <= (pData->size * pConv->count) );
|
||||
assert( pConv->bConverted <= pConv->local_size );
|
||||
DO_DEBUG( opal_output( 0, "--------------------------------------------------------------------\n" ); );
|
||||
return (pConv->bConverted == (pData->size * pConv->count));
|
||||
return (pConv->bConverted == pConv->local_size);
|
||||
}
|
||||
|
||||
/* the contig versions does not use the stack. They can easily retrieve
|
||||
@ -594,7 +594,7 @@ ompi_convertor_pack_no_conv_contig( ompi_convertor_t* pConv,
|
||||
const ompi_datatype_t* pData = pConv->pDesc;
|
||||
dt_stack_t* pStack = pConv->pStack;
|
||||
char *source_base = NULL;
|
||||
size_t length = pData->size * pConv->count - pConv->bConverted;
|
||||
size_t length = pConv->local_size - pConv->bConverted;
|
||||
uint32_t iov_count, initial_amount = pConv->bConverted;
|
||||
ddt_endloop_desc_t* _end_loop = &(pConv->use_desc->desc[pConv->use_desc->used].end_loop);
|
||||
|
||||
@ -643,7 +643,7 @@ ompi_convertor_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
|
||||
const ompi_datatype_t* pData = pConv->pDesc;
|
||||
dt_stack_t* pStack = pConv->pStack;
|
||||
char *user_memory, *packed_buffer;
|
||||
size_t length = pData->size * pConv->count;
|
||||
size_t length = pConv->local_size;
|
||||
long extent;
|
||||
uint32_t max_allowed, i, index;
|
||||
uint32_t iov_count, total_bytes_converted = 0;
|
||||
@ -652,7 +652,7 @@ ompi_convertor_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
|
||||
assert( (pData->flags & DT_FLAG_CONTIGUOUS) && ((long)pData->size != extent) );
|
||||
|
||||
/* Limit the amount of packed data to the data left over on this convertor */
|
||||
max_allowed = (pConv->count * pData->size) - pConv->bConverted;
|
||||
max_allowed = pConv->local_size - pConv->bConverted;
|
||||
if( max_allowed > (*max_data) )
|
||||
max_allowed = (*max_data);
|
||||
|
||||
@ -803,16 +803,3 @@ ompi_convertor_prepare_for_send( ompi_convertor_t* convertor,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t
|
||||
ompi_convertor_copy_and_prepare_for_send( const ompi_convertor_t* pSrcConv,
|
||||
const struct ompi_datatype_t* datatype,
|
||||
int32_t count,
|
||||
const void* pUserBuf,
|
||||
ompi_convertor_t* convertor )
|
||||
{
|
||||
convertor->remoteArch = pSrcConv->remoteArch;
|
||||
convertor->pFunctions = pSrcConv->pFunctions;
|
||||
convertor->flags = pSrcConv->flags & ~CONVERTOR_STATE_MASK;
|
||||
|
||||
return ompi_convertor_prepare_for_send( convertor, datatype, count, pUserBuf );
|
||||
}
|
||||
|
@ -334,7 +334,7 @@ static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
|
||||
|
||||
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
|
||||
packed_buffer = (char*)iov[iov_count].iov_base;
|
||||
remaining = pConv->count * pData->size - pConv->bConverted;
|
||||
remaining = pConv->local_size - pConv->bConverted;
|
||||
if( remaining > (uint32_t)iov[iov_count].iov_len )
|
||||
remaining = iov[iov_count].iov_len;
|
||||
bConverted = remaining; /* how much will get unpacked this time */
|
||||
@ -399,7 +399,7 @@ static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
|
||||
}
|
||||
*out_size = iov_count;
|
||||
*max_data = (pConv->bConverted - initial_bytes_converted);
|
||||
if( pConv->bConverted == (pData->size * pConv->count) ) {
|
||||
if( pConv->bConverted == pConv->local_size ) {
|
||||
pConv->flags |= CONVERTOR_COMPLETED;
|
||||
return 1;
|
||||
}
|
||||
@ -670,18 +670,6 @@ conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED] = {
|
||||
(conversion_fct_t)NULL, /* DT_UNAVAILABLE */
|
||||
};
|
||||
|
||||
/* Should we supply buffers to the convertor or can we use directly
|
||||
* the user buffer ?
|
||||
*/
|
||||
int32_t ompi_convertor_need_buffers( ompi_convertor_t* pConvertor )
|
||||
{
|
||||
const ompi_datatype_t* pData = pConvertor->pDesc;
|
||||
if( !(pData->flags & DT_FLAG_CONTIGUOUS) ) return 1;
|
||||
if( pConvertor->count == 1 ) return 0; /* only one data ignore the gaps around */
|
||||
if( (long)pData->size != (pData->ub - pData->lb) ) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
|
||||
|
||||
int32_t
|
||||
@ -711,20 +699,6 @@ ompi_convertor_prepare_for_recv( ompi_convertor_t* convertor,
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int32_t
|
||||
ompi_convertor_copy_and_prepare_for_recv( const ompi_convertor_t* pSrcConv,
|
||||
const struct ompi_datatype_t* datatype,
|
||||
int32_t count,
|
||||
const void* pUserBuf,
|
||||
ompi_convertor_t* convertor )
|
||||
{
|
||||
convertor->remoteArch = pSrcConv->remoteArch;
|
||||
convertor->pFunctions = pSrcConv->pFunctions;
|
||||
convertor->flags = pSrcConv->flags & ~CONVERTOR_STATE_MASK;
|
||||
|
||||
return ompi_convertor_prepare_for_recv( convertor, datatype, count, pUserBuf );
|
||||
}
|
||||
|
||||
/* Get the number of elements from the data associated with this convertor that can be
|
||||
* retrieved from a received buffer with the size iSize.
|
||||
* To speed up this function you should use it with an iSize == to the modulo
|
||||
|
@ -63,7 +63,7 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
|
||||
|
||||
/*opal_output( 0, "Data extent %d size %d count %d total_size %d starting_point %d\n",
|
||||
pData->ub - pData->lb, pData->size, pConvertor->count,
|
||||
pData->size * pConvertor->count, starting_point );*/
|
||||
pConvertor->local_size, starting_point );*/
|
||||
pConvertor->stack_pos = 0;
|
||||
pStack = pConvertor->pStack;
|
||||
/* Fill the first position on the stack. This one correspond to the
|
||||
@ -192,6 +192,7 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
|
||||
}
|
||||
|
||||
/* Correctly update the bConverted field */
|
||||
pConvertor->bConverted = pData->size * pConvertor->count;
|
||||
pConvertor->flags |= CONVERTOR_COMPLETED;
|
||||
pConvertor->bConverted = pConvertor->local_size;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
@ -186,7 +186,7 @@ int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
|
||||
*/
|
||||
size_t length = iov[iov_count].iov_len;
|
||||
if( length <= 0 )
|
||||
length = pConvertor->count * pData->size - pConvertor->bConverted;
|
||||
length = pConvertor->local_size - pConvertor->bConverted;
|
||||
if( ((*max_data) - total_packed) < length )
|
||||
length = (*max_data) - total_packed;
|
||||
assert( 0 < length );
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user