1
1

The datatype engine went through a "slim-fast" treatment. That's only the first stage ...

This commit was SVN r8965.
Этот коммит содержится в:
George Bosilca 2006-02-10 20:56:30 +00:00
родитель 0376dce258
Коммит c9e83658dd
7 изменённых файлов: 114 добавлений и 151 удалений

Просмотреть файл

@ -90,13 +90,13 @@ inline int32_t ompi_convertor_pack( ompi_convertor_t* pConv,
{
pConv->checksum = 1;
/* protect against over packing data */
if( pConv->bConverted == (pConv->pDesc->size * pConv->count) ) {
if( pConv->flags & CONVERTOR_COMPLETED ) {
iov[0].iov_len = 0;
*out_size = 0;
*max_data = 0;
return 1; /* nothing to do */
}
assert( pConv->bConverted < (pConv->pDesc->size * pConv->count) );
assert( pConv->bConverted < pConv->local_size );
/* We don't allocate any memory. The packing function should allocate it
 * if it needs to. If it's possible to find iovec in the derived datatype
@ -113,14 +113,14 @@ inline int32_t ompi_convertor_unpack( ompi_convertor_t* pConv,
pConv->checksum = 1;
/* protect against over unpacking data */
if( pConv->bConverted == (pData->size * pConv->count) ) {
if( pConv->flags & CONVERTOR_COMPLETED ) {
iov[0].iov_len = 0;
out_size = 0;
*max_data = 0;
return 1; /* nothing to do */
}
assert( pConv->bConverted < (pConv->pDesc->size * pConv->count) );
assert( pConv->bConverted < pConv->local_size );
return pConv->fAdvance( pConv, iov, out_size, max_data, freeAfter );
}
@ -140,11 +140,7 @@ int ompi_convertor_create_stack_with_pos_contig( ompi_convertor_t* pConvertor,
*/
pElems = pConvertor->use_desc->desc;
if( pData->size == 0 ) { /* special case for empty datatypes */
count = pConvertor->count;
} else {
count = starting_point / pData->size;
}
count = starting_point / pData->size;
extent = pData->ub - pData->lb;
pStack[0].type = DT_LOOP; /* the first one is always the loop */
@ -213,21 +209,17 @@ extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
extern int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* convertor,
int starting_point, const int* sizes );
inline int32_t ompi_convertor_set_position( ompi_convertor_t* convertor, size_t* position )
inline int32_t ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor, size_t* position )
{
int32_t rc;
/*
* If the convertor is already at the correct position we are happy
* Do not allow the convertor to go outside the data boundaries. This test include
* the check for datatype with size zero as well as for convertors with a count of zero.
*/
if( (*position) == convertor->bConverted ) return OMPI_SUCCESS;
/*
* Do not allow the convertor to go outside the data boundaries.
*/
if( (convertor->pDesc->size * convertor->count) <= *position ) {
if( convertor->local_size <= *position) {
convertor->flags |= CONVERTOR_COMPLETED;
convertor->bConverted = convertor->pDesc->size * convertor->count;
convertor->bConverted = convertor->local_size;
*position = convertor->bConverted;
return OMPI_SUCCESS;
}
@ -302,6 +294,23 @@ inline int ompi_convertor_prepare( ompi_convertor_t* convertor,
convertor->stack_size = DT_STATIC_STACK_SIZE;
}
/* Compute the local and remote sizes */
convertor->local_size = convertor->count * datatype->size;
if( convertor->remoteArch == ompi_mpi_local_arch ) {
convertor->remote_size = convertor->local_size;
} else {
int i;
uint64_t bdt_mask = datatype->bdt_used >> DT_CHAR;
convertor->remote_size = 0;
for( i = DT_CHAR; bdt_mask != 0; i++, bdt_mask >>= 1 ) {
if( bdt_mask & ((unsigned long long)1) ) {
/* TODO replace with the remote size */
convertor->remote_size += (datatype->btypes[i] * ompi_ddt_basicDatatypes[i]->size);
}
}
}
return ompi_convertor_create_stack_at_begining( convertor, ompi_ddt_local_sizes );
}
@ -315,10 +324,9 @@ inline int ompi_convertor_prepare( ompi_convertor_t* convertor,
 * ready to use starting from the old position. If copy_stack is false then the convertor
 * is created with an empty stack (you have to use ompi_convertor_set_position before using it).
*/
inline int
ompi_convertor_clone( const ompi_convertor_t* source,
ompi_convertor_t* destination,
int32_t copy_stack )
int ompi_convertor_clone( const ompi_convertor_t* source,
ompi_convertor_t* destination,
int32_t copy_stack )
{
destination->remoteArch = source->remoteArch;
destination->flags = source->flags | CONVERTOR_CLONE;
@ -330,6 +338,8 @@ ompi_convertor_clone( const ompi_convertor_t* source,
destination->memAlloc_fn = source->memAlloc_fn;
destination->memAlloc_userdata = source->memAlloc_userdata;
destination->pFunctions = source->pFunctions;
destination->local_size = source->local_size;
destination->remote_size = source->remote_size;
/* create the stack */
if( source->stack_size > DT_STATIC_STACK_SIZE ) {
destination->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * source->stack_size );
@ -350,52 +360,6 @@ ompi_convertor_clone( const ompi_convertor_t* source,
return OMPI_SUCCESS;
}
inline int
ompi_convertor_clone_with_position( const ompi_convertor_t* source,
                                    ompi_convertor_t* destination,
                                    int32_t copy_stack,
                                    size_t* position )
{
    /*
     * Two-step helper: first duplicate the source convertor into destination,
     * then move the freshly cloned convertor to the requested position.
     * The status of the clone step is deliberately discarded; only the
     * status of the positioning step is reported to the caller.
     */
    int status;

    (void)ompi_convertor_clone( source, destination, copy_stack );
    status = ompi_convertor_set_position( destination, position );
    return status;
}
/*
 * Compute the packed size, in bytes, of the data described by a convertor.
 *
 * Actually we suppose that we can only do receiver side conversion, so the
 * packed size is derived from the local datatype size only.
 *
 * pConv  the convertor; its pDesc and count fields are read.
 * pSize  [out] receives the datatype size multiplied by the convertor count.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR when ompi_ddt_type_size() reports a
 * non-zero status (in which case *pSize is left untouched).
 */
int32_t ompi_convertor_get_packed_size( const ompi_convertor_t* pConv, size_t* pSize )
{
    /* presumably filled with the size of ONE instance of the datatype -- confirm
     * against ompi_ddt_type_size() */
    int32_t ddt_size = 0;

    if( ompi_ddt_type_size( pConv->pDesc, &ddt_size ) != 0 ) {
        return OMPI_ERROR;
    }
    /* Widen to size_t BEFORE multiplying: both ddt_size and the count field are
     * 32 bits wide, so the product would otherwise be computed in 32 bits and
     * silently wrap for payloads of 4GB or more. */
    *pSize = (size_t)ddt_size * pConv->count;
    return OMPI_SUCCESS;
}
/*
 * Compute the size, in bytes, of the data described by a convertor once unpacked.
 *
 * pConv  the convertor; its pDesc, count and remoteArch fields are read.
 * pSize  [out] receives the computed size.
 *
 * Always returns OMPI_SUCCESS.
 */
int32_t ompi_convertor_get_unpacked_size( const ompi_convertor_t* pConv, size_t* pSize )
{
    const ompi_datatype_t* datatype = pConv->pDesc;
    size_t per_instance = 0;
    int type_id;

    /* Nothing to transfer at all. */
    if( 0 == pConv->count ) {
        *pSize = 0;
        return OMPI_SUCCESS;
    }

    /* same architecture: the local size is the answer */
    if( 0 == pConv->remoteArch ) {
        *pSize = datatype->size * pConv->count;
        return OMPI_SUCCESS;
    }

    /* Different architecture: add up the contribution of every basic type
     * flagged in the bdt_used bit mask, for a single instance of the datatype. */
    for( type_id = DT_CHAR; type_id < DT_MAX_PREDEFINED; type_id++ ) {
        if( 0 == (datatype->bdt_used & (((unsigned long long)1) << type_id)) ) {
            continue;
        }
        /* TODO replace with the remote size */
        per_instance += (datatype->btypes[type_id] * ompi_ddt_basicDatatypes[type_id]->size);
    }

    /* Scale the single-instance total by the number of instances. */
    *pSize = per_instance * pConv->count;
    return OMPI_SUCCESS;
}
void ompi_convertor_dump( ompi_convertor_t* convertor )
{
printf( "Convertor %p count %d stack position %d bConverted %ld\n", (void*)convertor,

Просмотреть файл

@ -22,6 +22,7 @@
#include "ompi_config.h"
#include "ompi/include/constants.h"
#include "ompi/datatype/datatype.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
@ -33,9 +34,6 @@
extern "C" {
#endif
union dt_elem_desc;
struct ompi_datatype_t;
/*
* CONVERTOR SECTION
*/
@ -80,6 +78,8 @@ struct ompi_convertor_t {
opal_object_t super; /**< basic superclass */
uint32_t remoteArch; /**< the remote architecture */
uint32_t flags; /**< the properties of this convertor */
size_t local_size;
size_t remote_size;
const struct ompi_datatype_t* pDesc; /**< the datatype description associated with the convertor */
const struct dt_type_desc* use_desc; /**< the version used by the convertor (normal or optimized) */
uint32_t count; /**< the total number of full datatype elements */
@ -146,13 +146,6 @@ OMPI_DECLSPEC ompi_convertor_t* ompi_convertor_create( int32_t remote_arch, int3
*/
OMPI_DECLSPEC int ompi_convertor_cleanup( ompi_convertor_t* convertor );
/*
*
*/
OMPI_DECLSPEC int32_t
ompi_convertor_set_position( ompi_convertor_t* convertor,
size_t* position );
/*
*
*/
@ -165,22 +158,31 @@ ompi_convertor_personalize( ompi_convertor_t* pConv, uint32_t flags,
/*
*
*/
OMPI_DECLSPEC int32_t
ompi_convertor_need_buffers( ompi_convertor_t* pConvertor );
/*
 * Should we supply buffers to the convertor or can we use directly
 * the user buffer ?
 *
 * pConvertor  the convertor; its pDesc and count fields are read.
 *
 * Returns non-zero when intermediate buffers are needed, and 0 when the
 * data can be used in place.
 *
 * ompi_ddt_is_contiguous_memory_layout() answers the opposite question (it
 * returns 1 when the layout IS contiguous), so its result has to be negated
 * here; returning it unchanged would report "need buffers" for exactly the
 * layouts that do not need them.
 */
static inline int32_t
ompi_convertor_need_buffers( const ompi_convertor_t* pConvertor )
{
    return !ompi_ddt_is_contiguous_memory_layout( pConvertor->pDesc, pConvertor->count );
}
/*
*
*/
OMPI_DECLSPEC int32_t
static inline void
ompi_convertor_get_packed_size( const ompi_convertor_t* pConv,
size_t* pSize );
size_t* pSize )
{
*pSize = pConv->local_size;
}
/*
*
*/
OMPI_DECLSPEC int32_t
static inline void
ompi_convertor_get_unpacked_size( const ompi_convertor_t* pConv,
size_t* pSize );
size_t* pSize )
{
*pSize = pConv->remote_size;
}
/*
* This function is internal to the data type engine. It should not be called from
@ -196,31 +198,62 @@ int ompi_convertor_prepare( ompi_convertor_t* convertor,
*
*/
OMPI_DECLSPEC int32_t
ompi_convertor_copy_and_prepare_for_send( const ompi_convertor_t* pSrcConv,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf,
ompi_convertor_t* convertor );
OMPI_DECLSPEC int32_t
ompi_convertor_prepare_for_send( ompi_convertor_t* convertor,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf);
/*
 * Initialize `convertor` from the template convertor `pSrcConv` and prepare it
 * for sending. The remote architecture and the function table are copied
 * as-is; the flags are copied with every bit of CONVERTOR_STATE_MASK cleared.
 * The return value is the one of ompi_convertor_prepare_for_send().
 */
static inline int32_t
ompi_convertor_copy_and_prepare_for_send( const ompi_convertor_t* pSrcConv,
                                          const struct ompi_datatype_t* datatype,
                                          int32_t count,
                                          const void* pUserBuf,
                                          ompi_convertor_t* convertor )
{
    /* Flags inherited from the template, minus the per-use state bits. */
    const uint32_t inherited_flags = pSrcConv->flags & ~CONVERTOR_STATE_MASK;

    convertor->flags      = inherited_flags;
    convertor->pFunctions = pSrcConv->pFunctions;
    convertor->remoteArch = pSrcConv->remoteArch;

    return ompi_convertor_prepare_for_send( convertor, datatype, count, pUserBuf );
}
/*
*
*/
OMPI_DECLSPEC int32_t
ompi_convertor_copy_and_prepare_for_recv( const ompi_convertor_t* pSrcConv,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf,
ompi_convertor_t* convertor );
OMPI_DECLSPEC int32_t
ompi_convertor_prepare_for_recv( ompi_convertor_t* convertor,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf );
/*
 * Initialize `convertor` from the template convertor `pSrcConv` and prepare it
 * for receiving. The remote architecture and the function table are copied
 * as-is; the flags are copied with every bit of CONVERTOR_STATE_MASK cleared.
 * The return value is the one of ompi_convertor_prepare_for_recv().
 */
static inline int32_t
ompi_convertor_copy_and_prepare_for_recv( const ompi_convertor_t* pSrcConv,
                                          const struct ompi_datatype_t* datatype,
                                          int32_t count,
                                          const void* pUserBuf,
                                          ompi_convertor_t* convertor )
{
    /* Flags inherited from the template, minus the per-use state bits. */
    const uint32_t inherited_flags = pSrcConv->flags & ~CONVERTOR_STATE_MASK;

    convertor->flags      = inherited_flags;
    convertor->pFunctions = pSrcConv->pFunctions;
    convertor->remoteArch = pSrcConv->remoteArch;

    return ompi_convertor_prepare_for_recv( convertor, datatype, count, pUserBuf );
}
/*
* Upper level does not need to call the _nocheck function directly.
*/
OMPI_DECLSPEC inline int32_t
ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor,
size_t* position );
/*
 * Move the convertor to *position. This is the checked entry point: the
 * heavier _nocheck variant is only invoked when the requested position differs
 * from the number of bytes already converted.
 */
static inline int32_t
ompi_convertor_set_position( ompi_convertor_t* convertor,
                             size_t* position )
{
    if( convertor->bConverted != (*position) ) {
        return ompi_convertor_set_position_nocheck( convertor, position );
    }
    /* If the convertor is already at the correct position we are happy. */
    return OMPI_SUCCESS;
}
/*
*
@ -229,11 +262,15 @@ OMPI_DECLSPEC int
ompi_convertor_clone( const ompi_convertor_t* source,
ompi_convertor_t* destination,
int32_t copy_stack );
OMPI_DECLSPEC int
static inline int
ompi_convertor_clone_with_position( const ompi_convertor_t* source,
ompi_convertor_t* destination,
int32_t copy_stack,
size_t* position );
size_t* position )
{
(void)ompi_convertor_clone( source, destination, copy_stack );
return ompi_convertor_set_position( destination, position );
}
/*
*

Просмотреть файл

@ -186,7 +186,7 @@ static inline int32_t ompi_ddt_get_size( const ompi_datatype_t* pData, long* siz
* This function return true (1) if the datatype representation depending on the count
* is contiguous in the memory. And false (0) otherwise.
*/
static inline int32_t ompi_ddt_is_contiguous_memory_layout( ompi_datatype_t* datatype, int32_t count )
static inline int32_t ompi_ddt_is_contiguous_memory_layout( const ompi_datatype_t* datatype, int32_t count )
{
if( !(datatype->flags & DT_FLAG_CONTIGUOUS) ) return 0;
if( count == 1 ) return 1; /* only one data ignore the gaps around */

Просмотреть файл

@ -82,7 +82,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
if( iov[iov_count].iov_base == NULL ) {
size_t length = iov[iov_count].iov_len;
if( length <= 0 )
length = pConvertor->count * pData->size - pConvertor->bConverted - bConverted;
length = pConvertor->local_size - pConvertor->bConverted - bConverted;
if( (*max_data) < length )
length = *max_data;
iov[iov_count].iov_base = pConvertor->memAlloc_fn( &length, pConvertor->memAlloc_userdata );
@ -164,7 +164,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, type, count_desc,
disp_desc, pos_desc );
return (pConvertor->bConverted == (pData->size * pConvertor->count));
return (pConvertor->bConverted == pConvertor->local_size);
}
/* We suppose here that we work with an already optimized version of the data
@ -292,7 +292,7 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
pConv->bConverted += bConverted; /* update the byte converted field in the convertor */
iov[0].iov_len = bConverted; /* update the length in the iovec */
*max_data = bConverted;
return (pConv->bConverted == (pData->size * pConv->count));
return (pConv->bConverted == pConv->local_size);
}
#define IOVEC_MEM_LIMIT 8192
@ -576,9 +576,9 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
*max_data = bConverted; /* update the length in the iovec */
if( ((*out_size) == iov_pos) || (iov[iov_pos].iov_base == NULL) ) *out_size = iov_pos;
else *out_size = iov_pos + 1;
assert( pConv->bConverted <= (pData->size * pConv->count) );
assert( pConv->bConverted <= pConv->local_size );
DO_DEBUG( opal_output( 0, "--------------------------------------------------------------------\n" ); );
return (pConv->bConverted == (pData->size * pConv->count));
return (pConv->bConverted == pConv->local_size);
}
/* the contig versions does not use the stack. They can easily retrieve
@ -594,7 +594,7 @@ ompi_convertor_pack_no_conv_contig( ompi_convertor_t* pConv,
const ompi_datatype_t* pData = pConv->pDesc;
dt_stack_t* pStack = pConv->pStack;
char *source_base = NULL;
size_t length = pData->size * pConv->count - pConv->bConverted;
size_t length = pConv->local_size - pConv->bConverted;
uint32_t iov_count, initial_amount = pConv->bConverted;
ddt_endloop_desc_t* _end_loop = &(pConv->use_desc->desc[pConv->use_desc->used].end_loop);
@ -643,7 +643,7 @@ ompi_convertor_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
const ompi_datatype_t* pData = pConv->pDesc;
dt_stack_t* pStack = pConv->pStack;
char *user_memory, *packed_buffer;
size_t length = pData->size * pConv->count;
size_t length = pConv->local_size;
long extent;
uint32_t max_allowed, i, index;
uint32_t iov_count, total_bytes_converted = 0;
@ -652,7 +652,7 @@ ompi_convertor_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
assert( (pData->flags & DT_FLAG_CONTIGUOUS) && ((long)pData->size != extent) );
/* Limit the amount of packed data to the data left over on this convertor */
max_allowed = (pConv->count * pData->size) - pConv->bConverted;
max_allowed = pConv->local_size - pConv->bConverted;
if( max_allowed > (*max_data) )
max_allowed = (*max_data);
@ -803,16 +803,3 @@ ompi_convertor_prepare_for_send( ompi_convertor_t* convertor,
return OMPI_SUCCESS;
}
/*
 * Initialize `convertor` from the template convertor `pSrcConv` and prepare it
 * for sending. The remote architecture and the function table are copied
 * as-is; the flags are copied with every bit of CONVERTOR_STATE_MASK cleared.
 * The return value is the one of ompi_convertor_prepare_for_send().
 */
int32_t
ompi_convertor_copy_and_prepare_for_send( const ompi_convertor_t* pSrcConv,
                                          const struct ompi_datatype_t* datatype,
                                          int32_t count,
                                          const void* pUserBuf,
                                          ompi_convertor_t* convertor )
{
    int32_t rc;

    /* Inherit the template properties, dropping the per-use state bits. */
    convertor->flags      = pSrcConv->flags & ~CONVERTOR_STATE_MASK;
    convertor->pFunctions = pSrcConv->pFunctions;
    convertor->remoteArch = pSrcConv->remoteArch;

    rc = ompi_convertor_prepare_for_send( convertor, datatype, count, pUserBuf );
    return rc;
}

Просмотреть файл

@ -334,7 +334,7 @@ static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
packed_buffer = (char*)iov[iov_count].iov_base;
remaining = pConv->count * pData->size - pConv->bConverted;
remaining = pConv->local_size - pConv->bConverted;
if( remaining > (uint32_t)iov[iov_count].iov_len )
remaining = iov[iov_count].iov_len;
bConverted = remaining; /* how much will get unpacked this time */
@ -399,7 +399,7 @@ static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
}
*out_size = iov_count;
*max_data = (pConv->bConverted - initial_bytes_converted);
if( pConv->bConverted == (pData->size * pConv->count) ) {
if( pConv->bConverted == pConv->local_size ) {
pConv->flags |= CONVERTOR_COMPLETED;
return 1;
}
@ -670,18 +670,6 @@ conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED] = {
(conversion_fct_t)NULL, /* DT_UNAVAILABLE */
};
/*
 * Tell whether buffers have to be supplied to the convertor (returns 1) or
 * whether the user buffer can be used directly (returns 0).
 *
 * The user buffer is usable directly only for a datatype flagged
 * DT_FLAG_CONTIGUOUS that is either used with a count of one (the gaps around
 * a single element are ignored) or whose size equals its extent (ub - lb).
 */
int32_t ompi_convertor_need_buffers( ompi_convertor_t* pConvertor )
{
    const ompi_datatype_t* datatype = pConvertor->pDesc;

    if( (datatype->flags & DT_FLAG_CONTIGUOUS) &&
        ( (1 == pConvertor->count) ||
          ((long)datatype->size == (datatype->ub - datatype->lb)) ) ) {
        return 0;
    }
    return 1;
}
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
int32_t
@ -711,20 +699,6 @@ ompi_convertor_prepare_for_recv( ompi_convertor_t* convertor,
return OMPI_SUCCESS;
}
/*
 * Initialize `convertor` from the template convertor `pSrcConv` and prepare it
 * for receiving. The remote architecture and the function table are copied
 * as-is; the flags are copied with every bit of CONVERTOR_STATE_MASK cleared.
 * The return value is the one of ompi_convertor_prepare_for_recv().
 */
int32_t
ompi_convertor_copy_and_prepare_for_recv( const ompi_convertor_t* pSrcConv,
                                          const struct ompi_datatype_t* datatype,
                                          int32_t count,
                                          const void* pUserBuf,
                                          ompi_convertor_t* convertor )
{
    int32_t rc;

    /* Inherit the template properties, dropping the per-use state bits. */
    convertor->flags      = pSrcConv->flags & ~CONVERTOR_STATE_MASK;
    convertor->pFunctions = pSrcConv->pFunctions;
    convertor->remoteArch = pSrcConv->remoteArch;

    rc = ompi_convertor_prepare_for_recv( convertor, datatype, count, pUserBuf );
    return rc;
}
/* Get the number of elements from the data associated with this convertor that can be
 * retrieved from a received buffer with the size iSize.
 * To speed up this function you should use it with an iSize equal to the modulo

Просмотреть файл

@ -63,7 +63,7 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
/*opal_output( 0, "Data extent %d size %d count %d total_size %d starting_point %d\n",
pData->ub - pData->lb, pData->size, pConvertor->count,
pData->size * pConvertor->count, starting_point );*/
pConvertor->local_size, starting_point );*/
pConvertor->stack_pos = 0;
pStack = pConvertor->pStack;
/* Fill the first position on the stack. This one correspond to the
@ -192,6 +192,7 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
}
/* Correctly update the bConverted field */
pConvertor->bConverted = pData->size * pConvertor->count;
pConvertor->flags |= CONVERTOR_COMPLETED;
pConvertor->bConverted = pConvertor->local_size;
return OMPI_SUCCESS;
}

Просмотреть файл

@ -186,7 +186,7 @@ int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
*/
size_t length = iov[iov_count].iov_len;
if( length <= 0 )
length = pConvertor->count * pData->size - pConvertor->bConverted;
length = pConvertor->local_size - pConvertor->bConverted;
if( ((*max_data) - total_packed) < length )
length = (*max_data) - total_packed;
assert( 0 < length );