1
1

New datatype engine. Most of the work was on the initialization. Now there are several

functions to create/clone a convertor, with the goal of reducing the time spent in
the convertor-related functions, thus reducing the latency.

This commit was SVN r5985.
Этот коммит содержится в:
George Bosilca 2005-06-08 19:02:16 +00:00
родитель 993982f748
Коммит 8e6223cb36
10 изменённых файлов: 108 добавлений и 510 удалений

Просмотреть файл

@ -28,7 +28,7 @@ libdatatype_la_SOURCES = \
dt_add.c dt_create.c dt_create_array.c dt_create_dup.c dt_create_indexed.c \
dt_create_struct.c dt_create_vector.c dt_destroy.c dt_module.c \
dt_optimize.c dt_pack.c dt_sndrcv.c dt_unpack.c fake_stack.c dt_args.c \
dt_arch.c dt_external32.c dt_match_size.c new_pack.c
dt_arch.c dt_external32.c dt_match_size.c convertor.c new_pack.c
# Conditionally install the header files

Просмотреть файл

@ -30,9 +30,6 @@
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
@ -81,7 +78,7 @@ OMPI_DECLSPEC extern ompi_pointer_array_t *ompi_datatype_f_to_c_table;
typedef union dt_elem_desc dt_elem_desc_t;
typedef struct __dt_struct_desc {
typedef struct dt_type_desc {
uint32_t length; /* the maximum number of elements in the description array */
uint32_t used; /* the number of used elements in the description array */
dt_elem_desc_t* desc;
@ -180,156 +177,15 @@ OMPI_DECLSPEC int32_t ompi_ddt_copy_content_same_ddt( const ompi_datatype_t* pDa
OMPI_DECLSPEC int32_t ompi_ddt_optimize_short( ompi_datatype_t* pData, int32_t count, dt_type_desc_t* pTypeDesc );
OMPI_DECLSPEC const ompi_datatype_t* ompi_ddt_match_size( int size, uint16_t datakind, uint16_t datalang );
typedef int32_t (*conversion_fct_t)( uint32_t count,
const void* from, uint32_t from_len, long from_extent,
void* to, uint32_t in_length, long to_extent );
typedef struct ompi_convertor_t ompi_convertor_t;
typedef int32_t (*convertor_advance_fct_t)( ompi_convertor_t* pConvertor,
struct iovec* pInputv,
uint32_t* inputCount,
uint32_t* max_data,
int32_t* freeAfter );
typedef void*(*memalloc_fct_t)( size_t* pLength );
/* One entry of the stack a convertor uses to record where it currently is
 * inside a datatype description. */
typedef struct __dt_stack {
int16_t index; /**< index in the element description */
int16_t type; /**< the type used for the last pack/unpack (original or DT_BYTE) */
int32_t count; /**< number of repetitions that still have to be processed */
int32_t end_loop; /**< for loops: the end of the loop; unused for other entries */
long disp; /**< current displacement, which depends on the count field */
} dt_stack_t;
#define DT_STATIC_STACK_SIZE 5
/* State needed to pack or unpack `count` elements of a datatype from/to a
 * user buffer, possibly in several successive calls. The first group of
 * fields describes the conversion; the fields after the marker comment below
 * track the progress and change on every pack/unpack call. */
struct ompi_convertor_t {
ompi_object_t super; /**< basic superclass */
uint32_t remoteArch; /**< the remote architecture */
uint32_t flags; /**< the properties of this convertor */
ompi_datatype_t* pDesc; /**< the datatype description associated with the convertor */
const dt_type_desc_t* use_desc; /**< the datatype version used by the convertor (normal or optimized) */
uint32_t count; /**< the total number of full datatype elements */
char* pBaseBuf; /**< initial buffer as supplied by the user */
dt_stack_t* pStack; /**< the stack used for the conversion: either static_stack or a heap allocation */
uint32_t stack_size; /**< number of entries available in pStack */
convertor_advance_fct_t fAdvance; /**< pointer to the pack/unpack function */
memalloc_fct_t memAlloc_fn; /**< pointer to the memory allocation function */
conversion_fct_t* pFunctions; /**< table of the conversion functions */
/* All other fields get modified by every call to the pack/unpack functions */
uint32_t stack_pos; /**< the current position on the stack */
uint32_t bConverted; /**< the size of the already converted elements, in bytes */
dt_stack_t static_stack[DT_STATIC_STACK_SIZE]; /**< embedded stack, used as long as no more than DT_STATIC_STACK_SIZE entries are required */
};
OBJ_CLASS_DECLARATION( ompi_convertor_t );
#if !OMPI_ENABLE_DEBUG
/*
* Return 0 if everything went OK and there is still data left to convert
* (an additional call with other input buffers is needed),
* 1 if everything went fine and the data was completely converted,
* -1 if something went wrong.
/*
*
*/
/*
 * Pack the next piece of the data attached to the convertor.
 *
 * pConv      convertor describing the data and the current position
 * iov        output vector, forwarded untouched to the advance function
 * out_size   forwarded to the advance function; set to 0 when nothing is left
 * max_data   forwarded to the advance function; set to 0 when nothing is left
 * freeAfter  forwarded to the advance function
 *
 * Returns 1 (with iov[0].iov_len, *out_size and *max_data zeroed) when all
 * the data has already been converted, otherwise whatever the convertor's
 * fAdvance function returns.
 */
static inline int32_t ompi_convertor_pack( ompi_convertor_t* pConv,
                                           struct iovec* iov, uint32_t* out_size,
                                           uint32_t* max_data, int32_t* freeAfter )
{
    if( pConv->bConverted != (pConv->pDesc->size * pConv->count) ) {
        /* Not finished yet, so we cannot already be past the end. */
        assert( pConv->bConverted < (pConv->pDesc->size * pConv->count) );
        /* No memory is allocated at this level. The packing function
         * allocates some if it needs to; when the derived datatype
         * description can be expressed directly as iovec entries no
         * allocation is required at all.
         */
        return pConv->fAdvance( pConv, iov, out_size, max_data, freeAfter );
    }

    /* Everything was already packed: guard against over packing. */
    iov[0].iov_len = 0;
    *out_size = 0;
    *max_data = 0;
    return 1;  /* nothing to do */
}
OMPI_DECLSPEC int32_t ompi_ddt_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdtype, void *rbuf,
int32_t rcount, const ompi_datatype_t* rdtype);
/*
 * Unpack the next piece of data into the user buffer attached to the
 * convertor.
 *
 * pConv      convertor describing the destination data and current position
 * iov        input vector; for a contiguous datatype a NULL iov[0].iov_base
 *            asks for the address of the user memory instead of a copy
 * out_size   forwarded to the advance function; set to 0 when nothing is left
 * max_data   receives the number of bytes handled by this call
 * freeAfter  forwarded to the advance function
 *
 * Returns 1 when the whole data has been converted. On the contiguous
 * shortcut it returns 1 or 0 depending on completion; otherwise it returns
 * the result of the convertor's fAdvance function.
 */
static inline int32_t ompi_convertor_unpack( ompi_convertor_t* pConv,
                                             struct iovec* iov, uint32_t* out_size,
                                             uint32_t* max_data, int32_t* freeAfter )
{
    const ompi_datatype_t *pData = pConv->pDesc;
    uint32_t length;

    /* protect against over unpacking data */
    if( pConv->bConverted == (pData->size * pConv->count) ) {
        iov[0].iov_len = 0;
        /* Report zero through the pointer. The previous code assigned 0 to
         * the local pointer itself, leaving the caller's value untouched. */
        *out_size = 0;
        *max_data = 0;
        return 1;  /* nothing to do */
    }

    if( pConv->flags & DT_FLAG_CONTIGUOUS ) {
        if( iov[0].iov_base == NULL ) {
            /* Contiguous data and no source buffer supplied: hand back the
             * position inside the user buffer, clipped to the length the
             * caller asked for, and account for it as converted. */
            length = pConv->count * pData->size - pConv->bConverted;
            iov[0].iov_base = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
            if( iov[0].iov_len < length )
                length = iov[0].iov_len;
            iov[0].iov_len = length;
            *max_data = length;
            pConv->bConverted += length;
            return (pConv->bConverted == (pData->size * pConv->count));
        }
    }

    assert( pConv->bConverted < (pConv->pDesc->size * pConv->count) );
    return pConv->fAdvance( pConv, iov, out_size, max_data, freeAfter );
}
#else
OMPI_DECLSPEC int32_t ompi_convertor_pack( ompi_convertor_t* pConv,
struct iovec* iov, uint32_t* out_size,
uint32_t* max_data, int32_t* freeAfter );
OMPI_DECLSPEC int32_t ompi_convertor_unpack( ompi_convertor_t* pConv,
struct iovec* iov, uint32_t* out_size,
uint32_t* max_data, int32_t* freeAfter );
#endif /* OMPI_ENABLE_DEBUG */
/* Base convertor for all external32 operations */
extern ompi_convertor_t* ompi_mpi_external32_convertor;
/* and finally the convertor functions */
OMPI_DECLSPEC ompi_convertor_t* ompi_convertor_create( int32_t remote_arch, int32_t mode );
OMPI_DECLSPEC int32_t ompi_convertor_set_start_position( ompi_convertor_t* convertor,
int32_t starting_pos );
OMPI_DECLSPEC int32_t ompi_convertor_init_for_send( ompi_convertor_t* pConv, uint32_t flags,
const ompi_datatype_t* pData, int32_t count,
const void* pUserBuf, int32_t local_starting_point,
memalloc_fct_t allocfn );
OMPI_DECLSPEC int32_t ompi_convertor_init_for_recv( ompi_convertor_t* pConv, uint32_t flags,
const ompi_datatype_t* pData, int32_t count,
const void* pUserBuf, int32_t remote_starting_point,
memalloc_fct_t allocfn );
OMPI_DECLSPEC int32_t ompi_convertor_need_buffers( ompi_convertor_t* pConvertor );
OMPI_DECLSPEC int32_t ompi_convertor_get_packed_size( const ompi_convertor_t* pConv, uint32_t* pSize );
OMPI_DECLSPEC int32_t ompi_convertor_get_unpacked_size( const ompi_convertor_t* pConv, uint32_t* pSize );
/*
 * Make pDestConv a fresh convertor sharing the remote architecture, the
 * conversion function table and the selected description of pSrcConv.
 * The destination gets no datatype (pDesc is NULL) and restarts on its
 * embedded static stack at position 0. Always returns OMPI_SUCCESS.
 */
static inline int ompi_convertor_copy( const ompi_convertor_t* pSrcConv, ompi_convertor_t* pDestConv )
{
    /* Drop a heap allocated stack left over from a previous use, if any */
    if( DT_STATIC_STACK_SIZE < pDestConv->stack_size ) {
        free( pDestConv->pStack );
    }
    /* ... and restart on the embedded stack */
    pDestConv->stack_pos  = 0;
    pDestConv->stack_size = DT_STATIC_STACK_SIZE;
    pDestConv->pStack     = pDestConv->static_stack;

    /* Settings inherited from the source convertor */
    pDestConv->remoteArch = pSrcConv->remoteArch;
    pDestConv->pFunctions = pSrcConv->pFunctions;
    pDestConv->use_desc   = pSrcConv->use_desc;

    /* No datatype is attached to the copy yet */
    pDestConv->pDesc = NULL;

    return OMPI_SUCCESS;
}
/*
 * Create a new convertor object and initialize it from pConvertor with
 * ompi_convertor_copy().
 *
 * Returns the new convertor, or NULL if the object could not be created.
 * NOTE(review): this assumes OBJ_NEW yields NULL on allocation failure;
 * confirm against the object system.
 */
static inline ompi_convertor_t* ompi_convertor_get_copy( const ompi_convertor_t* pConvertor )
{
    ompi_convertor_t* pDestConv = OBJ_NEW(ompi_convertor_t);

    /* ompi_convertor_copy() dereferences its destination, so never hand it
     * a NULL object. */
    if( NULL == pDestConv ) {
        return NULL;
    }
    (void)ompi_convertor_copy( pConvertor, pDestConv );
    return pDestConv;
}
/* temporary function prototypes. They should move in other place later. */
/*
*
*/
OMPI_DECLSPEC int32_t ompi_ddt_get_args( const ompi_datatype_t* pData, int32_t which,
int32_t * ci, int32_t * i,
int32_t * ca, long* a,
@ -338,8 +194,6 @@ OMPI_DECLSPEC int32_t ompi_ddt_set_args( ompi_datatype_t* pData,
int32_t ci, int32_t ** i,
int32_t ca, long* a,
int32_t cd, ompi_datatype_t** d,int32_t type);
OMPI_DECLSPEC int32_t ompi_ddt_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdtype, void *rbuf,
int32_t rcount, const ompi_datatype_t* rdtype);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -196,16 +196,6 @@ do { \
(_place)->elem.extent = (_extent); \
} while(0)
/* keep the last 16 bits free for data flags */
#define CONVERTOR_USELESS 0x00010000
#define CONVERTOR_RECV 0x00020000
#define CONVERTOR_SEND 0x00040000
#define CONVERTOR_HOMOGENEOUS 0x00080000
#define CONVERTOR_STATE_MASK 0xFF000000
#define CONVERTOR_STATE_START 0x01000000
#define CONVEROTR_STATE_COMPLETE 0x02000000
#define CONVERTOR_STATE_ALLOC 0x04000000
typedef struct {
float r;
float i;
@ -224,11 +214,7 @@ typedef struct {
extern const ompi_datatype_t* ompi_ddt_basicDatatypes[];
#define BASIC_DDT_FROM_ELEM( ELEM ) (ompi_ddt_basicDatatypes[(ELEM).elem.common.type])
extern conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED];
extern int32_t ompi_ddt_external32_init( void );
extern void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos,
const dt_elem_desc_t* pDesc, const char* name );
extern void ompi_convertor_dump( ompi_convertor_t* convertor );
#define SAVE_STACK( PSTACK, INDEX, TYPE, COUNT, DISP, END_LOOP) \
do { \
@ -321,127 +307,6 @@ static inline int GET_FIRST_NON_LOOP( const dt_elem_desc_t* _pElem )
return index;
}
int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
int starting_point, const int* sizes );
/*
 * Position the stack of a convertor working on a contiguous datatype at
 * starting_point bytes from the beginning of the packed data.
 *
 * Two stack entries are filled:
 *   [0] the whole data: remaining number of datatype elements and the
 *       displacement of the first element that is not fully converted;
 *   [1] the partially converted element: number of bytes left in it and
 *       the displacement of the next byte inside it.
 *
 * pConvertor      convertor to position (pDesc, use_desc, count and pStack
 *                 must already be set)
 * starting_point  byte offset to move to; also stored in bConverted
 * sizes           not used by the contiguous case
 *
 * Always returns OMPI_SUCCESS.
 */
static inline
int ompi_convertor_create_stack_with_pos_contig( ompi_convertor_t* pConvertor,
                                                 int starting_point, const int* sizes )
{
    dt_stack_t* pStack = pConvertor->pStack;  /* pointer to the position on the stack */
    const ompi_datatype_t* pData = pConvertor->pDesc;
    uint32_t count;
    long extent;

    (void)sizes;  /* kept only for signature compatibility with the general version */

    pStack[0].count    = pConvertor->count;
    pStack[0].index    = -1;
    pStack[0].end_loop = pConvertor->use_desc->used;

    /* Number of datatype elements that are already completely converted */
    if( pData->size == 0 ) {  /* special case for empty datatypes */
        count = pConvertor->count;
    } else {
        count = starting_point / pData->size;
    }
    extent = pData->ub - pData->lb;

    pStack[0].disp   = count * extent;
    pStack[0].count -= count;

    /* now compute the number of bytes already consumed in the current element */
    count = starting_point - count * pData->size;
    pStack[1].index    = 0;  /* useless */
    pStack[1].count    = pData->size - count;
    pStack[1].end_loop = 0;  /* useless */
    /* we save the current displacement starting from the beginning
     * of this data.
     */
    pStack[1].disp = pData->true_lb + count;

    pConvertor->bConverted = starting_point;
    pConvertor->stack_pos  = 1;
    return OMPI_SUCCESS;
}
/*
 * Position the stack of a convertor at the very beginning of the data.
 *
 * Entry 0 of the stack describes the whole data (count repetitions of the
 * description). Every DT_LOOP element found at the start of the description
 * is then pushed, followed by the first data element. bConverted is reset
 * to 0.
 *
 * pConvertor  convertor to position (use_desc, count and pStack must be set)
 * sizes       not referenced in this function
 *
 * Always returns OMPI_SUCCESS; if the element following the leading loops
 * is not flagged DT_FLAG_DATA a message and a dump of the datatype are
 * emitted, but no error is returned.
 */
static inline
int ompi_convertor_create_stack_at_begining( ompi_convertor_t* pConvertor, const int* sizes )
{
dt_stack_t* pStack;
dt_elem_desc_t* pElems;
int index = 0;
pConvertor->stack_pos = 0;
pStack = pConvertor->pStack;
/* Fill the first position on the stack. This one corresponds to the
 * last fake DT_END_LOOP that we add to the data representation and
 * allows us to move quickly inside the datatype when we have a count.
 */
pConvertor->pStack[0].index = -1;
pConvertor->pStack[0].count = pConvertor->count;
pConvertor->pStack[0].disp = 0;
/* Walk the data representation already selected for this convertor
 * (use_desc: the normal one or the optimized version). */
pElems = pConvertor->use_desc->desc;
pStack[0].end_loop = pConvertor->use_desc->used;
/* In the case where the datatype starts with loops, we should push them on the stack.
 * Otherwise when we reach the end_loop field we will pop too many entries and end up
 * overwriting other places in memory. Now the big question is when to stop creating
 * the entries on the stack: should we stop when we reach the first data element or
 * should we stop on the first contiguous loop?
 */
while( pElems[index].elem.common.type == DT_LOOP ) {
PUSH_STACK( pStack, pConvertor->stack_pos, index, DT_LOOP,
pElems[index].loop.loops, 0, pElems[index].loop.items );
index++;
}
if( pElems[index].elem.common.flags & DT_FLAG_DATA ) { /* let's stop here */
PUSH_STACK( pStack, pConvertor->stack_pos, index, pElems[index].elem.common.type,
pElems[index].elem.count, pElems[index].elem.disp, 0 );
} else {
/* Unexpected description: report it and dump the datatype for debugging. */
ompi_output( 0, "Here we should have a data in the datatype description\n" );
ompi_ddt_dump( pConvertor->pDesc );
}
pConvertor->bConverted = 0;
return OMPI_SUCCESS;
}
/*
 * Common part of the send/receive initialization: attach a datatype, an
 * element count and a user buffer to a convertor and make sure its stack
 * is deep enough for the datatype (number of loops + 3 entries).
 *
 * pConv     convertor to initialize; pConv->flags must already be set since
 *           it selects between the optimized and the normal description
 * datatype  datatype to attach; a reference is taken on it
 * count     number of datatype elements
 * pUserBuf  user buffer the data is read from / written to
 *
 * When the datatype differs from the one currently attached the convertor
 * is reset (static stack, bConverted = 0). When it is the same datatype the
 * current stack and position are kept, so that callers can continue from
 * where the convertor stopped.
 *
 * NOTE(review): the reference on datatype is taken on every call and a
 * previously attached datatype is not released here; confirm the intended
 * reference counting with the callers.
 * NOTE(review): the result of malloc is not checked; this function has no
 * way to report the failure.
 */
static inline void
convertor_init_generic( ompi_convertor_t* pConv, const ompi_datatype_t* datatype, int count,
                        const void* pUserBuf )
{
    uint32_t required_stack_length = datatype->btypes[DT_LOOP] + 3;

    OBJ_RETAIN( datatype );
    if( pConv->pDesc != datatype ) {
        pConv->pDesc = (ompi_datatype_t*)datatype;
        if( pConv->pStack != NULL ) {
            if( pConv->stack_size > DT_STATIC_STACK_SIZE )
                free( pConv->pStack );
        }
        pConv->pStack = pConv->static_stack;
        pConv->stack_size = DT_STATIC_STACK_SIZE;

        /* Decide which data representation will be used for the conversion. */
        if( (NULL != datatype->opt_desc.desc) && (pConv->flags & CONVERTOR_HOMOGENEOUS) ) {
            pConv->use_desc = &(datatype->opt_desc);
        } else {
            pConv->use_desc = &(datatype->desc);
        }
        pConv->bConverted = 0;  /* reset the convertor */
    }

    /* Grow the stack only when the current one is too small. Allocating
     * unconditionally, as was done before, leaked the previous heap stack
     * and threw away the saved position when the same datatype was reused.
     */
    if( pConv->stack_size < required_stack_length ) {
        if( pConv->stack_size > DT_STATIC_STACK_SIZE ) {
            free( pConv->pStack );
        }
        pConv->stack_size = required_stack_length;
        pConv->pStack = (dt_stack_t*)malloc( sizeof(dt_stack_t) * pConv->stack_size );
    }

    pConv->pBaseBuf = (void*)pUserBuf;
    pConv->count = count;
}
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -14,8 +14,11 @@
*
* $HEADER$
*/
#include "ompi_config.h"
#include "datatype/datatype.h"
#include "datatype/convertor.h"
#include "datatype/datatype_internal.h"
#include "datatype/dt_arch.h"

Просмотреть файл

@ -18,6 +18,7 @@
#include "ompi_config.h"
#include "datatype/datatype.h"
#include "datatype/convertor.h"
#include "datatype/datatype_internal.h"
#ifdef HAVE_ALLOCA_H
#include <alloca.h>

Просмотреть файл

@ -18,6 +18,7 @@
#include "ompi_config.h"
#include "datatype/datatype.h"
#include "datatype/convertor.h"
#include "datatype/datatype_internal.h"
#ifdef HAVE_ALLOCA_H
@ -40,7 +41,7 @@ int ompi_ddt_safeguard_pointer_debug_breakpoint( const void* actual_ptr, int len
static
int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
uint32_t* max_data,
size_t* max_data,
int32_t* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
@ -50,7 +51,7 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
uint32_t advance; /* number of bytes that we should advance the buffer */
long disp_desc = 0; /* compute displacement for truncated data */
int bConverted = 0; /* number of bytes converted this time */
ompi_datatype_t *pData = pConvertor->pDesc;
const ompi_datatype_t *pData = pConvertor->pDesc;
dt_elem_desc_t* pElem;
char* pOutput = pConvertor->pBaseBuf;
char* pInput;
@ -170,7 +171,7 @@ static
int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
uint32_t* max_data,
size_t* max_data,
int* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
@ -180,7 +181,7 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
long lastDisp = 0, last_count = 0;
uint32_t space = iov[0].iov_len, last_blength = 0;
char* pDestBuf;
ompi_datatype_t* pData = pConv->pDesc;
const ompi_datatype_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
pDestBuf = iov[0].iov_base;
@ -305,7 +306,7 @@ static
int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t *out_size,
uint32_t* max_data,
size_t* max_data,
int* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
@ -317,7 +318,7 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
long lastDisp = 0;
uint32_t space = *max_data, last_blength = 0, saveLength;
char *destination, *source;
ompi_datatype_t* pData = pConv->pDesc;
const ompi_datatype_t* pData = pConv->pDesc;
ddt_elem_desc_t pack_elem;
dt_elem_desc_t* pElems;
@ -581,10 +582,10 @@ static int
ompi_convertor_pack_no_conv_contig( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
uint32_t* max_data,
size_t* max_data,
int* freeAfter )
{
ompi_datatype_t* pData = pConv->pDesc;
const ompi_datatype_t* pData = pConv->pDesc;
dt_stack_t* pStack = pConv->pStack;
char *pSrc;
size_t length = pData->size * pConv->count - pConv->bConverted;
@ -629,10 +630,10 @@ static int
ompi_convertor_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
uint32_t* max_data,
size_t* max_data,
int* freeAfter )
{
ompi_datatype_t* pData = pConv->pDesc;
const ompi_datatype_t* pData = pConv->pDesc;
dt_stack_t* pStack = pConv->pStack;
char *pSrc, *pDest;
size_t length = pData->size * pConv->count;
@ -748,150 +749,44 @@ ompi_convertor_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
return (pConv->bConverted == length);
}
/*
* Set the starting position for a convertor. This function can be used at any
* moment in the life of a convertor to move the position to the desired point.
*/
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
inline int32_t ompi_convertor_set_start_position( ompi_convertor_t* convertor,
int32_t starting_pos )
inline int32_t
ompi_convertor_prepare_for_send( ompi_convertor_t* convertor,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf )
{
if( convertor->flags & DT_FLAG_CONTIGUOUS )
return ompi_convertor_create_stack_with_pos_contig( convertor, starting_pos, ompi_ddt_local_sizes );
if( starting_pos != 0 ) {
return ompi_convertor_create_stack_with_pos_general( convertor, starting_pos, ompi_ddt_local_sizes );
}
return ompi_convertor_create_stack_at_begining( convertor, ompi_ddt_local_sizes );
}
int32_t ompi_convertor_init_for_send( ompi_convertor_t* pConv,
uint32_t flags,
const ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf,
int32_t starting_pos,
memalloc_fct_t allocfn )
{
if( !(datatype->flags & DT_FLAG_COMMITED) ) {
/* this datatype is improper for conversion. Commit it first */
if( OMPI_SUCCESS != ompi_convertor_prepare( convertor, datatype,
count, pUserBuf ) ) {
return OMPI_ERROR;
}
pConv->flags = CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS; /* by default set to homogeneous */
convertor_init_generic( pConv, datatype, count, pUserBuf );
pConv->pFunctions = ompi_ddt_copy_functions;
pConv->memAlloc_fn = allocfn;
convertor->flags |= CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS;
convertor->memAlloc_fn = NULL;
/* Just to avoid complaint from the compiler */
pConv->fAdvance = ompi_convertor_pack_general;
pConv->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
pConv->fAdvance = ompi_convertor_pack_no_conversion;
convertor->fAdvance = ompi_convertor_pack_general;
convertor->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
convertor->fAdvance = ompi_convertor_pack_no_conversion;
if( datatype->flags & DT_FLAG_CONTIGUOUS ) {
pConv->flags |= DT_FLAG_CONTIGUOUS;
if( ((datatype->ub - datatype->lb) == (long)datatype->size) || (1 >= pConv->count) ) /* gaps or no gaps */
pConv->fAdvance = ompi_convertor_pack_no_conv_contig;
convertor->flags |= DT_FLAG_CONTIGUOUS;
if( ((datatype->ub - datatype->lb) == (long)datatype->size)
|| (1 >= convertor->count) ) /* gaps or no gaps */
convertor->fAdvance = ompi_convertor_pack_no_conv_contig;
else
pConv->fAdvance = ompi_convertor_pack_no_conv_contig_with_gaps;
convertor->fAdvance = ompi_convertor_pack_no_conv_contig_with_gaps;
}
if( -1 == starting_pos ) return OMPI_SUCCESS;
/* dont call any function if the convertor is in the correct position */
if( (pConv->bConverted == (unsigned long)starting_pos) &&
(0 != starting_pos) ) return OMPI_SUCCESS;
/* do we start after the end of the data ? */
if( starting_pos >= (int)(pConv->count * datatype->size) ) {
pConv->bConverted = pConv->count * datatype->size;
return OMPI_SUCCESS;
}
return ompi_convertor_set_start_position( pConv, starting_pos );
return OMPI_SUCCESS;
}
#if OMPI_ENABLE_DEBUG
/*
 * Out-of-line version of the pack entry point.
 *
 * Returns 1, after zeroing iov[0].iov_len, *out_size and *max_data, when
 * the convertor has already converted all of its data. Otherwise the call
 * is delegated to the convertor's fAdvance function and its result is
 * returned.
 */
int32_t ompi_convertor_pack( ompi_convertor_t* pConv,
                             struct iovec* iov, uint32_t* out_size,
                             uint32_t* max_data, int32_t* freeAfter )
{
    if( pConv->bConverted != (pConv->pDesc->size * pConv->count) ) {
        /* Data remains, therefore we must still be before the end. */
        assert( pConv->bConverted < (pConv->pDesc->size * pConv->count) );
        /* Nothing is allocated here. The packing function allocates
         * memory if it needs some; if the derived datatype description
         * can be turned into iovec entries no allocation is necessary.
         */
        return pConv->fAdvance( pConv, iov, out_size, max_data, freeAfter );
    }

    /* protect against over packing data */
    iov[0].iov_len = 0;
    *out_size = 0;
    *max_data = 0;
    return 1;  /* nothing to do */
}
#endif /* OMPI_ENABLE_DEBUG */
ompi_convertor_t* ompi_convertor_create( int32_t remote_arch, int32_t mode )
int32_t
ompi_convertor_copy_and_prepare_for_send( const ompi_convertor_t* pSrcConv,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf,
ompi_convertor_t* convertor )
{
ompi_convertor_t* pConv = OBJ_NEW(ompi_convertor_t);
convertor->remoteArch = pSrcConv->remoteArch;
convertor->pFunctions = pSrcConv->pFunctions;
pConv->remoteArch = remote_arch;
return pConv;
}
/*
 * Constructor of the convertor class: the new convertor uses its embedded
 * static stack and has no datatype, description, advance function or
 * allocation function attached.
 */
static void ompi_convertor_construct( ompi_convertor_t* pConv )
{
    /* start on the embedded stack */
    pConv->stack_size  = DT_STATIC_STACK_SIZE;
    pConv->pStack      = pConv->static_stack;

    /* nothing attached yet */
    pConv->use_desc    = NULL;
    pConv->pDesc       = NULL;
    pConv->memAlloc_fn = NULL;
    pConv->fAdvance    = NULL;
}
/*
 * Destructor of the convertor class: frees the stack when it is larger than
 * the embedded one (i.e. not the static stack) and drops the reference held
 * on the attached datatype, if any.
 */
static void ompi_convertor_destruct( ompi_convertor_t* pConv )
{
    if( DT_STATIC_STACK_SIZE < pConv->stack_size ) {
        free( pConv->pStack );
    }
    if( NULL != pConv->pDesc ) {
        OBJ_RELEASE( pConv->pDesc );
    }
    pConv->pDesc = NULL;
}
OBJ_CLASS_INSTANCE(ompi_convertor_t, ompi_object_t, ompi_convertor_construct, ompi_convertor_destruct );
/* Actually we suppose that we can only do receiver side conversion */
/*
 * Compute the packed size of the data attached to the convertor.
 *
 * pConv  convertor holding the datatype and the element count
 * pSize  receives the size of one datatype instance, as reported by
 *        ompi_ddt_type_size(), multiplied by the convertor count
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR (leaving *pSize untouched) when the
 * size of the datatype cannot be obtained.
 */
int32_t ompi_convertor_get_packed_size( const ompi_convertor_t* pConv, uint32_t* pSize )
{
    int32_t one_element = 0;

    if( 0 == ompi_ddt_type_size( pConv->pDesc, &one_element ) ) {
        /* one_element is the size of a single instance of the data */
        *pSize = one_element * pConv->count;
        return OMPI_SUCCESS;
    }
    return OMPI_ERROR;
}
int32_t ompi_convertor_get_unpacked_size( const ompi_convertor_t* pConv, uint32_t* pSize )
{
int i;
ompi_datatype_t* pData = pConv->pDesc;
if( pConv->count == 0 ) {
*pSize = 0;
return OMPI_SUCCESS;
}
if( pConv->remoteArch == 0 ) { /* same architecture */
*pSize = pData->size * pConv->count;
return OMPI_SUCCESS;
}
*pSize = 0;
for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) {
if( pData->bdt_used & (((unsigned long long)1)<<i) ) {
/* TODO replace with the remote size */
*pSize += (pData->btypes[i] * ompi_ddt_basicDatatypes[i]->size);
}
}
*pSize *= pConv->count;
return OMPI_SUCCESS;
return ompi_convertor_prepare_for_send( convertor, datatype, count, pUserBuf );
}

Просмотреть файл

@ -17,6 +17,7 @@
#include "ompi_config.h"
#include "datatype/datatype.h"
#include "datatype/convertor.h"
#include "request/request.h"
#include "mca/pml/pml.h"
@ -43,7 +44,8 @@ int32_t ompi_ddt_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdty
ompi_convertor_t *send_convertor, *recv_convertor;
struct iovec iov;
int length, completed;
uint32_t max_data, iov_count;
uint32_t iov_count;
size_t max_data;
int32_t freeAfter;
/* First check if we really have something to do */
@ -65,7 +67,8 @@ int32_t ompi_ddt_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdty
/* If receive packed. */
if (rdtype == MPI_PACKED) {
send_convertor = OBJ_NEW(ompi_convertor_t);
ompi_convertor_init_for_send( send_convertor, 0, sdtype, scount, sbuf, 0, NULL );
ompi_convertor_prepare_for_send( send_convertor, sdtype, scount, sbuf );
ompi_convertor_personalize( send_convertor, 0, 0, NULL );
iov_count = 1;
iov.iov_len = rcount;
@ -80,7 +83,8 @@ int32_t ompi_ddt_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdty
/* If send packed. */
if (sdtype == MPI_PACKED) {
recv_convertor = OBJ_NEW(ompi_convertor_t);
ompi_convertor_init_for_recv( recv_convertor, 0, rdtype, rcount, rbuf, 0, NULL );
ompi_convertor_prepare( recv_convertor, rdtype, rcount, rbuf );
ompi_convertor_prepare_for_recv( recv_convertor, 0, 0, NULL );
iov_count = 1;
iov.iov_len = scount;
@ -99,8 +103,10 @@ int32_t ompi_ddt_sndrcv( void *sbuf, int32_t scount, const ompi_datatype_t* sdty
send_convertor = OBJ_NEW(ompi_convertor_t);
recv_convertor = OBJ_NEW(ompi_convertor_t);
ompi_convertor_init_for_send( send_convertor, 0, sdtype, scount, sbuf, 0, NULL );
ompi_convertor_init_for_recv( recv_convertor, 0, rdtype, rcount, rbuf, 0, NULL );
ompi_convertor_prepare( send_convertor, sdtype, scount, sbuf );
ompi_convertor_prepare( recv_convertor, rdtype, rcount, rbuf );
ompi_convertor_prepare_for_send( send_convertor, 0, 0, NULL );
ompi_convertor_prepare_for_recv( recv_convertor, 0, 0, NULL );
completed = 0;
while( !completed ) {

Просмотреть файл

@ -17,6 +17,7 @@
#include "ompi_config.h"
#include "datatype/datatype.h"
#include "datatype/convertor.h"
#include "datatype/datatype_internal.h"
#ifdef HAVE_ALLOCA_H
@ -24,7 +25,8 @@
#endif
#include <stdlib.h>
void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos, const dt_elem_desc_t* pDesc, const char* name )
void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos,
const union dt_elem_desc* pDesc, const char* name )
{
ompi_output( 0, "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );
for( ; stack_pos >= 0; stack_pos-- ) {
@ -56,7 +58,7 @@ void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos, const dt_elem
static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
struct iovec* iov,
uint32_t* out_size,
uint32_t* max_data,
size_t* max_data,
int32_t* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
@ -168,7 +170,7 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
uint32_t* max_data,
size_t* max_data,
int32_t* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
@ -178,7 +180,7 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
long lastDisp = 0;
size_t space = iov[0].iov_len, last_count = 0, last_blength = 0;
char* pSrcBuf;
ompi_datatype_t* pData = pConv->pDesc;
const ompi_datatype_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
pSrcBuf = iov[0].iov_base;
@ -314,10 +316,10 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
uint32_t* max_data,
size_t* max_data,
int32_t* freeAfter )
{
ompi_datatype_t *pData = pConv->pDesc;
const ompi_datatype_t *pData = pConv->pDesc;
char* pDstBuf = pConv->pBaseBuf, *pSrcBuf;
uint32_t iov_count, initial_bytes_converted = pConv->bConverted;
long extent = pData->ub - pData->lb;
@ -616,7 +618,7 @@ conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED] = {
*/
int32_t ompi_convertor_need_buffers( ompi_convertor_t* pConvertor )
{
ompi_datatype_t* pData = pConvertor->pDesc;
const ompi_datatype_t* pData = pConvertor->pDesc;
if( !(pData->flags & DT_FLAG_CONTIGUOUS) ) return 1;
if( pConvertor->count == 1 ) return 0; /* only one data ignore the gaps around */
if( (long)pData->size != (pData->ub - pData->lb) ) return 1;
@ -624,74 +626,44 @@ int32_t ompi_convertor_need_buffers( ompi_convertor_t* pConvertor )
}
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
int32_t ompi_convertor_init_for_recv( ompi_convertor_t* pConv, uint32_t flags,
const ompi_datatype_t* datatype, int32_t count,
const void* pUserBuf, int32_t starting_pos,
memalloc_fct_t allocfn )
inline int32_t
ompi_convertor_prepare_for_recv( ompi_convertor_t* convertor,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf )
{
if( !(datatype->flags & DT_FLAG_COMMITED) ) {
/* this datatype is improper for conversion. Commit it first */
/* Here I should check that the data is not overlapping */
if( OMPI_SUCCESS != ompi_convertor_prepare( convertor, datatype,
count, pUserBuf ) ) {
return OMPI_ERROR;
}
pConv->flags = CONVERTOR_RECV | CONVERTOR_HOMOGENEOUS;
convertor_init_generic( pConv, datatype, count, pUserBuf );
pConv->pFunctions = ompi_ddt_copy_functions;
pConv->memAlloc_fn = allocfn;
pConv->fAdvance = ompi_convertor_unpack_general; /* TODO: just stop complaining */
pConv->fAdvance = ompi_convertor_unpack_homogeneous; /* default behaviour */
convertor->memAlloc_fn = NULL;
convertor->fAdvance = ompi_convertor_unpack_general; /* TODO: just stop complaining */
convertor->fAdvance = ompi_convertor_unpack_homogeneous; /* default behaviour */
/* TODO: work only on homogeneous architectures */
if( datatype->flags & DT_FLAG_CONTIGUOUS ) {
pConv->flags |= DT_FLAG_CONTIGUOUS;
pConv->fAdvance = ompi_convertor_unpack_homogeneous_contig;
if( convertor->pDesc->flags & DT_FLAG_CONTIGUOUS ) {
convertor->flags |= DT_FLAG_CONTIGUOUS;
convertor->fAdvance = ompi_convertor_unpack_homogeneous_contig;
}
if( -1 == starting_pos ) return OMPI_SUCCESS;
/* dont call any function if the convertor is in the correct position */
if( (pConv->bConverted == (unsigned long)starting_pos) &&
(0 != starting_pos) ) return OMPI_SUCCESS;
if( starting_pos >= (int)(pConv->count * datatype->size) ) {
pConv->bConverted = pConv->count * datatype->size;
return OMPI_SUCCESS;
}
return ompi_convertor_set_start_position( pConv, starting_pos );
return OMPI_SUCCESS;
}
#if OMPI_ENABLE_DEBUG
/*
 * Out-of-line version of the unpack entry point.
 *
 * pConv      convertor describing the destination data and current position
 * iov        input vector; for a contiguous datatype a NULL iov[0].iov_base
 *            asks for the address of the user memory instead of a copy
 * out_size   forwarded to the advance function; set to 0 when nothing is left
 * max_data   receives the number of bytes handled by this call
 * freeAfter  forwarded to the advance function
 *
 * Returns 1 when the whole data has been converted. On the contiguous
 * shortcut it returns 1 or 0 depending on completion; otherwise it returns
 * the result of the convertor's fAdvance function.
 */
int32_t ompi_convertor_unpack( ompi_convertor_t* pConv,
                               struct iovec* iov, uint32_t* out_size,
                               uint32_t* max_data, int32_t* freeAfter )
{
    const ompi_datatype_t *pData = pConv->pDesc;
    uint32_t length;

    /* protect against over unpacking data */
    if( pConv->bConverted == (pData->size * pConv->count) ) {
        iov[0].iov_len = 0;
        /* Write through the pointer. Assigning 0 to the pointer itself, as
         * the code did before, never updated the caller's variable. */
        *out_size = 0;
        *max_data = 0;
        return 1;  /* nothing to do */
    }

    if( pConv->flags & DT_FLAG_CONTIGUOUS ) {
        if( iov[0].iov_base == NULL ) {
            /* Contiguous data and no source buffer supplied: return the
             * current position inside the user buffer, clipped to the
             * requested length, and count those bytes as converted. */
            length = pConv->count * pData->size - pConv->bConverted;
            iov[0].iov_base = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
            if( iov[0].iov_len < length )
                length = iov[0].iov_len;
            iov[0].iov_len = length;
            *max_data = length;
            pConv->bConverted += length;
            return (pConv->bConverted == (pData->size * pConv->count));
        }
    }

    assert( pConv->bConverted < (pConv->pDesc->size * pConv->count) );
    return pConv->fAdvance( pConv, iov, out_size, max_data, freeAfter );
}
#endif /* OMPI_ENABLE_DEBUG */
/*
 * Clone the architecture dependent settings of pSrcConv (remote
 * architecture and conversion function table) into convertor, then prepare
 * convertor for receiving count elements of datatype into pUserBuf.
 *
 * Returns the result of ompi_convertor_prepare_for_recv().
 */
int32_t
ompi_convertor_copy_and_prepare_for_recv( const ompi_convertor_t* pSrcConv,
                                          const struct ompi_datatype_t* datatype,
                                          int32_t count,
                                          const void* pUserBuf,
                                          ompi_convertor_t* convertor )
{
    int32_t rc;

    /* inherit what only depends on the peer */
    convertor->pFunctions = pSrcConv->pFunctions;
    convertor->remoteArch = pSrcConv->remoteArch;

    rc = ompi_convertor_prepare_for_recv( convertor, datatype, count, pUserBuf );
    return rc;
}
/* Get the number of elements from the data associated with this convertor that can be
* retrieved from a recevied buffer with the size iSize.

Просмотреть файл

@ -18,6 +18,7 @@
#include "ompi_config.h"
#include "datatype/datatype.h"
#include "datatype/convertor.h"
#include "datatype/datatype_internal.h"
#ifdef HAVE_ALLOCA_H
@ -46,7 +47,7 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int lastLength = 0, loop_length;
ompi_datatype_t* pData = pConvertor->pDesc;
const ompi_datatype_t* pData = pConvertor->pDesc;
int* remoteLength;
int resting_place = starting_point;
dt_elem_desc_t* pElems;
@ -194,8 +195,8 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
void ompi_convertor_dump( ompi_convertor_t* convertor )
{
printf( "Convertor %p count %d stack position %d bConverted %d\n", (void*)convertor,
convertor->count, convertor->stack_pos, convertor->bConverted );
printf( "Convertor %p count %d stack position %d bConverted %ld\n", (void*)convertor,
convertor->count, convertor->stack_pos, (unsigned long)convertor->bConverted );
ompi_ddt_dump( convertor->pDesc );
printf( "Actual stack representation\n" );
ompi_ddt_dump_stack( convertor->pStack, convertor->stack_pos,

Просмотреть файл

@ -18,6 +18,7 @@
#include "ompi_config.h"
#include "datatype/datatype.h"
#include "datatype/convertor.h"
#include "datatype/datatype_internal.h"
#ifdef HAVE_ALLOCA_H
@ -102,7 +103,7 @@ do { \
int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
uint32_t* max_data,
size_t* max_data,
int32_t* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
@ -113,7 +114,7 @@ int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
uint32_t bConverted = 0; /* number of bytes converted this time */
dt_elem_desc_t* description;
dt_elem_desc_t* pElem;
ompi_datatype_t *pData = pConvertor->pDesc;
const ompi_datatype_t *pData = pConvertor->pDesc;
char* iov_base_local;
uint32_t iov_len_local, i, iov_count;