New datatype engine.
This commit was SVN r918.
Этот коммит содержится в:
родитель
cd6be89d0d
Коммит
5b0c135466
@ -9,19 +9,13 @@ noinst_LTLIBRARIES = libdatatype.la
|
||||
|
||||
# Source code files
|
||||
|
||||
headers = \
|
||||
datatype.h
|
||||
headers = datatype.h datatype_internal.h
|
||||
|
||||
libdatatype_la_SOURCES = \
|
||||
$(headers) \
|
||||
datatype.c \
|
||||
datatype_copy.c \
|
||||
datatype_crc32.c \
|
||||
datatype_create.c \
|
||||
datatype_delete.c \
|
||||
datatype_memcpy.c \
|
||||
datatype_pack.c \
|
||||
datatype_sum32.c
|
||||
dt_add.c dt_create.c dt_create_array.c dt_create_dup.c dt_create_indexed.c \
|
||||
dt_create_struct.c dt_create_vector.c dt_destroy.c dt_module.c dt_old_limits.c \
|
||||
dt_optimize.c dt_pack.c dt_unpack.c
|
||||
|
||||
# Conditionally install the header files
|
||||
|
||||
|
@ -1,664 +1,263 @@
|
||||
/*
|
||||
* $HEADER$
|
||||
*/
|
||||
/* -*- Mode: C; c-basic-offset:3 ; -*- */
|
||||
|
||||
/** @file
|
||||
*
|
||||
* lam_datatype_t interface for LAM internal data type representation
|
||||
*
|
||||
* lam_datatype_t is a class which represents contiguous or
|
||||
* non-contiguous data together with constituent type-related
|
||||
* information. It is the LAM's-eye view of MPI_Datatype.
|
||||
*/
|
||||
|
||||
#ifndef LAM_DATATYPE_H_INCLUDED
|
||||
#define LAM_DATATYPE_H_INCLUDED 1
|
||||
|
||||
#include <assert.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/uio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#ifndef DATATYPE_H_HAS_BEEN_INCLUDED
|
||||
#define DATATYPE_H_HAS_BEEN_INCLUDED
|
||||
|
||||
#include "lam_config.h"
|
||||
#include "constants.h"
|
||||
#include "lfc/lam_object.h"
|
||||
#include "lfc/lam_hash_table.h"
|
||||
#include "types.h"
|
||||
|
||||
#include "mpi.h"
|
||||
|
||||
|
||||
/* fortran sizes and alignments ***************************************/
|
||||
|
||||
extern int lam_sizeof_f77_integer;
|
||||
extern int lam_sizeof_f77_real;
|
||||
extern int lam_sizeof_f77_dblprec;
|
||||
extern int lam_sizeof_f77_complex;
|
||||
extern int lam_sizeof_f77_dblcomplex;
|
||||
|
||||
extern int lam_alignment_f77_integer;
|
||||
extern int lam_alignment_f77_real;
|
||||
extern int lam_alignment_f77_dblprec;
|
||||
extern int lam_alignment_f77_complex;
|
||||
extern int lam_alignment_f77_dblcomplex;
|
||||
|
||||
|
||||
/* enums **************************************************************/
|
||||
|
||||
/**
|
||||
* Datatype state flags
|
||||
#define DT_LOOP 0x00
|
||||
#define DT_LB 0x01
|
||||
#define DT_UB 0x02
|
||||
#define DT_SPACE 0x03
|
||||
#define DT_CHAR 0x04
|
||||
#define DT_BYTE 0x05
|
||||
#define DT_SHORT 0x06
|
||||
#define DT_INT 0x07
|
||||
#define DT_FLOAT 0x08
|
||||
#define DT_LONG 0x09
|
||||
#define DT_DOUBLE 0x0A
|
||||
#define DT_LONG_LONG 0x0B
|
||||
#define DT_LONG_DOUBLE 0x0C
|
||||
#define DT_COMPLEX_FLOAT 0x0D
|
||||
#define DT_COMPLEX_DOUBLE 0x0E
|
||||
#define DT_END_LOOP 0x0F
|
||||
/* if there are more basic datatypes than the number of bits in the int type
|
||||
* the bdt_used field of the data description struct should be changed to long.
|
||||
*/
|
||||
enum lam_datatype_state_t {
|
||||
LAM_DATATYPE_STATE_COMMITTED = 1 << 0,
|
||||
LAM_DATATYPE_STATE_CONTIGUOUS = 1 << 1,
|
||||
LAM_DATATYPE_STATE_FORTRAN = 1 << 2,
|
||||
LAM_DATATYPE_STATE_OPTIMIZED = 1 << 3,
|
||||
LAM_DATATYPE_STATE_DONT_OPTIMIZE = 1 << 4,
|
||||
LAM_DATATYPE_STATE_XDR = 1 << 5,
|
||||
/* etc. */
|
||||
#define DT_MAX_PREDEFINED 0x10
|
||||
|
||||
/* flags for the datatypes. */
|
||||
#define DT_FLAG_DESTROYED 0x0001 /* user destroyed but some other layers still have a reference */
|
||||
#define DT_FLAG_COMMITED 0x0002 /* ready to be used for a send/recv operation */
|
||||
#define DT_FLAG_CONTIGUOUS 0x0004 /* contiguous datatype */
|
||||
#define DT_FLAG_OVERLAP 0x0008 /* datatype is improper for a recv operation */
|
||||
#define DT_FLAG_USER_LB 0x0010 /* has a user defined LB */
|
||||
#define DT_FLAG_USER_UB 0x0020 /* has a user defined UB */
|
||||
#define DT_FLAG_FOREVER 0x0040 /* cannot be removed: initial and predefined datatypes */
|
||||
#define DT_FLAG_IN_LOOP 0x0080 /* we are inside a loop */
|
||||
#define DT_FLAG_INITIAL 0x0100 /* one of the initial datatype */
|
||||
#define DT_FLAG_DATA 0x0200 /* data or control structure */
|
||||
#define DT_FLAG_BASIC (DT_FLAG_INITIAL | DT_FLAG_COMMITED | DT_FLAG_FOREVER | DT_FLAG_CONTIGUOUS)
|
||||
|
||||
#define DT_INCREASE_STACK 32
|
||||
|
||||
/* the basic element. A data description is composed
|
||||
* by a set of basic elements.
|
||||
*/
|
||||
typedef struct __dt_elem_desc {
|
||||
unsigned short flags; /* flags for the record */
|
||||
unsigned short type; /* the basic data type id */
|
||||
unsigned int count; /* number of elements */
|
||||
long disp; /* displacement of the first element */
|
||||
unsigned int extent; /* extent of each element */
|
||||
} dt_elem_desc_t;
|
||||
|
||||
typedef struct {
|
||||
float r;
|
||||
float i;
|
||||
} complex_float_t;
|
||||
|
||||
typedef struct {
|
||||
double r;
|
||||
double i;
|
||||
} complex_double_t;
|
||||
|
||||
/* The basic memory zone description. The idea is to be able to represent the
|
||||
* data as a array of zones, thus allowing us to simply find when concatenating
|
||||
* several data leads to merging contiguous zones of memory.
|
||||
*/
|
||||
typedef struct __dt_zone_desc {
|
||||
int useless;
|
||||
} dt_zone_desc_t;
|
||||
|
||||
typedef struct __dt_struct_desc {
|
||||
int length; /* the maximum number of elements in the description array */
|
||||
int used; /* the number of used elements in the description array */
|
||||
dt_elem_desc_t* desc;
|
||||
} dt_type_desc_t;
|
||||
|
||||
/* the data description.
|
||||
*/
|
||||
typedef struct __dt_desc {
|
||||
lam_object_t super;
|
||||
unsigned int size; /* total size in bytes of the memory used by the data if
|
||||
* the data is put on a contiguous buffer */
|
||||
long true_lb;
|
||||
long true_ub; /* the true ub of the data without user defined lb and ub */
|
||||
unsigned int align; /* data should be aligned to */
|
||||
long lb; /* lower bound in memory */
|
||||
long ub; /* upper bound in memory */
|
||||
unsigned short flags; /* the flags */
|
||||
unsigned short id; /* data id, normally the index in the data array. */
|
||||
unsigned int nbElems; /* total number of elements inside the datatype */
|
||||
unsigned int bdt_used; /* which basic datatypes are used in the data description */
|
||||
|
||||
/* Attribute fields */
|
||||
lam_hash_table_t *keyhash;
|
||||
char name[MPI_MAX_OBJECT_NAME];
|
||||
|
||||
dt_type_desc_t desc; /* the data description */
|
||||
dt_type_desc_t opt_desc; /* short description of the data used when conversion is useless
|
||||
* or in the send case (without conversion) */
|
||||
void* args; /* data description for the user */
|
||||
|
||||
/* basic elements count used to compute the size of the datatype for
|
||||
* remote nodes */
|
||||
unsigned int btypes[DT_MAX_PREDEFINED];
|
||||
} dt_desc_t, lam_datatype_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION( lam_datatype_t );
|
||||
|
||||
extern dt_desc_t basicDatatypes[];
|
||||
|
||||
/*
 * Two-argument maximum / minimum helpers: LMAX and LMIN work on long,
 * IMAX and IMIN work on int.
 *
 * With GCC-compatible compilers they are statement-expression macros that
 * copy each argument into a local first, so every argument is evaluated
 * exactly once.  Elsewhere they are functions with the same names.
 */
#if defined(__GNUC__)
#define LMAX(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define LMIN(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _a : _b); })
#define IMAX(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define IMIN(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _a : _b); })
#else
/* static inline rather than plain static: this is a header, and a plain
 * static function would be defined (and reported as unused) in every
 * source file that includes it without calling it. */
static inline long LMAX( long a, long b ) { return ( a < b ? b : a ); }
static inline long LMIN( long a, long b ) { return ( a < b ? a : b ); }
static inline int IMAX( int a, int b ) { return ( a < b ? b : a ); }
static inline int IMIN( int a, int b ) { return ( a < b ? a : b ); }
#endif  /* __GNUC__ */
|
||||
|
||||
typedef struct __dt_stack {
|
||||
int index;
|
||||
int count;
|
||||
int end_loop;
|
||||
long disp;
|
||||
} dt_stack_t;
|
||||
|
||||
typedef struct __dt_convert {
|
||||
char* buf;
|
||||
unsigned int length;
|
||||
dt_stack_t* pStack;
|
||||
dt_desc_t* pDesc;
|
||||
} dt_convert_t;
|
||||
|
||||
int dt_load( void );
|
||||
int dt_unload( void );
|
||||
dt_desc_t* dt_create( int expectedSize );
|
||||
int dt_commit( dt_desc_t** );
|
||||
#define dt_free dt_destroy
|
||||
int dt_free( dt_desc_t** );
|
||||
int dt_destroy( dt_desc_t** );
|
||||
void dt_dump( dt_desc_t* pData );
|
||||
void dt_dump_complete( dt_desc_t* pData );
|
||||
/* data creation functions */
|
||||
int dt_duplicate( dt_desc_t* oldType, dt_desc_t** newType );
|
||||
int dt_create_contiguous( size_t count, dt_desc_t* oldType, dt_desc_t** newType );
|
||||
int dt_create_vector( size_t count, int bLength, long stride,
|
||||
dt_desc_t* oldType, dt_desc_t** newType );
|
||||
int dt_create_hvector( size_t count, int bLength, long stride,
|
||||
dt_desc_t* oldType, dt_desc_t** newType );
|
||||
int dt_create_indexed( size_t count, int* pBlockLength, int* pDisp,
|
||||
dt_desc_t* oldType, dt_desc_t** newType );
|
||||
int dt_create_hindexed( size_t count, int* pBlockLength, long* pDisp,
|
||||
dt_desc_t* oldType, dt_desc_t** newType );
|
||||
int dt_create_indexed_block( size_t count, int bLength, int* pDisp,
|
||||
dt_desc_t* oldType, dt_desc_t** newType );
|
||||
int dt_create_struct( size_t count, size_t* pBlockLength, long* pDisp,
|
||||
dt_desc_t** pTypes, dt_desc_t** newType );
|
||||
int dt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t** newType );
|
||||
int dt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts,
|
||||
int order, dt_desc_t* oldType, dt_desc_t** newType );
|
||||
int dt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib,
|
||||
int* pDArgs, int* pPSizes, int order, dt_desc_t* oldType,
|
||||
dt_desc_t** newType );
|
||||
|
||||
int dt_add( dt_desc_t* pdtBase, dt_desc_t* pdtNew, unsigned int count, long disp, long extent );
|
||||
|
||||
int dt_type_lb( dt_desc_t* pData, long* disp );
|
||||
int dt_type_ub( dt_desc_t* pData, long* disp );
|
||||
int dt_type_size ( dt_desc_t* pData, int *size );
|
||||
int dt_type_extent( dt_desc_t* pData, long* extent );
|
||||
|
||||
int dt_type_resize( dt_desc_t* pOld, long lb, long extent, dt_desc_t** pNew );
|
||||
int dt_get_extent( dt_desc_t* datatype, long* lb, long* extent);
|
||||
int dt_get_true_extent( dt_desc_t* datatype, long* true_lb, long* true_extent);
|
||||
int dt_get_element_count( dt_desc_t* datatype, size_t iSize );
|
||||
int dt_copy_content_same_dt( dt_desc_t* pData, int count, char* pDestBuf, char* pSrcBuf );
|
||||
|
||||
#define dt_increase_ref(PDT) OBJ_RETAIN( PDT )
|
||||
#define dt_decrease_ref(PDT) OBJ_RELEASE( PDT )
|
||||
|
||||
int dt_optimize_short( dt_desc_t* pData, int count, dt_type_desc_t* pTypeDesc );
|
||||
|
||||
/*
 * Bit-flag helpers.  Each macro rewrites INT_VALUE in place, so INT_VALUE
 * must be a modifiable lvalue; it is evaluated twice.  Every expansion is
 * a single parenthesised assignment expression.
 *
 *   SET_FLAG     turns the bits of FLAG on.
 *   UNSET_FLAG   turns the bits of FLAG off.
 *   REMOVE_FLAG  turns the bits of FLAG off.  It used to XOR the value,
 *                which sets the flag whenever it was not already set.
 */
#define REMOVE_FLAG( INT_VALUE, FLAG ) ((INT_VALUE) = (INT_VALUE) & (~(FLAG)))
#define SET_FLAG( INT_VALUE, FLAG ) ((INT_VALUE) = (INT_VALUE) | (FLAG))
#define UNSET_FLAG( INT_VALUE, FLAG) ((INT_VALUE) = (INT_VALUE) & (~(FLAG)))

/* The same three operations specialised for DT_FLAG_CONTIGUOUS. */
#define REMOVE_CONTIGUOUS_FLAG( INT_VALUE ) REMOVE_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
#define SET_CONTIGUOUS_FLAG( INT_VALUE ) SET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
#define UNSET_CONTIGUOUS_FLAG( INT_VALUE ) UNSET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
|
||||
|
||||
/* flags for the datatypes */
|
||||
|
||||
typedef int (*conversion_fct_t)( unsigned int count,
|
||||
void* from, unsigned int from_len, long from_extent,
|
||||
void* to, unsigned int in_length, long to_extent,
|
||||
unsigned int* used );
|
||||
|
||||
/* keep the last 16 bits free for data flags: the convertor flags below
 * start at bit 16 */
#define CONVERTOR_USELESS 0x00010000
#define CONVERTOR_RECV 0x00020000
#define CONVERTOR_SEND 0x00040000

/* convertor state bits; every state value lies inside CONVERTOR_STATE_MASK */
#define CONVERTOR_STATE_MASK 0xFF000000
#define CONVERTOR_STATE_START 0x01000000
#define CONVERTOR_STATE_COMPLETE 0x02000000
/* original, misspelled name -- kept as an alias so existing users still
 * compile; new code should use CONVERTOR_STATE_COMPLETE */
#define CONVEROTR_STATE_COMPLETE CONVERTOR_STATE_COMPLETE
#define CONVERTOR_STATE_ALLOC 0x04000000
|
||||
|
||||
typedef struct __struct_convertor convertor_t;
|
||||
typedef int (*convertor_advance_fct_t)( convertor_t* pConvertor,
|
||||
struct iovec* pInputv,
|
||||
unsigned int inputCount );
|
||||
|
||||
/* and now the convertor stuff */
|
||||
struct __struct_convertor {
|
||||
dt_desc_t* pDesc;
|
||||
long remoteArch;
|
||||
dt_stack_t* pStack;
|
||||
/* the convertor functions pointer */
|
||||
/* the local stack for the actual conversion */
|
||||
int converted; /* the number of already converted elements */
|
||||
int bConverted; /* the size of already converted elements in bytes */
|
||||
unsigned int flags;
|
||||
unsigned int count;
|
||||
unsigned int stack_pos;
|
||||
char* pBaseBuf;
|
||||
unsigned int available_space;
|
||||
void* freebuf;
|
||||
convertor_advance_fct_t fAdvance;
|
||||
conversion_fct_t* pFunctions;
|
||||
};
|
||||
|
||||
extern conversion_fct_t copy_functions[DT_MAX_PREDEFINED];
|
||||
|
||||
/*
 * Advance a convertor: call its fAdvance callback with the convertor
 * itself, IOVEC and COUNT.  PCONV is evaluated twice.
 *
 * Expands to one parenthesised expression whose value is whatever fAdvance
 * returns.  There is deliberately no trailing semicolon inside the macro,
 * so it can be used in a condition and in an unbraced if/else.
 */
#define convertor_progress( PCONV, IOVEC, COUNT ) \
    ((PCONV)->fAdvance( (PCONV), (IOVEC), (COUNT) ))
|
||||
|
||||
/* and finally the convertor functions */
|
||||
convertor_t* convertor_create( int remote_arch, int mode );
|
||||
int convertor_init_for_send( convertor_t* pConv, unsigned int flags,
|
||||
dt_desc_t* pData, int count, void* pUserBuf );
|
||||
int convertor_init_for_recv( convertor_t* pConv, unsigned int flags,
|
||||
dt_desc_t* pData, int count, void* pUserBuf );
|
||||
convertor_t* convertor_get_copy( convertor_t* pConvertor );
|
||||
int convertor_need_buffers( convertor_t* pConvertor );
|
||||
int convertor_pack( convertor_t* pConv, struct iovec* in, unsigned int in_size );
|
||||
int convertor_unpack( convertor_t* pConv, struct iovec* out, unsigned int out_size );
|
||||
int convertor_destroy( convertor_t** ppConv );
|
||||
int convertor_get_packed_size( convertor_t* pConv, unsigned int* pSize );
|
||||
int convertor_get_unpacked_size( convertor_t* pConv, unsigned int* pSize );
|
||||
|
||||
#endif /* DATATYPE_H_HAS_BEEN_INCLUDED */
|
||||
|
||||
enum {
|
||||
LAM_DATATYPE_PACK = 0,
|
||||
LAM_DATATYPE_UNPACK,
|
||||
LAM_DATATYPE_PACK_COMPLETE = 0,
|
||||
LAM_DATATYPE_PACK_INCOMPLETE,
|
||||
TYPE_PACK_INCOMPLETE_VECTOR,
|
||||
TYPE_PACK_INCOMPLETE_DATAVEC_REPEAT,
|
||||
TYPE_PACK_INCOMPLETE_DATAVEC_ELEMENT,
|
||||
TYPE_PACK_ERROR = -1
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Enumeration of datatype creation functions
|
||||
*/
|
||||
enum lam_datatype_kind_t {
|
||||
|
||||
LAM_DATATYPE_KIND_BASIC = 0,
|
||||
|
||||
LAM_DATATYPE_KIND_CONTIG,
|
||||
LAM_DATATYPE_KIND_DUP,
|
||||
LAM_DATATYPE_KIND_HINDEXED,
|
||||
LAM_DATATYPE_KIND_HVECTOR,
|
||||
LAM_DATATYPE_KIND_INDEXED,
|
||||
LAM_DATATYPE_KIND_LB,
|
||||
LAM_DATATYPE_KIND_PACKED,
|
||||
LAM_DATATYPE_KIND_STRUCT,
|
||||
LAM_DATATYPE_KIND_UB,
|
||||
LAM_DATATYPE_KIND_VECTOR,
|
||||
|
||||
LAM_DATATYPE_KIND_CONTIG_FORTRAN,
|
||||
LAM_DATATYPE_KIND_HINDEXED_FORTRAN,
|
||||
LAM_DATATYPE_KIND_HVECTOR_FORTRAN,
|
||||
LAM_DATATYPE_KIND_INDEXED_FORTRAN,
|
||||
LAM_DATATYPE_KIND_STRUCT_FORTRAN,
|
||||
LAM_DATATYPE_KIND_VECTOR_FORTRAN
|
||||
};
|
||||
|
||||
|
||||
typedef enum lam_datatype_state_t lam_datatype_state_t;
|
||||
typedef enum lam_datatype_kind_t lam_datatype_kind_t;
|
||||
|
||||
|
||||
/* types **************************************************************/
|
||||
|
||||
typedef struct lam_datatype_t lam_datatype_t;
|
||||
typedef struct lam_datavec_element_t lam_datavec_element_t;
|
||||
typedef struct lam_datavec_t lam_datavec_t;
|
||||
typedef struct lam_dataxdr_t lam_dataxdr_t;
|
||||
typedef struct lam_pack_state_t lam_pack_state_t;
|
||||
typedef struct lam_memcpy_state_t lam_memcpy_state_t;
|
||||
|
||||
/**
|
||||
* Function prototype for a generalized memcpy()
|
||||
*/
|
||||
typedef void *(lam_memcpy_fn_t) (void *restrict dst,
|
||||
const void *restrict src,
|
||||
size_t size, lam_memcpy_state_t *check);
|
||||
|
||||
|
||||
/**
|
||||
* Internal representation of MPI datatype
|
||||
*/
|
||||
struct lam_datatype_t {
|
||||
|
||||
lam_object_t super; /**< object super class */
|
||||
char name[MPI_MAX_OBJECT_NAME]; /**< object name */
|
||||
int flags; /**< bit flags */
|
||||
|
||||
/* Attributes */
|
||||
|
||||
lam_hash_table_t *keyhash;
|
||||
|
||||
/* cached information */
|
||||
|
||||
ssize_t lower_bound;
|
||||
size_t extent;
|
||||
size_t packed_size; /**< size in bytes, ignoring gaps */
|
||||
int nbasic; /**< number of basic elements */
|
||||
|
||||
/* optimized representation */
|
||||
|
||||
size_t datavec_size; /**< size of optimized representation */
|
||||
lam_datavec_t *datavec; /**< optimized representation (may be null) */
|
||||
|
||||
/* XDR representation */
|
||||
|
||||
size_t dataxdr_size; /**< size of XDR representation */
|
||||
lam_dataxdr_t *dataxdr; /**< XDR representation (may be null) */
|
||||
|
||||
/* full representation (c.f. MPI_Type_create_struct) */
|
||||
|
||||
struct {
|
||||
lam_datatype_kind_t c_kind; /**< creation function */
|
||||
int c_count; /**< number of blocks */
|
||||
int *c_blocklengths; /**< number of elements in each block */
|
||||
MPI_Aint *c_offset; /**< stride/displacement as appropriate */
|
||||
lam_datatype_t **c_types; /**< array of types (array) */
|
||||
} creator;
|
||||
};
|
||||
|
||||
OBJ_CLASS_DECLARATION(lam_datatype_t);
|
||||
|
||||
|
||||
/**
|
||||
* An optimized representation of noncontiguous data used by packing
|
||||
* routines
|
||||
*/
|
||||
struct lam_datavec_t {
|
||||
size_t nrepeat;
|
||||
ssize_t repeat_offset;
|
||||
size_t nelement;
|
||||
lam_datavec_element_t *element;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* An element of a data type in optimized form
|
||||
*/
|
||||
struct lam_datavec_element_t {
|
||||
size_t size; /**< size in bytes of element */
|
||||
ssize_t offset; /**< offset from start of data type */
|
||||
ssize_t seq_offset; /**< offset from start of packed data type */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* XDR representation of a datatype
|
||||
*/
|
||||
struct lam_dataxdr_element_t {
|
||||
/* to be done */
|
||||
void *xdrs; /**< XDR stream */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* State of incremental memcpy with checksum or CRC
|
||||
*/
|
||||
struct lam_memcpy_state_t {
|
||||
size_t size; /**< total size in bytes of the object being checksummed / CRCed */
|
||||
size_t partial_size; /**< size of non- uint32_t to be carried over to next call */
|
||||
uint32_t partial_int; /**< value of non- uint32_t to be carried over to next call */
|
||||
uint32_t sum; /**< current value of the CRC or checksum */
|
||||
bool first_call; /**< is this the first call for this checksum/CRC? */
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Pack state
|
||||
*
|
||||
* Structure to store the state of an incremental pack/unpack of a
|
||||
* datatype.
|
||||
*/
|
||||
struct lam_pack_state_t {
|
||||
size_t type_index; /**< current index of datatype */
|
||||
size_t repeat_index; /**< current index of datavec repeat */
|
||||
size_t element_index; /**< current index of datavec element */
|
||||
size_t datavec_offset; /**< current offset into datavec element */
|
||||
size_t packed_offset; /**< current offset into packed buffer */
|
||||
};
|
||||
|
||||
|
||||
/* interface **********************************************************/
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
 * Test 32-bit alignment of an address
 *
 * @param addr An address; it is only inspected, never dereferenced
 * @return true if the address is a multiple of 4 bytes (32-bit
 * aligned), false otherwise
 */
static inline bool lam_aligned32(const void *addr)
{
    /* the two low-order bits are clear exactly when addr is a multiple of 4 */
    return ((uintptr_t) addr & (uintptr_t) 3) == (uintptr_t) 0;
}
|
||||
|
||||
|
||||
/**
 * Test 64-bit alignment of an address
 *
 * @param addr An address; it is only inspected, never dereferenced
 * @return true if the address is a multiple of 8 bytes (64-bit
 * aligned), false otherwise
 */
static inline bool lam_aligned64(const void *addr)
{
    /* the three low-order bits are clear exactly when addr is a multiple of 8 */
    return ((uintptr_t) addr & (uintptr_t) 7) == (uintptr_t) 0;
}
|
||||
|
||||
|
||||
/**
|
||||
* Return a 32-bit checksum of (the contents of) an array of data
|
||||
* types
|
||||
*
|
||||
* @param addr Data type array
|
||||
* @param count Size of array
|
||||
* @param datatype Datatype descriptor
|
||||
* @return Checksum
|
||||
*/
|
||||
uint32_t lam_datatype_sum32(const void *addr,
|
||||
size_t count,
|
||||
lam_datatype_t *datatype);
|
||||
|
||||
|
||||
/**
|
||||
* Copy (the contents of) an array of data types
|
||||
*
|
||||
* @param dst Output data type array
|
||||
* @param src Input data type array
|
||||
* @param count Size of array
|
||||
* @param datatype Datatype descriptor
|
||||
* @param check Pointer to checksum or CRC
|
||||
* @return 0 on success, -1 on error
|
||||
*/
|
||||
int lam_datatype_copy(void *dst,
|
||||
const void *src,
|
||||
size_t count,
|
||||
lam_datatype_t *datatype,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *check);
|
||||
|
||||
/**
|
||||
* Copy (the contents of) an array of data types, and convert to
|
||||
* another datatype
|
||||
*
|
||||
* @param dst Output data type array
|
||||
* @param dst_count Size of output array
|
||||
* @param dst_datatype Output datatype descriptor
|
||||
* @param src Input data type array
|
||||
* @param src_count Size of input array
|
||||
* @param src_datatype Input datatype descriptor
|
||||
* @param checksum Checksum
|
||||
* @return 0 on success, -1 on error
|
||||
*/
|
||||
int lam_datatype_convert(void *dst,
|
||||
lam_datatype_t *dst_datatype,
|
||||
size_t dst_count,
|
||||
const void *src,
|
||||
lam_datatype_t *src_datatype,
|
||||
size_t src_count,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *check);
|
||||
|
||||
|
||||
/**
|
||||
* Initialize pack state structure
|
||||
*
|
||||
* @param state Pointer to state structure
|
||||
* @return LAM return code
|
||||
*/
|
||||
static inline int lam_pack_state_init(lam_pack_state_t *state)
|
||||
{
|
||||
assert(state);
|
||||
|
||||
state->type_index = 0;
|
||||
state->repeat_index = 0;
|
||||
state->element_index = 0;
|
||||
state->datavec_offset = 0;
|
||||
state->packed_offset = 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Incrementally pack or unpack a buffer to/from an array of
|
||||
* datatypes.
|
||||
*
|
||||
* DO NOT USE THIS FUNCTION DIRECTLY: use lam_datatype_pack or
|
||||
* lam_datatype_unpack instead.
|
||||
*
|
||||
* @param pack_direction 0 for pack, non-zero for unpack
|
||||
* @param state current state of the incremental pack/unpack
|
||||
* @param typebuf array of types
|
||||
* @param ntype size of type array
|
||||
* @param buf buffer to pack into/unpack from
|
||||
* @param bufsize size of buffer
|
||||
* @param datatype type descriptor
|
||||
* @param memcpy_fn pointer to memcpy function
|
||||
* @param check pointer to checksum
|
||||
* @return 0 complete, non-zero otherwise
|
||||
*
|
||||
* Incrementally copy data type arrays to/from a packed buffer by
|
||||
* iterating over the type and type_map until we finish or run out of
|
||||
* room.
|
||||
*
|
||||
* The state (all members) should be initialized to 0 before the first
|
||||
* call.
|
||||
*/
|
||||
int lam_datatype_packer(lam_pack_state_t *state,
|
||||
void *buf,
|
||||
size_t bufsize,
|
||||
void *typebuf,
|
||||
size_t ntype,
|
||||
lam_datatype_t *datatype,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *check,
|
||||
int pack_direction);
|
||||
|
||||
|
||||
/**
|
||||
* Incrementally pack a buffer from an array of datatypes.
|
||||
*
|
||||
* The arguments for this function are the same as for
|
||||
* lam_datatype_packer except that the last argument (pack_direction)
|
||||
* is not required.
|
||||
*/
|
||||
static inline int lam_datatype_pack(lam_pack_state_t *state,
|
||||
void *buf,
|
||||
size_t bufsize,
|
||||
const void *typebuf,
|
||||
size_t ntype,
|
||||
lam_datatype_t *datatype,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *check)
|
||||
{
|
||||
return lam_datatype_packer(state, buf, bufsize, (void *) typebuf,
|
||||
ntype, datatype, memcpy_fn, check,
|
||||
LAM_DATATYPE_PACK);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Incrementally unpack a buffer to an array of datatypes.
|
||||
*
|
||||
* The arguments for this function are the same as for
|
||||
* lam_datatype_packer except that the last argument (pack_direction)
|
||||
* is not required.
|
||||
*/
|
||||
static inline int lam_datatype_unpack(lam_pack_state_t *state,
|
||||
const void *buf,
|
||||
size_t bufsize,
|
||||
void *typebuf,
|
||||
size_t ntype,
|
||||
lam_datatype_t *datatype,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *check)
|
||||
{
|
||||
return lam_datatype_packer(state, (void *) buf, bufsize, typebuf,
|
||||
ntype, datatype, memcpy_fn, check,
|
||||
LAM_DATATYPE_UNPACK);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Incrementally generate an iovec for gathering from an array of
|
||||
* datatypes
|
||||
*
|
||||
* @param state current state of the incremental pack/unpack
|
||||
* @param base_addr base address for iovec offsets
|
||||
* @param vec iovec buffer
|
||||
* @param vec_count maximum length of iovec buffer
|
||||
* @param max_bytes maximum bytes addressed by iovec
|
||||
* @param buf buffer to pack into/unpack from
|
||||
* @param bufsize size of buffer
|
||||
* @param typebuf array of types
|
||||
* @param ntype size of type array
|
||||
* @param type type descriptor
|
||||
* @return 0 if complete, non-zero otherwise
|
||||
*
|
||||
* Incrementally traverse an array of datatypes and generate an iovec
|
||||
* of at most length vec_count and addressing at most max_bytes. This
|
||||
* can be used to do a (partial) RDMA gather of the datatype array.
|
||||
*
|
||||
* The state (all members) should be initialized to 0 before the first
|
||||
* call.
|
||||
*/
|
||||
int lam_datatype_gather_iovec(lam_pack_state_t *state,
|
||||
void *base_addr,
|
||||
struct iovec *vec,
|
||||
size_t vec_count,
|
||||
size_t max_bytes,
|
||||
const void *typebuf,
|
||||
size_t ntype,
|
||||
lam_datatype_t *datatype,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *);
|
||||
|
||||
/**
|
||||
* Incrementally generate an iovec for scattering from a packed array
|
||||
* of datatypes
|
||||
*
|
||||
* @param state current state of the incremental pack/unpack
|
||||
* @param base_addr base address for iovec offsets
|
||||
* @param vec iovec buffer
|
||||
* @param vec_count maximum length of iovec buffer
|
||||
* @param max_bytes maximum bytes addressed by iovec
|
||||
* @param buf packed buffer
|
||||
* @param bufsize size of buffer
|
||||
* @param typebuf array of types
|
||||
* @param ntype size of type array
|
||||
* @param type type descriptor
|
||||
* @return 0 if complete, non-zero otherwise
|
||||
*
|
||||
* Incrementally copy data type arrays to/from a packed buffer by
|
||||
* iterating over the type and type_map until we finish or run out of
|
||||
* room.
|
||||
*
|
||||
* Incrementally traverse a packed array of datatypes and generate an
|
||||
* iovec of at most length vec_count and addressing at most max_bytes.
|
||||
* This can be used to do a (partial) RDMA scatter of the datatype
|
||||
* array.
|
||||
*
|
||||
* The state (all members) should be initialized to 0 before the first
|
||||
* call.
|
||||
*/
|
||||
int lam_datatype_scatter_iovec(lam_pack_state_t *state,
|
||||
void *base_addr,
|
||||
struct iovec *vec,
|
||||
size_t vec_count,
|
||||
size_t max_bytes,
|
||||
const void *buf,
|
||||
size_t bufsize,
|
||||
lam_datatype_t *datatype,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *check);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* incremental memcpy with checksum / CRC functions
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* initialize the state for an incremental memcpy with checksum / CRC
|
||||
*
|
||||
* @param state pointer to state object for the current sequence of copies
|
||||
* @param sum_size the length of the entire buffer to be checksummed
|
||||
*/
|
||||
static inline void
|
||||
lam_memcpy_init(lam_memcpy_state_t *state, size_t sum_size)
|
||||
{
|
||||
state->size = sum_size;
|
||||
state->first_call = true;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Copy data from one buffer to another
 *
 * Plain memcpy() with an extra, ignored argument.
 *
 * @param dst pointer to the destination buffer
 * @param src pointer to the source buffer
 * @param size size of the buffer
 * @param check unused
 * @return the original value of dst
 *
 * NOTE(review): the last parameter is void * here, whereas
 * lam_memcpy_fn_t takes a lam_memcpy_state_t * -- confirm that this
 * function is meant to be usable through a lam_memcpy_fn_t pointer.
 */
static inline void *lam_memcpy(void *dst, const void *src, size_t size,
                               void *check)
{
    /* deliberately ignored; voiding it keeps unused-parameter warnings
     * out of every file that includes this header */
    (void) check;

    return memcpy(dst, src, size);
}
|
||||
|
||||
|
||||
/**
|
||||
* An alternative version of memcpy that may out-perform the system
|
||||
* version on some (silly) systems.
|
||||
*
|
||||
* @param dst pointer to the destination buffer
|
||||
* @param src pointer to the source buffer
|
||||
* @param size size of the buffer
|
||||
* @param state unused
|
||||
* @return the original value of dst
|
||||
*/
|
||||
void *lam_memcpy_alt(void *dst, const void *src, size_t size,
|
||||
lam_memcpy_state_t *state);
|
||||
|
||||
|
||||
/**
|
||||
* Generate a 32-bit CRC for a buffer
|
||||
*
|
||||
* @param buffer Data buffer
|
||||
* @param size Size of buffer
|
||||
* @param initial_crc Initial value of the CRC register
|
||||
* @return The CRC
|
||||
*
|
||||
* Generate a 32-bit CRC for a data buffer starting from a given CRC
|
||||
* value.
|
||||
*/
|
||||
uint32_t lam_crc32(const void *buffer, size_t size,
|
||||
uint32_t initial_crc);
|
||||
|
||||
|
||||
/**
|
||||
* Generate a 32-bit checksum for a buffer
|
||||
*
|
||||
* @param buffer Data buffer
|
||||
* @param size Size of buffer
|
||||
* @return The checksum
|
||||
*
|
||||
* Generate a 32-bit checksum for the contents of a data
|
||||
* buffer.
|
||||
*/
|
||||
uint32_t lam_sum32(const void *buffer, size_t size);
|
||||
|
||||
|
||||
/**
|
||||
* Copy data from one buffer to another and calculate a 32-bit CRC
|
||||
*
|
||||
* @param dst pointer to the destination buffer
|
||||
* @param src pointer to the source buffer
|
||||
* @param size size of the buffer
|
||||
* @param state pointer to a memcpy with checksum/CRC state structure
|
||||
* @return the original value of dst
|
||||
*
|
||||
* This handles cumulative CRCs for arbitrary lengths and address
|
||||
* alignments as best as it can. The initial contents of state->sum is
|
||||
* used as the starting value of the CRC. The final CRC is placed
|
||||
* back in state->sum.
|
||||
*/
|
||||
void *lam_memcpy_crc32(void *dst,
|
||||
const void *src,
|
||||
size_t size,
|
||||
lam_memcpy_state_t *check);
|
||||
|
||||
|
||||
/**
|
||||
* Copy data from one buffer to another and calculate a 32-bit checksum
|
||||
*
|
||||
* @param dst pointer to the destination buffer
|
||||
* @param src pointer to the source buffer
|
||||
* @param size size of the buffer
|
||||
* @param state pointer to a memcpy with checksum/CRC state structure
|
||||
* @return the original value of dst
|
||||
*
|
||||
* This handles cumulative checksumming for arbitrary lengths and
|
||||
* address alignments as best as it can; the contents of
|
||||
* lastPartialLong and lastPartialLength are updated to reflect the
|
||||
* last partial word's value and length (in bytes) -- this should
|
||||
* allow proper handling of checksumming contiguous or noncontiguous
|
||||
* buffers via multiple calls of bcopy_csum() - Mitch
|
||||
*/
|
||||
void *lam_memcpy_sum32(void *dst,
|
||||
const void *src,
|
||||
size_t size,
|
||||
lam_memcpy_state_t *check);
|
||||
|
||||
|
||||
/**
|
||||
* Copy data from one buffer to another and calculate a 32-bit checksum
|
||||
*
|
||||
* @param dst pointer to the destination buffer
|
||||
* @param src pointer to the source buffer
|
||||
* @param size size of the buffer
|
||||
* @param state pointer to a memcpy with checksum/CRC state structure
|
||||
* @return the original value of dst
|
||||
*/
|
||||
void *lam_memcpy_sum64(void *dst,
|
||||
const void *src,
|
||||
size_t size,
|
||||
lam_memcpy_state_t *check);
|
||||
|
||||
|
||||
/**
|
||||
* Create a LAM/MPI datatype
|
||||
*
|
||||
* @param combiner integer identifying the kind of MPI create function
|
||||
* @param ninteger number of integers passed to the create function
|
||||
* @param integer array of integers passed to the create function
|
||||
* @param naddress number of addresses passed to the create function
|
||||
* @param address array of addresses passed to the create function
|
||||
* @param ntype number of data types passed to the create function
|
||||
* @param type array of data types passed to the create function
|
||||
* @param newtype pointer to address of new type
|
||||
* @return LAM_SUCCESS on successful creation, LAM_ERROR otherwise
|
||||
*
|
||||
* This is the central location for creation of data types in LAM/MPI.
|
||||
* All MPI_Type_create functions rely upon this to do the actual type
|
||||
* creation.
|
||||
*/
|
||||
int lam_datatype_create(int combiner,
|
||||
int nintegers,
|
||||
int integers[],
|
||||
int naddresses,
|
||||
ssize_t addresses[],
|
||||
int ntypes,
|
||||
lam_datatype_t *types[], lam_datatype_t **newtype);
|
||||
|
||||
|
||||
/**
|
||||
* Delete a LAM/MPI datatype (actually, just mark it for deletion)
|
||||
*
|
||||
* @param type datatype
|
||||
* @return LAM_SUCCESS on success, LAM_ERROR otherwise
|
||||
*
|
||||
* This is the central location for creation of data types in LAM/MPI.
|
||||
* All MPI_Type_create functions rely upon this to do the actual type
|
||||
* creation.
|
||||
*/
|
||||
int lam_datatype_delete(lam_datatype_t *type);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* LAM_DATATYPE_H_INCLUDED */
|
||||
|
79
src/datatype/datatype_internal.h
Обычный файл
79
src/datatype/datatype_internal.h
Обычный файл
@ -0,0 +1,79 @@
|
||||
#ifndef DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED
|
||||
#define DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED
|
||||
|
||||
/*
 * Debug tracing helpers.
 *
 * With VERBOSE defined, DUMP forwards its arguments to printf and
 * DUMP_STACK forwards to dump_stack(). Without VERBOSE both expand to
 * nothing (or, on compilers without variadic macros, DUMP becomes an
 * empty function that ignores its arguments).
 */
#if defined(VERBOSE)
#  define DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME ) \
     dump_stack( (PSTACK), (STACK_POS), (PDESC), (NAME) )
#  if defined(__GNUC__)
#    define DUMP(ARGS...) printf(ARGS)
#  else
#    if defined(ACCEPT_C99)
       /* C99 variadic macros use "..." together with __VA_ARGS__ */
#      define DUMP(...) printf(__VA_ARGS__)
#    else
#      define DUMP printf
#    endif  /* ACCEPT_C99 */
#  endif  /* __GNUC__ */
#else
#  define DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME )
#  if defined(__GNUC__)
#    define DUMP(ARGS...)
#  else
#    if defined(ACCEPT_C99)
#      define DUMP(...)
#    else
       /* no variadic macros available: swallow the arguments in an
        * empty function, hopefully removed by the compiler */
static void DUMP( const char* fmt, ... ) { (void)fmt; }
#    endif  /* ACCEPT_C99 */
#  endif  /* __GNUC__ */
#endif  /* VERBOSE */
|
||||
|
||||
/* Target of the DUMP_STACK macro when VERBOSE is defined; implemented elsewhere. */
extern void dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDesc, char* name );
|
||||
/* Fill the stack entry pointed to by PSTACK with the given values. */
#define SAVE_STACK( PSTACK, INDEX, COUNT, DISP, END_LOOP) \
do { \
   (PSTACK)->index    = (INDEX); \
   (PSTACK)->count    = (COUNT); \
   (PSTACK)->disp     = (DISP); \
   (PSTACK)->end_loop = (END_LOOP); \
} while(0)

/* Fill the entry following PSTACK, then advance both PSTACK and
 * STACK_POS to it. PSTACK and STACK_POS must be modifiable lvalues.
 */
#define PUSH_STACK( PSTACK, STACK_POS, INDEX, COUNT, DISP, END_LOOP) \
do { \
   dt_stack_t* pTempStack = (PSTACK) + 1; \
   SAVE_STACK( pTempStack, (INDEX), (COUNT), (DISP), (END_LOOP) ); \
   (STACK_POS)++; \
   (PSTACK) = pTempStack; \
} while(0)
|
||||
|
||||
/* Copy primitive used by the datatype engine: plain memcpy by default. */
#define MEMCPY( DST, SRC, BLENGTH ) memcpy( (DST), (SRC), (BLENGTH) )

#ifdef USELESS
/* Experimental hand-rolled copy for blocks shorter than MEMCPY_LIMIT.
 * NOTE(review): the word loop copies (BLENGTH >> 2) ints regardless of
 * the bytes already copied by the head loop and never copies a tail, so
 * it is only safe to enable after being reworked.
 */
#define MEMCPY_LIMIT  1

/* replace the default definition instead of redefining it */
#undef MEMCPY
#define MEMCPY( DST, SRC, BLENGTH ) \
do { \
   if( (BLENGTH) < (MEMCPY_LIMIT) ) { \
      long mask = sizeof(int) - 1; \
      char *dst = (char*)(DST), *src = (char*)(SRC); \
      int i; \
      if( ((long)(DST) & mask) == ((long)(SRC) & mask) ) { \
         int *idst = (int*)((long)(DST) & (~mask)); \
         int *isrc = (int*)((long)(SRC) & (~mask)); \
         for( i = 0; i < ((long)(DST) & mask); i++ ) { \
            *dst = *src; dst++; src++; \
         } \
         if( ((char*)idst) != dst ) { \
            idst++; isrc++; \
         } \
         for( i = 0; i < ((BLENGTH) >> 2); i++ ) { \
            *idst = *isrc; idst++; isrc++; \
         } \
      } else { \
         for( i = 0; i < (BLENGTH); i++ ) { \
            *dst = *src; dst++; src++; \
         } \
      } \
   } else \
      memcpy( (DST), (SRC), (BLENGTH) ); \
} while(0)
#endif  /* USELESS */
|
||||
|
||||
#endif /* DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED */
|
603
src/datatype/ddt_test.c
Обычный файл
603
src/datatype/ddt_test.c
Обычный файл
@ -0,0 +1,603 @@
|
||||
#include "ddt.h"
|
||||
#include <time.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
|
||||
/* Wall-clock timing helpers built on gettimeofday(). */
#define TIMER_DATA_TYPE struct timeval
#define GET_TIME(TV)   gettimeofday( &(TV), NULL )
/* Difference TEND - TSTART in microseconds, as a long. */
#define ELAPSED_TIME(TSTART, TEND) \
   ((long)((TEND).tv_sec - (TSTART).tv_sec) * 1000000L + (long)((TEND).tv_usec - (TSTART).tv_usec))
|
||||
|
||||
int mpich_typeub( void )
|
||||
{
|
||||
int errs = 0;
|
||||
long extent, lb, extent1, extent2, extent3;
|
||||
long displ[2];
|
||||
int blens[2];
|
||||
dt_desc_t *type1, *type2, *type3, *types[2];
|
||||
|
||||
dt_create_vector( 2, 1, 4, &(basicDatatypes[DT_INT]), &type1 );
|
||||
dt_commit( &type1 );
|
||||
dt_get_extent( type1, &lb, &extent );
|
||||
extent1 = 5 * sizeof(int);
|
||||
if (extent != extent1) {
|
||||
printf("EXTENT 1 %ld != %ld\n",extent,extent1);
|
||||
errs++;
|
||||
printf("extent(type1)=%ld\n",(long)extent);
|
||||
}
|
||||
|
||||
blens[0]=1;
|
||||
blens[1]=1;
|
||||
displ[0]=0;
|
||||
displ[1]=sizeof(int)*4;
|
||||
types[0]=type1;
|
||||
types[1]=&(basicDatatypes[DT_UB]);
|
||||
extent2 = displ[1];
|
||||
|
||||
/* using MPI_UB and Type_struct, monkey with the extent, making it 16
|
||||
*/
|
||||
dt_create_struct( 2, blens, displ, types, &type2 );
|
||||
dt_commit( &type2 );
|
||||
dt_get_extent( type2, &lb, &extent );
|
||||
if (extent != extent2) {
|
||||
printf("EXTENT 2 %ld != %ld\n",extent,extent2);
|
||||
errs++;
|
||||
printf("extent(type2)=%ld\n",(long)extent);
|
||||
}
|
||||
|
||||
/* monkey with the extent again, making it 4
|
||||
* ===> MPICH gives 4
|
||||
* ===> MPIF gives 16, the old extent
|
||||
*/
|
||||
displ[1]=sizeof(int);
|
||||
types[0]=type2;
|
||||
types[1]=&(basicDatatypes[DT_UB]);
|
||||
extent3 = extent2;
|
||||
|
||||
dt_create_struct( 2, blens, displ, types, &type3 );
|
||||
dt_commit( &type3 );
|
||||
|
||||
dt_get_extent( type3, &lb, &extent );
|
||||
if (extent != extent3) {
|
||||
printf("EXTENT 3 %ld != %ld\n",extent,extent3);
|
||||
errs++;
|
||||
printf("extent(type3)=%ld\n",(long)extent);
|
||||
}
|
||||
|
||||
dt_free( &type1 );
|
||||
dt_free( &type2 );
|
||||
dt_free( &type3 );
|
||||
return errs;
|
||||
}
|
||||
|
||||
int mpich_typeub2( void )
|
||||
{
|
||||
int blocklen[3], err = 0, sz1, sz2, sz3;
|
||||
long disp[3], lb, ub, ex1, ex2, ex3;
|
||||
dt_desc_t *types[3], *dt1, *dt2, *dt3;
|
||||
|
||||
blocklen[0] = 1;
|
||||
blocklen[1] = 1;
|
||||
blocklen[2] = 1;
|
||||
disp[0] = -3;
|
||||
disp[1] = 0;
|
||||
disp[2] = 6;
|
||||
types[0] = &(basicDatatypes[DT_LB]);
|
||||
types[1] = &(basicDatatypes[DT_INT]);
|
||||
types[2] = &(basicDatatypes[DT_UB]);
|
||||
|
||||
dt_create_struct(3,blocklen,disp, types,&dt1);
|
||||
dt_commit(&dt1);
|
||||
|
||||
dt_type_lb(dt1, &lb); dt_type_ub(dt1, &ub);
|
||||
dt_type_extent(dt1,&ex1); dt_type_size(dt1,&sz1);
|
||||
|
||||
/* Values should be lb = -3, ub = 6 extent 9; size depends on implementation */
|
||||
if (lb != -3 || ub != 6 || ex1 != 9) {
|
||||
printf("Example 3.26 type1 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex1, sz1);
|
||||
err++;
|
||||
}
|
||||
else
|
||||
printf("Example 3.26 type1 correct\n" );
|
||||
|
||||
dt_create_contiguous(2,dt1,&dt2);
|
||||
dt_type_lb(dt2, &lb); dt_type_ub(dt2, &ub);
|
||||
dt_type_extent(dt2,&ex2); dt_type_size(dt2,&sz2);
|
||||
/* Values should be lb = -3, ub = 15, extent = 18, size depends on implementation */
|
||||
if (lb != -3 || ub != 15 || ex2 != 18) {
|
||||
printf("Example 3.26 type2 lb %d ub %d extent %d size %d\n", (int)-3, (int)15, (int)18, 8);
|
||||
printf("Example 3.26 type2 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex2, sz2);
|
||||
err++;
|
||||
}
|
||||
else
|
||||
printf("Example 3.26 type1 correct\n" );
|
||||
|
||||
dt_create_contiguous(2,dt1,&dt2);
|
||||
dt_type_lb(dt2, &lb); dt_type_ub(dt2, &ub);
|
||||
dt_type_extent(dt2,&ex2); dt_type_size(dt2,&sz2);
|
||||
/* Values should be lb = -3, ub = 15, extent = 18, size depends on implementation */
|
||||
if (lb != -3 || ub != 15 || ex2 != 18) {
|
||||
printf("Example 3.26 type2 lb %d ub %d extent %d size %d\n", (int)-3, (int)15, (int)18, 8);
|
||||
printf("Example 3.26 type2 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex2, sz2);
|
||||
err++;
|
||||
}
|
||||
else
|
||||
printf( "Example 3.26 type2 correct\n" );
|
||||
|
||||
types[0]=dt1; types[1]=dt1;
|
||||
blocklen[0]=1; blocklen[1]=1;
|
||||
disp[0]=0; disp[1]=ex1;
|
||||
|
||||
dt_create_struct(2, blocklen, disp, types, &dt3);
|
||||
dt_commit(&dt3);
|
||||
|
||||
dt_type_lb(dt3, &lb); dt_type_ub(dt3, &ub);
|
||||
dt_type_extent(dt3,&ex3); dt_type_size(dt3,&sz3);
|
||||
/* Another way to express type2 */
|
||||
if (lb != -3 || ub != 15 || ex3 != 18) {
|
||||
printf("type3 lb %d ub %d extent %d size %d\n", (int)-3, (int)15, (int)18, 8);
|
||||
printf("type3 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex3, sz2);
|
||||
err++;
|
||||
}
|
||||
else
|
||||
printf( "type3 correct\n" );
|
||||
|
||||
dt_free( &dt1 );
|
||||
dt_free( &dt2 );
|
||||
dt_free( &dt3 );
|
||||
return err;
|
||||
}
|
||||
|
||||
int mpich_typeub3( void )
|
||||
{
|
||||
int blocklen[2], sz, err = 0, idisp[3];
|
||||
long disp[3], lb, ub, ex;
|
||||
dt_desc_t *types[3], *dt1, *dt2, *dt3, *dt4, *dt5;
|
||||
|
||||
/* Create a datatype with explicit LB and UB */
|
||||
blocklen[0] = 1;
|
||||
blocklen[1] = 1;
|
||||
blocklen[2] = 1;
|
||||
disp[0] = -3;
|
||||
disp[1] = 0;
|
||||
disp[2] = 6;
|
||||
types[0] = &(basicDatatypes[DT_LB]);
|
||||
types[1] = &(basicDatatypes[DT_INT]);
|
||||
types[2] = &(basicDatatypes[DT_UB]);
|
||||
|
||||
/* Generate samples for contiguous, hindexed, hvector, indexed, and vector (struct and contiguous tested in typeub2) */
|
||||
dt_create_struct(3,blocklen,disp, types,&dt1);
|
||||
dt_commit(&dt1);
|
||||
|
||||
/* This type is the same as in typeub2, and is tested there */
|
||||
|
||||
types[0]=dt1; types[1]=dt1;
|
||||
blocklen[0]=1; blocklen[1]=1;
|
||||
disp[0]=-4; disp[1]=7;
|
||||
idisp[0]=-4; idisp[1]=7;
|
||||
|
||||
dt_create_hindexed( 2, blocklen, disp, dt1, &dt2 );
|
||||
dt_commit( &dt2 );
|
||||
|
||||
dt_type_lb( dt2, &lb ); dt_type_ub( dt2, &ub );
|
||||
dt_type_extent( dt2, &ex ); dt_type_size( dt2, &sz );
|
||||
|
||||
if (lb != -7 || ub != 13 || ex != 20) {
|
||||
printf("hindexed lb %d ub %d extent %d size %d\n", (int)-7, (int)13, (int)20, sz);
|
||||
printf("hindexed lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz);
|
||||
err++;
|
||||
}
|
||||
else
|
||||
printf( "hindexed ok\n" );
|
||||
|
||||
dt_create_indexed( 2, blocklen, idisp, dt1, &dt3 );
|
||||
dt_commit( &dt3 );
|
||||
|
||||
dt_type_lb( dt3, &lb ); dt_type_ub( dt3, &ub );
|
||||
dt_type_extent( dt3, &ex ); dt_type_size( dt3, &sz );
|
||||
|
||||
if (lb != -39 || ub != 69 || ex != 108) {
|
||||
printf("indexed lb %d ub %d extent %d size %d\n", (int)-39, (int)69, (int)108, sz);
|
||||
printf("indexed lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz);
|
||||
err++;
|
||||
}
|
||||
else
|
||||
printf( "indexed ok\n" );
|
||||
|
||||
dt_create_hvector( 2, 1, 14, dt1, &dt4 );
|
||||
dt_commit( &dt4 );
|
||||
|
||||
dt_type_lb( dt4, &lb ); dt_type_ub( dt4, &ub );
|
||||
dt_type_extent( dt4, &ex ); dt_type_size( dt4, &sz );
|
||||
|
||||
if (lb != -3 || ub != 20 || ex != 23) {
|
||||
printf("hvector lb %d ub %d extent %d size %d\n", (int)-3, (int)20, (int)23, sz);
|
||||
printf("hvector lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz);
|
||||
err++;
|
||||
}
|
||||
else
|
||||
printf( "hvector ok\n" );
|
||||
|
||||
dt_create_vector( 2, 1, 14, dt1, &dt5 );
|
||||
dt_commit( &dt5 );
|
||||
|
||||
dt_type_lb( dt5, &lb ); dt_type_ub( dt5, &ub );
|
||||
dt_type_extent( dt5, &ex ); dt_type_size( dt5, &sz );
|
||||
|
||||
|
||||
if (lb != -3 || ub != 132 || ex != 135) {
|
||||
printf("vector lb %d ub %d extent %d size %d\n", (int)-3, (int)132, (int)135, sz);
|
||||
printf("vector lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz);
|
||||
err++;
|
||||
}
|
||||
else
|
||||
printf( "vector ok\n" );
|
||||
|
||||
dt_free( &dt1 );
|
||||
dt_free( &dt2 );
|
||||
dt_free( &dt3 );
|
||||
dt_free( &dt4 );
|
||||
dt_free( &dt5 );
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Print an N x N row-major matrix of doubles, one row per line. Each
 * line starts with the byte offset of the row inside the matrix.
 */
void print_double_mat( size_t N, double* mat )
{
   size_t i, j;
   double* pMat;

   for( i = 0; i < N; i++ ) {
      /* the offset is a size_t: convert explicitly to match the format */
      printf( "(%4lu) :", (unsigned long)(i * N * sizeof(double)) );
      pMat = mat + i * N;
      for( j = 0; j < N; j++ ) {
         printf( "%5.1f ", *pMat );
         pMat++;
      }
      printf( "\n" );
   }
}
|
||||
|
||||
/*
 * Fill the upper triangle (diagonal included) of an N x N row-major
 * matrix with pseudo-random values. Elements below the diagonal are
 * left untouched.
 *
 * The values are written at mat[i * N + j], the layout that
 * check_diag_matrix() and test_upper() read back. The generator is
 * seeded with srand(), so the values are drawn with rand().
 *
 * @return always 0
 */
int init_random_upper_matrix( size_t N, double* mat )
{
   size_t i, j;

   srand( (unsigned int)time(NULL) );
   for( i = 0; i < N; i++ )
      for( j = i; j < N; j++ )
         mat[i * N + j] = (double)rand();
   return 0;
}
|
||||
|
||||
/*
 * Compare the upper triangles (diagonal included) of two N x N
 * row-major matrices. Elements below the diagonal are skipped.
 *
 * @return 0 when all compared elements are equal, -1 at the first
 *         difference (which is also reported on stdout)
 */
int check_diag_matrix( size_t N, double* mat1, double* mat2 )
{
   size_t i, j;

   for( i = 0; i < N; i++ ) {
      /* skip the i elements located before the diagonal on this row */
      mat1 += i;
      mat2 += i;
      for( j = i; j < N; j++ ) {
         if( *mat1 != *mat2 ) {
            printf( "error in position (%lu, %lu) expect %f and find %f\n",
                    (unsigned long)i, (unsigned long)j, *mat1, *mat2 );
            return -1;
         }
         mat1++; mat2++;
      }
   }
   return 0;
}
|
||||
|
||||
dt_desc_t* upper_matrix( size_t mat_size )
|
||||
{
|
||||
int *disp, i;
|
||||
size_t *blocklen;
|
||||
dt_desc_t* upper;
|
||||
|
||||
disp = (int*)malloc( sizeof(int) * mat_size );
|
||||
blocklen = (size_t*)malloc( sizeof(size_t) * mat_size );
|
||||
|
||||
for( i = 0; i < mat_size; i++ ) {
|
||||
disp[i] = i * mat_size + i;
|
||||
blocklen[i] = mat_size - i;
|
||||
}
|
||||
|
||||
dt_create_indexed( mat_size, blocklen, disp, &(basicDatatypes[DT_DOUBLE]),
|
||||
&upper );
|
||||
free( disp );
|
||||
free( blocklen );
|
||||
return upper;
|
||||
}
|
||||
|
||||
dt_desc_t* lower_matrix( size_t mat_size )
|
||||
{
|
||||
int *disp, i;
|
||||
size_t *blocklen;
|
||||
dt_desc_t* upper;
|
||||
|
||||
disp = (int*)malloc( sizeof(int) * mat_size );
|
||||
blocklen = (size_t*)malloc( sizeof(size_t) * mat_size );
|
||||
|
||||
for( i = 0; i < mat_size; i++ ) {
|
||||
disp[i] = i * mat_size;
|
||||
blocklen[i] = i;
|
||||
}
|
||||
|
||||
dt_create_indexed( mat_size, blocklen, disp, &(basicDatatypes[DT_DOUBLE]),
|
||||
&upper );
|
||||
free( disp );
|
||||
free( blocklen );
|
||||
return upper;
|
||||
}
|
||||
|
||||
extern long conversion_elapsed;
|
||||
|
||||
int test_upper( size_t length )
|
||||
{
|
||||
double *mat1, *mat2, *inbuf;
|
||||
dt_desc_t *pdt, *pdt1;
|
||||
convertor_t * pConv;
|
||||
char *ptr;
|
||||
int i, j, split_chunk, total_length, rc;
|
||||
struct iovec a;
|
||||
TIMER_DATA_TYPE start, end;
|
||||
long total_time;
|
||||
|
||||
printf( "test upper matrix\n" );
|
||||
pdt = upper_matrix( length );
|
||||
pdt1 = lower_matrix( length );
|
||||
/*dt_dump( pdt );*/
|
||||
|
||||
mat1 = malloc( length * length * sizeof(double) );
|
||||
init_random_upper_matrix( length, mat1 );
|
||||
mat2 = calloc( length * length, sizeof(double) );
|
||||
|
||||
total_length = length * (length + 1) / 2 * sizeof(double);
|
||||
inbuf = (double*)malloc( total_length );
|
||||
ptr = (char*)inbuf;
|
||||
/* copy upper matrix in the array simulating the input buffer */
|
||||
for( i = 0; i < length; i++ )
|
||||
for( j = i; j < length; j++ ) {
|
||||
*inbuf = mat1[i * length + j];
|
||||
inbuf++;
|
||||
}
|
||||
inbuf = (double*)ptr;
|
||||
pConv = convertor_create( 0, 0 );
|
||||
convertor_init_for_recv( pConv, 0, pdt, 1, mat2 );
|
||||
|
||||
/* test the automatic destruction pf the data */
|
||||
dt_destroy( &pdt );
|
||||
dt_destroy( &pdt1 );
|
||||
|
||||
GET_TIME( start );
|
||||
split_chunk = (length + 1) * sizeof(double);
|
||||
/* split_chunk = (total_length + 1) * sizeof(double); */
|
||||
for( i = total_length; i > 0; ) {
|
||||
if( i < split_chunk ) split_chunk = i;
|
||||
a.iov_base = ptr;
|
||||
a.iov_len = split_chunk;
|
||||
convertor_unpack( pConv, &a, 1 );
|
||||
ptr += split_chunk;
|
||||
i -= split_chunk;
|
||||
}
|
||||
GET_TIME( end );
|
||||
total_time = ELAPSED_TIME( start, end );
|
||||
printf( "complete unpacking in %ld microsec\n", total_time );
|
||||
/* printf( "conversion done in %ld microsec\n", conversion_elapsed ); */
|
||||
/* printf( "stack management in %ld microsec\n", total_time - conversion_elapsed ); */
|
||||
free( inbuf );
|
||||
rc = check_diag_matrix( length, mat1, mat2 );
|
||||
free( mat1 );
|
||||
free( mat2 );
|
||||
return rc;
|
||||
}
|
||||
|
||||
dt_desc_t* test_matrix_borders( unsigned int size, unsigned int width )
|
||||
{
|
||||
dt_desc_t *pdt, *pdt_line;
|
||||
int disp[2];
|
||||
size_t blocklen[2];
|
||||
|
||||
disp[0] = 0;
|
||||
blocklen[0] = width;
|
||||
disp[1] = (size - width) * sizeof(double);
|
||||
blocklen[1] = width;
|
||||
|
||||
dt_create_indexed( 2, blocklen, disp, &(basicDatatypes[DT_DOUBLE]),
|
||||
&pdt_line );
|
||||
dt_create_contiguous( size, pdt_line, &pdt );
|
||||
dt_destroy( &pdt_line );
|
||||
return pdt;
|
||||
}
|
||||
|
||||
dt_desc_t* test_contiguous( void )
|
||||
{
|
||||
dt_desc_t *pdt, *pdt1, *pdt2;
|
||||
|
||||
printf( "test contiguous (alignement)\n" );
|
||||
pdt1 = dt_create( -1 );
|
||||
dt_add( pdt1, &(basicDatatypes[DT_DOUBLE]), 1, 0, -1 );
|
||||
dt_dump( pdt1 );
|
||||
dt_add( pdt1, &(basicDatatypes[DT_CHAR]), 1, 8, -1 );
|
||||
dt_dump( pdt1 );
|
||||
dt_create_contiguous( 4, pdt1, &pdt2 );
|
||||
dt_destroy( &pdt1 );
|
||||
dt_dump( pdt2 );
|
||||
dt_create_contiguous( 2, pdt2, &pdt );
|
||||
dt_destroy( &pdt2 );
|
||||
dt_dump( pdt );
|
||||
dt_dump_complete( pdt );
|
||||
return pdt;
|
||||
}
|
||||
|
||||
dt_desc_t* test_struct( void )
|
||||
{
|
||||
dt_desc_t* types[] = { &(basicDatatypes[DT_FLOAT]),
|
||||
NULL,
|
||||
&(basicDatatypes[DT_CHAR]) };
|
||||
int lengths[] = { 2, 1, 3 };
|
||||
long disp[] = { 0, 16, 26 };
|
||||
dt_desc_t* pdt, *pdt1;
|
||||
|
||||
printf( "test struct\n" );
|
||||
pdt1 = dt_create( -1 );
|
||||
dt_add( pdt1, &(basicDatatypes[DT_DOUBLE]), 1, 0, -1 );
|
||||
dt_add( pdt1, &(basicDatatypes[DT_CHAR]), 1, 8, -1 );
|
||||
dt_dump_complete( pdt1 );
|
||||
|
||||
types[1] = pdt1;
|
||||
|
||||
dt_create_struct( 3, lengths, disp, types, &pdt );
|
||||
dt_destroy( &pdt1 );
|
||||
dt_dump_complete( pdt );
|
||||
return pdt;
|
||||
}
|
||||
|
||||
/* Three ints of which only i1 and i2 are described by the datatype
 * built in create_strange_dt(); gap is skipped. */
typedef struct {
   int i1;
   int gap;
   int i2;
} sdata_intern;

/* A counter, ten sdata_intern and a trailing int. */
typedef struct {
   int counter;
   sdata_intern v[10];
   int last;
} sstrange;

/* number of sstrange elements in the final contiguous datatype */
#define SSTRANGE_CNT 10
/* when defined, create_strange_dt() resizes its intermediate datatypes */
#define USE_RESIZED
|
||||
|
||||
dt_desc_t* create_strange_dt( void )
|
||||
{
|
||||
sdata_intern v[2];
|
||||
long displ[3];
|
||||
dt_desc_t* types[3] = { &(basicDatatypes[DT_INT]) };
|
||||
sstrange t[2];
|
||||
int pBlock[3] = {1, 10, 1}, dispi[3];
|
||||
dt_desc_t *pdt, *pdt1, *pdt2, *pdtTemp;
|
||||
|
||||
dispi[0] = (int)((char*)&(v[0].i1) - (char*)&(v[0])); /* 0 */
|
||||
dispi[1] = (int)(((char*)(&(v[0].i2)) - (char*)&(v[0])) / sizeof(int)); /* 2 */
|
||||
dt_create_indexed_block( 2, 1, dispi, &(basicDatatypes[DT_INT]), &pdtTemp );
|
||||
#ifdef USE_RESIZED
|
||||
/* optional */
|
||||
displ[0] = 0;
|
||||
displ[1] = (char*)&(v[1]) - (char*)&(v[0]);
|
||||
dt_create_resized( pdtTemp, displ[0], displ[1], &pdt1 );
|
||||
dt_destroy( &pdtTemp );
|
||||
#else
|
||||
pdt1 = pdtTemp;
|
||||
#endif /* USE_RESIZED */
|
||||
|
||||
types[1] = pdt1;
|
||||
types[2] = &(basicDatatypes[DT_INT]);
|
||||
displ[0] = 0;
|
||||
displ[1] = (long)((char*)&(t[0].v[0]) - (char*)&(t[0]));
|
||||
displ[2] = (long)((char*)&(t[0].last) - (char*)&(t[0]));
|
||||
dt_create_struct( 3, pBlock, displ, types, &pdtTemp );
|
||||
#ifdef USE_RESIZED
|
||||
/* optional */
|
||||
displ[1] = (char*)&(t[1]) - (char*)&(t[0]);
|
||||
dt_create_resized( pdtTemp, displ[0], displ[1], &pdt2 );
|
||||
dt_destroy( &pdtTemp );
|
||||
#else
|
||||
pdt2 = pdtTemp;
|
||||
#endif /* USE_RESIZED */
|
||||
|
||||
dt_create_contiguous( SSTRANGE_CNT, pdt2, &pdt );
|
||||
|
||||
dt_destroy( &pdt1 );
|
||||
dt_destroy( &pdt2 );
|
||||
dt_dump( pdt );
|
||||
{
|
||||
dt_elem_desc_t* pElemDesc;
|
||||
dt_optimize_short( pdt, 1, &pElemDesc );
|
||||
|
||||
free( pElemDesc );
|
||||
}
|
||||
return pdt;
|
||||
}
|
||||
|
||||
/*
 * Allocate a source and a destination buffer large enough for count
 * elements of pdt, then release them. The copy itself is still
 * disabled.
 *
 * The datatype received from the caller is used as is: it is no longer
 * replaced by a freshly created one that was never released.
 *
 * @return 0 on success, -1 when the buffers cannot be allocated
 */
int local_copy_ddt_count( dt_desc_t* pdt, int count )
{
   long extent;
   void *pdst, *psrc;
   int rc = 0;

   dt_type_extent( pdt, &extent );

   pdst = malloc( extent * count );
   psrc = malloc( extent * count );
   if( (NULL == pdst) || (NULL == psrc) ) {
      rc = -1;
   }

   /* dt_copy_content_same_dt( pdt, count, pdst, psrc ); */

   free( pdst );
   free( psrc );
   return rc;
}
|
||||
int main( int argc, char* argv[] )
|
||||
{
|
||||
dt_desc_t *pdt, *pdt1, *pdt2, *pdt3;
|
||||
int rc, length = 500;
|
||||
|
||||
pdt = create_strange_dt();
|
||||
return 0;
|
||||
/*
|
||||
local_copy_ddt_count(pdt, 10);
|
||||
dt_destroy( &pdt );
|
||||
*/
|
||||
pdt = upper_matrix(100);
|
||||
local_copy_ddt_count(pdt, 1);
|
||||
dt_destroy( &pdt );
|
||||
return 0;
|
||||
|
||||
return 0;
|
||||
|
||||
mpich_typeub();
|
||||
mpich_typeub2();
|
||||
mpich_typeub3();
|
||||
|
||||
rc = test_upper( length );
|
||||
if( rc == 0 )
|
||||
printf( "decode [PASSED]\n" );
|
||||
else
|
||||
printf( "decode [NOT PASSED]\n" );
|
||||
|
||||
pdt = test_matrix_borders( length, 100 );
|
||||
dt_dump( pdt );
|
||||
dt_free( &pdt );
|
||||
|
||||
printf( ">>--------------------------------------------<<\n" );
|
||||
pdt = test_contiguous();
|
||||
dt_destroy( &pdt );
|
||||
printf( ">>--------------------------------------------<<\n" );
|
||||
pdt = test_struct();
|
||||
dt_destroy( &pdt );
|
||||
printf( ">>--------------------------------------------<<\n" );
|
||||
|
||||
pdt1 = dt_create( -1 );
|
||||
pdt2 = dt_create( -1 );
|
||||
pdt3 = dt_create( -1 );
|
||||
dt_add( pdt3, &(basicDatatypes[DT_INT]), 10, 0, -1 );
|
||||
dt_add( pdt3, &(basicDatatypes[DT_FLOAT]), 5, 10 * sizeof(int), -1 );
|
||||
|
||||
dt_add( pdt2, &(basicDatatypes[DT_INT]), 1, 0, -1 );
|
||||
dt_add( pdt2, pdt3, 3, sizeof(int) * 1, -1 );
|
||||
|
||||
dt_add( pdt1, &(basicDatatypes[DT_LONG_LONG]), 5, 0, -1 );
|
||||
dt_add( pdt1, &(basicDatatypes[DT_LONG_DOUBLE]), 2, sizeof(long long) * 5, -1 );
|
||||
|
||||
printf( ">>--------------------------------------------<<\n" );
|
||||
dt_dump( pdt1 );
|
||||
printf( ">>--------------------------------------------<<\n" );
|
||||
dt_dump( pdt2 );
|
||||
printf( ">>--------------------------------------------<<\n" );
|
||||
dt_dump( pdt3 );
|
||||
|
||||
dt_destroy( &pdt1 );
|
||||
dt_destroy( &pdt2 );
|
||||
dt_destroy( &pdt3 );
|
||||
return 0;
|
||||
}
|
188
src/datatype/dt_add.c
Обычный файл
188
src/datatype/dt_add.c
Обычный файл
@ -0,0 +1,188 @@
|
||||
#include "datatype.h"
|
||||
|
||||
/* When we add a datatype we should update it's definition depending on
|
||||
* the initial displacement for the whole data, so the displacement of
|
||||
* all elements inside a datatype depend only on the loop displacement
|
||||
* and it's own displacement.
|
||||
*/
|
||||
|
||||
/* we have 3 differents structures to update:
|
||||
* the first is the real representation of the datatype
|
||||
* the second is the internal representation using extents
|
||||
* the last is the representation used for send operations
|
||||
*/
|
||||
int dt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd, unsigned int count, long disp, long extent )
|
||||
{
|
||||
int newLength, place_needed = 0, i;
|
||||
short localFlags;
|
||||
dt_elem_desc_t *pLast, *pLoop = NULL;
|
||||
long lb, ub;
|
||||
|
||||
/* the extent should be always be positive. So a negative
|
||||
* value here have a special meaning ie. default extent as
|
||||
* computed by ub - lb
|
||||
*/
|
||||
if( extent == -1 ) extent = (pdtAdd->ub - pdtAdd->lb);
|
||||
|
||||
/* first make sure that we have enought place to
|
||||
* put the new element inside */
|
||||
if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) {
|
||||
place_needed = 1;
|
||||
/* handle special cases for DT_LB and DT_UB */
|
||||
if( pdtAdd == &(basicDatatypes[DT_LB]) ) {
|
||||
pdtBase->bdt_used |= (1<< DT_LB);
|
||||
if( pdtBase->flags & DT_FLAG_USER_LB ) {
|
||||
pdtBase->lb = LMIN( pdtBase->lb, disp );
|
||||
} else {
|
||||
pdtBase->lb = disp;
|
||||
pdtBase->flags |= DT_FLAG_USER_LB;
|
||||
}
|
||||
return 0;
|
||||
} else if( pdtAdd == &(basicDatatypes[DT_UB]) ) {
|
||||
pdtBase->bdt_used |= (1<< DT_UB);
|
||||
if( pdtBase->flags & DT_FLAG_USER_UB ) {
|
||||
pdtBase->ub = LMAX( pdtBase->ub, disp );
|
||||
} else {
|
||||
pdtBase->ub = disp;
|
||||
pdtBase->flags |= DT_FLAG_USER_UB;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
place_needed = pdtAdd->desc.used;
|
||||
if( count != 1 ) place_needed += 2;
|
||||
}
|
||||
|
||||
dt_increase_ref( pdtAdd );
|
||||
|
||||
/* compute the new memory alignement */
|
||||
pdtBase->align = IMAX( pdtBase->align, pdtAdd->align );
|
||||
|
||||
pdtBase->bdt_used |= pdtAdd->bdt_used;
|
||||
newLength = pdtBase->desc.used + place_needed;
|
||||
if( newLength > pdtBase->desc.length ) {
|
||||
newLength = ((newLength / DT_INCREASE_STACK) + 1 ) * DT_INCREASE_STACK;
|
||||
printf( "increase the size of the data desc array from %d to %d (old ptr = %p ",
|
||||
pdtBase->desc.length, newLength, pdtBase->desc.desc );
|
||||
pdtBase->desc.desc = (dt_elem_desc_t*)realloc( pdtBase->desc.desc, newLength );
|
||||
printf( "new ptr = %p\n", pdtBase->desc.desc );
|
||||
pdtBase->desc.length = newLength;
|
||||
}
|
||||
pLast = &(pdtBase->desc.desc[pdtBase->desc.used]);
|
||||
if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) { /* add a basic datatype */
|
||||
pLast->type = pdtAdd->id;
|
||||
pLast->count = count;
|
||||
pLast->disp = disp;
|
||||
pLast->extent = extent;
|
||||
pdtBase->desc.used++;
|
||||
pdtBase->btypes[pdtAdd->id] += count;
|
||||
pLast->flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
|
||||
if( extent == pdtAdd->size )
|
||||
pLast->flags |= DT_FLAG_CONTIGUOUS;
|
||||
} else {
|
||||
/* now we add a complex datatype */
|
||||
if( disp != pdtBase->ub ) { /* add the initial gap */
|
||||
if( disp < pdtBase->ub ) pdtBase->flags |= DT_FLAG_OVERLAP;
|
||||
}
|
||||
/* keep trace of the total number of basic datatypes in the datatype definition */
|
||||
pdtBase->btypes[DT_LOOP] += pdtAdd->btypes[DT_LOOP];
|
||||
for( i = 3; i < DT_END_LOOP; i++ )
|
||||
if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]);
|
||||
pdtBase->btypes[DT_END_LOOP] += pdtAdd->btypes[DT_END_LOOP];
|
||||
|
||||
/* if the extent of the datatype if the same as the extent of the loop
|
||||
* description of the datatype then we simply have to update the main loop.
|
||||
*/
|
||||
if( count != 1 ) {
|
||||
pLoop = pLast;
|
||||
pLast->type = DT_LOOP;
|
||||
pLast->count = count;
|
||||
pLast->disp = (long)pdtAdd->desc.used + 1;
|
||||
pLast->extent = extent;
|
||||
pLast->flags = (pdtAdd->flags & ~(DT_FLAG_COMMITED | DT_FLAG_FOREVER));
|
||||
localFlags = DT_FLAG_IN_LOOP;
|
||||
pdtBase->btypes[DT_LOOP] += 2;
|
||||
pdtBase->desc.used += 2;
|
||||
pLast++;
|
||||
}
|
||||
|
||||
for( i = 0; i < pdtAdd->desc.used; i++ ) {
|
||||
pLast->type = pdtAdd->desc.desc[i].type;
|
||||
pLast->flags = pdtAdd->desc.desc[i].flags | localFlags;
|
||||
pLast->count = pdtAdd->desc.desc[i].count;
|
||||
pLast->extent = pdtAdd->desc.desc[i].extent;
|
||||
pLast->disp = pdtAdd->desc.desc[i].disp;
|
||||
if( pdtAdd->desc.desc[i].type != DT_LOOP )
|
||||
pLast->disp += disp/* + pdtAdd->lb */;
|
||||
pLast++;
|
||||
}
|
||||
pdtBase->desc.used += pdtAdd->desc.used;
|
||||
if( pLoop != NULL ) {
|
||||
pLast->type = DT_END_LOOP;
|
||||
pLast->count = pdtAdd->desc.used + 1; /* where the loop start */
|
||||
pLast->disp = disp + (count - 1) * extent
|
||||
+ (pdtAdd->true_ub - pdtAdd->true_lb) ; /* the final extent for the loop */
|
||||
pLast->extent = pdtAdd->size; /* the size of the data inside the loop */
|
||||
pLast->flags = pLoop->flags;
|
||||
}
|
||||
/* should I add some space until the extent of this datatype ? */
|
||||
}
|
||||
|
||||
pdtBase->size += count * pdtAdd->size;
|
||||
pdtBase->true_lb = LMIN( pdtBase->true_lb, pdtAdd->true_lb + disp );
|
||||
pdtBase->true_ub = LMAX( pdtBase->true_ub,
|
||||
disp + pdtAdd->true_lb +
|
||||
(count - 1) * extent + pdtAdd->true_ub );
|
||||
|
||||
/* the lower bound should be inherited from the parents if and only
|
||||
* if the USER has explicitly set it. The result lb is the MIN between
|
||||
* the all lb + disp if and only if all or nobody flags's contain the LB.
|
||||
*/
|
||||
if( (pdtAdd->flags ^ pdtBase->flags) & DT_FLAG_USER_LB ) {
|
||||
pdtBase->flags |= DT_FLAG_USER_LB;
|
||||
if( pdtAdd->flags & DT_FLAG_USER_LB )
|
||||
lb = pdtAdd->lb + disp;
|
||||
else
|
||||
lb = pdtBase->lb;
|
||||
} else {
|
||||
lb = LMIN( pdtBase->lb, pdtAdd->lb + disp );
|
||||
}
|
||||
|
||||
/* the same apply for the upper bound except for the case where
|
||||
* either of them has the flag UB, in which case we should
|
||||
* compute the UB including the natural alignement of the data.
|
||||
*/
|
||||
if( (pdtBase->flags ^ pdtAdd->flags) & DT_FLAG_USER_UB ) {
|
||||
if( pdtBase->flags & DT_FLAG_USER_UB )
|
||||
ub = pdtBase->ub;
|
||||
else {
|
||||
pdtBase->flags |= DT_FLAG_USER_UB;
|
||||
ub = disp + pdtAdd->lb + count * extent;
|
||||
}
|
||||
} else {
|
||||
if( pdtBase->flags & DT_FLAG_USER_UB )
|
||||
ub = LMAX( pdtBase->ub, disp + pdtAdd->lb + count * (extent) );
|
||||
else {
|
||||
/* we should compute the extent depending on the alignement */
|
||||
long ubN = (disp + pdtAdd->lb + count * (extent));
|
||||
ub = LMAX( ((pdtBase->ub / pdtBase->align) * pdtBase->align),
|
||||
(((ubN + pdtBase->align - 1)/ pdtBase->align) * pdtBase->align) );
|
||||
}
|
||||
}
|
||||
/* update the extent and size */
|
||||
pdtBase->lb = lb;
|
||||
pdtBase->ub = ub;
|
||||
pdtBase->nbElems += (count * pdtAdd->nbElems);
|
||||
|
||||
/* Is the data still contiguous ?
|
||||
* The only way for the data to be contiguous is to have the true extent equal to his size.
|
||||
* In other words to avoid having internal gaps between elements.
|
||||
*/
|
||||
if( (pdtBase->size != (pdtBase->true_ub - pdtBase->true_lb)) ||
|
||||
!(pdtBase->flags & DT_FLAG_CONTIGUOUS) || !(pdtAdd->flags & DT_FLAG_CONTIGUOUS) )
|
||||
UNSET_CONTIGUOUS_FLAG(pdtBase->flags);
|
||||
|
||||
dt_decrease_ref( pdtAdd );
|
||||
|
||||
return 0;
|
||||
}
|
260
src/datatype/dt_create.c
Обычный файл
260
src/datatype/dt_create.c
Обычный файл
@ -0,0 +1,260 @@
|
||||
/* -*- Mode: C; c-basic-offset:3 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
#include "limits.h"
|
||||
|
||||
/* other fields starting after bdt_used (index of DT_LOOP should be ONE) */
/* Initializers for the trailing members of a basicDatatypes[] entry;
 * the last brace group holds 15 zeroed counters. */
#define EMPTY_DATA NULL, "", {0, 0, NULL}, {0, 0, NULL}, NULL, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/* Initializer for the leading "super" member of a basicDatatypes[] entry. */
#define BASEOBJ_DATA { NULL, 1 }
|
||||
dt_desc_t basicDatatypes[] = {
|
||||
/*super size true_lb true_ub align lb ub flags id nbElems bdt_used others fields */
|
||||
/*0x00*/ { BASEOBJ_DATA, 0, 0, 0, 0, 0, 0, DT_FLAG_BASIC, DT_LOOP, 1, (1<<DT_LOOP), EMPTY_DATA },
|
||||
/*0x01*/ { BASEOBJ_DATA, 0, 0, 0, 0, 0, 0, DT_FLAG_BASIC | DT_FLAG_USER_LB, DT_LB, 1, (1<<DT_LB), EMPTY_DATA },
|
||||
/*0x02*/ { BASEOBJ_DATA, 0, 0, 0, 0, 0, 0, DT_FLAG_BASIC | DT_FLAG_USER_UB, DT_UB, 1, (1<<DT_UB), EMPTY_DATA },
|
||||
/* now the real basic datatypes */
|
||||
/*0x03*/ { BASEOBJ_DATA, 1, 0, 1, 1, 0, 1, DT_FLAG_BASIC | DT_FLAG_DATA, DT_SPACE, 1, (1<<DT_SPACE), EMPTY_DATA },
|
||||
/*0x04*/ { BASEOBJ_DATA, 1, 0, 1, 1, 0, 1, DT_FLAG_BASIC | DT_FLAG_DATA, DT_CHAR, 1, (1<<DT_CHAR), EMPTY_DATA },
|
||||
/*0x05*/ { BASEOBJ_DATA, 1, 0, 1, 1, 0, 1, DT_FLAG_BASIC | DT_FLAG_DATA, DT_BYTE, 1, (1<<DT_BYTE), EMPTY_DATA },
|
||||
/*0x06*/ { BASEOBJ_DATA, sizeof(short), 0, sizeof(short), sizeof(short), 0, sizeof(short), DT_FLAG_BASIC | DT_FLAG_DATA, DT_SHORT, 1, (1<<DT_SHORT), EMPTY_DATA },
|
||||
/*0x07*/ { BASEOBJ_DATA, sizeof(int), 0, sizeof(int), sizeof(int), 0, sizeof(int), DT_FLAG_BASIC | DT_FLAG_DATA, DT_INT, 1, (1<<DT_INT), EMPTY_DATA },
|
||||
/*0x08*/ { BASEOBJ_DATA, sizeof(float), 0, sizeof(float), sizeof(float), 0, sizeof(float), DT_FLAG_BASIC | DT_FLAG_DATA, DT_FLOAT, 1, (1<<DT_FLOAT), EMPTY_DATA },
|
||||
/*0x09*/ { BASEOBJ_DATA, sizeof(long), 0, sizeof(long), sizeof(long), 0, sizeof(long), DT_FLAG_BASIC | DT_FLAG_DATA, DT_LONG, 1, (1<<DT_LONG), EMPTY_DATA },
|
||||
/*0x0A*/ { BASEOBJ_DATA, sizeof(double), 0, sizeof(double), sizeof(double), 0, sizeof(double), DT_FLAG_BASIC | DT_FLAG_DATA, DT_DOUBLE, 1, (1<<DT_DOUBLE), EMPTY_DATA },
|
||||
/*0x0B*/ { BASEOBJ_DATA, sizeof(long long), 0, sizeof(long long), sizeof(long long), 0, sizeof(long long), DT_FLAG_BASIC | DT_FLAG_DATA, DT_LONG_LONG, 1, (1<<DT_LONG_LONG), EMPTY_DATA },
|
||||
/*0x0C*/ { BASEOBJ_DATA, sizeof(long double), 0, sizeof(long double), sizeof(long double), 0, sizeof(long double), DT_FLAG_BASIC | DT_FLAG_DATA, DT_LONG_DOUBLE, 1, (1<<DT_LONG_DOUBLE), EMPTY_DATA },
|
||||
/*0x0D*/ { BASEOBJ_DATA, 2 * sizeof(float), 0, 2 * sizeof(float), 2 * sizeof(float), 0, 2 * sizeof(float), DT_FLAG_BASIC | DT_FLAG_DATA, DT_COMPLEX_FLOAT, 1, (1<<DT_COMPLEX_FLOAT), EMPTY_DATA },
|
||||
/*0x0E*/ { BASEOBJ_DATA, 2 * sizeof(double), 0, 2 * sizeof(double), 2 * sizeof(double), 0, 2 * sizeof(double), DT_FLAG_BASIC | DT_FLAG_DATA, DT_COMPLEX_DOUBLE, 1, (1<<DT_COMPLEX_DOUBLE), EMPTY_DATA },
|
||||
/*0x0F*/ { BASEOBJ_DATA, 0, 0, 0, 0, 0, 0, DT_FLAG_BASIC, DT_END_LOOP, 1, (1<<DT_END_LOOP), EMPTY_DATA },
|
||||
};
|
||||
|
||||
/* Printable names of the predefined types, indexed by type id.
 * The extra last entry ("unknown") is the fallback used by the dump routines.
 */
static char* basicDatatypeNames[] = {
   /*0x00*/ "loop",
   /*0x01*/ "lb",
   /*0x02*/ "ub",
   /*0x03*/ "space",
   /*0x04*/ "char",
   /*0x05*/ "byte",
   /*0x06*/ "short",
   /*0x07*/ "int",
   /*0x08*/ "float",
   /*0x09*/ "long",
   /*0x0A*/ "double",
   /*0x0B*/ "long_long",
   /*0x0C*/ "long_double",
   /*0x0D*/ "cfloat",
   /*0x0E*/ "cdouble",
   /*0x0F*/ "end_loop",
   /*0x10*/ "unknown"
};
|
||||
|
||||
/* Node of a singly linked list; only the link itself is stored. */
typedef struct __internal_keep_ptr internal_dt_desc_t;
struct __internal_keep_ptr {
   internal_dt_desc_t* next;
};

/* Head of the list of such nodes; starts out empty. */
static internal_dt_desc_t* __free_dt_desc = NULL;
|
||||
|
||||
/* Constructor registered for lam_datatype_t: puts a descriptor in the
 * "empty" state.
 *
 * The bounds start at LONG_MAX / LONG_MIN so that the first min/max update
 * made while adding elements always wins.  Both element arrays (desc and
 * opt_desc) start detached: dt_destroy() frees any non-NULL array, so
 * leaving desc uninitialised would hand garbage to free() for an object that
 * was not built through dt_create().
 */
static void __get_free_dt_struct( dt_desc_t* pData )
{
   int i;

   pData->size     = 0;
   pData->id       = 0;
   pData->nbElems  = 0;
   pData->bdt_used = 0;
   for( i = 0; i < DT_MAX_PREDEFINED; i++ )
      pData->btypes[i] = 0;
   pData->btypes[DT_LOOP] = 1;

   /* no description attached yet (dt_create() allocates the real one) */
   pData->desc.desc       = NULL;
   pData->desc.length     = 0;
   pData->desc.used       = 0;
   pData->opt_desc.desc   = NULL;
   pData->opt_desc.length = 0;
   pData->opt_desc.used   = 0;
   pData->args            = NULL;

   pData->align   = 1;
   pData->flags   = DT_FLAG_CONTIGUOUS;
   pData->true_lb = LONG_MAX;
   pData->true_ub = LONG_MIN;
   pData->lb      = LONG_MAX;
   pData->ub      = LONG_MIN;
}
|
||||
/* Class registration for lam_datatype_t: names lam_object_t together with
 * __get_free_dt_struct and dt_destroy (presumably parent class, constructor
 * and destructor, in that order -- confirm against the OBJ_CLASS_INSTANCE
 * definition).
 * NOTE(review): dt_destroy is declared as taking a dt_desc_t**, while
 * __get_free_dt_struct takes a dt_desc_t*; verify which one the object
 * system actually passes to the destructor.
 */
OBJ_CLASS_INSTANCE(lam_datatype_t, lam_object_t, __get_free_dt_struct, dt_destroy );
|
||||
|
||||
dt_desc_t* dt_create( int expectedSize )
|
||||
{
|
||||
dt_desc_t* pdt = (dt_desc_t*)OBJ_NEW(lam_datatype_t);
|
||||
|
||||
if( expectedSize == -1 ) expectedSize = DT_INCREASE_STACK;
|
||||
pdt->desc.length = expectedSize;
|
||||
pdt->desc.used = 0;
|
||||
pdt->desc.desc = (dt_elem_desc_t*)calloc(pdt->desc.length, sizeof(dt_elem_desc_t));
|
||||
return pdt;
|
||||
}
|
||||
|
||||
/* Build a copy of oldType whose lower bound is lb and whose extent is
 * `extent` (so ub = lb + extent).
 *
 * Returns 0 on success, or the error reported by dt_duplicate(); in the
 * error case *newType is left as dt_duplicate() set it and is not touched.
 */
int dt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t** newType )
{
   int rc = dt_duplicate( oldType, newType );

   if( 0 != rc ) return rc;  /* do not dereference a failed duplicate */
   (*newType)->lb = lb;
   (*newType)->ub = lb + extent;
   return 0;
}
|
||||
|
||||
/* Mark a datatype as committed.
 *
 * Returns -1 when the datatype already carries DT_FLAG_COMMITED, 0 otherwise.
 * An optimized description is generated only when the size differs from the
 * true extent (i.e. the data has gaps); the result of that step is ignored.
 */
int dt_commit( dt_desc_t** data )
{
   dt_desc_t* pType = (dt_desc_t*)*data;

   if( pType->flags & DT_FLAG_COMMITED )
      return -1;
   pType->flags |= DT_FLAG_COMMITED;

   /* gap-free data: an optimized version would bring nothing */
   if( pType->size == (pType->true_ub - pType->true_lb) )
      return 0;

   (void)dt_optimize_short( pType, 1, &(pType->opt_desc) );
   return 0;
}
|
||||
|
||||
static void _dump_data_flags( unsigned short usflags )
|
||||
{
|
||||
char flags[12] = "-----------";
|
||||
|
||||
if( usflags & DT_FLAG_DESTROYED ) flags[0] = 'd';
|
||||
if( usflags & DT_FLAG_COMMITED ) flags[1] = 'c';
|
||||
if( usflags & DT_FLAG_CONTIGUOUS ) flags[2] = 'C';
|
||||
if( usflags & DT_FLAG_OVERLAP ) flags[3] = 'o';
|
||||
if( usflags & DT_FLAG_USER_LB ) flags[4] = 'l';
|
||||
if( usflags & DT_FLAG_USER_UB ) flags[5] = 'u';
|
||||
if( usflags & DT_FLAG_FOREVER ) flags[6] = 'F';
|
||||
if( usflags & DT_FLAG_IN_LOOP ) flags[7] = 'L';
|
||||
if( usflags & DT_FLAG_DATA ) flags[8] = 'D';
|
||||
if( usflags & DT_FLAG_INITIAL ) flags[9] = 'I';
|
||||
if( (usflags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) flags[10] = 'B';
|
||||
flags[11] = 0;
|
||||
printf( "%s\t", flags );
|
||||
}
|
||||
|
||||
/* Print nbElems consecutive element descriptors starting at pDesc, one per
 * line: flag mask, type name, then either the loop information (repeat
 * count, number of elements in the loop, extent) or count / displacement /
 * extent.  Types above DT_MAX_PREDEFINED are shown with the fallback name.
 * Always returns 0.
 */
int __dump_data_desc( dt_elem_desc_t* pDesc, int nbElems )
{
   int i;

   for( i = 0; i < nbElems; i++ ) {
      dt_elem_desc_t* pElem = &(pDesc[i]);
      char* dtName = ( pElem->type > DT_MAX_PREDEFINED )
                        ? basicDatatypeNames[DT_MAX_PREDEFINED]
                        : basicDatatypeNames[pElem->type];

      _dump_data_flags( pElem->flags );
      if( pElem->type != DT_LOOP ) {
         printf( "%15s count %d disp 0x%lx (%ld) extent %d\n", dtName,
                 pElem->count, pElem->disp, pElem->disp, pElem->extent );
      } else {
         /* for a loop, disp holds the number of elements in the body */
         printf( "%15s %d times the next %d elements extent %d\n", dtName,
                 pElem->count, (int)pElem->disp, pElem->extent );
      }
   }
   return 0;
}
|
||||
|
||||
/* Print, separated by blanks, "lb"/"ub" when the user bound flags are set,
 * then the name of every predefined type whose bit is set in bdt_used.
 */
void __dt_contain_basic_datatypes( dt_desc_t* pData )
{
   int id;

   if( pData->flags & DT_FLAG_USER_LB ) printf( "lb " );
   if( pData->flags & DT_FLAG_USER_UB ) printf( "ub " );

   for( id = 0; id < DT_MAX_PREDEFINED; id++ ) {
      /* bit number == type id */
      if( pData->bdt_used & (1 << id) )
         printf( "%s ", basicDatatypeNames[id] );
   }
}
|
||||
|
||||
/* Print a human readable summary of a datatype on stdout: sizes and bounds,
 * flags, the predefined types it contains, its element description and,
 * when present, its optimized description.
 */
void dt_dump( dt_desc_t* data )
{
   dt_desc_t* pData = (dt_desc_t*)data;

   /* %p requires a void* argument, hence the explicit cast */
   printf( "Datatype %p size %d align %d id %d length %d used %d\n"
           "true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n"
           "nbElems %d loops %d flags %X (",
           (void*)pData, pData->size, pData->align, pData->id, pData->desc.length, pData->desc.used,
           pData->true_lb, pData->true_ub, pData->true_ub - pData->true_lb,
           pData->lb, pData->ub, pData->ub - pData->lb,
           pData->nbElems, pData->btypes[DT_LOOP], pData->flags );

   /* dump the flags */
   if( pData->flags == DT_FLAG_BASIC ) printf( "basic datatype " );
   else {
      if( pData->flags & DT_FLAG_DESTROYED ) printf( "destroyed " );
      if( pData->flags & DT_FLAG_COMMITED ) printf( "commited " );
      if( pData->flags & DT_FLAG_CONTIGUOUS) printf( "contiguous " );
   }
   printf( ")" ); _dump_data_flags( pData->flags );
   printf( "\n contain " ); __dt_contain_basic_datatypes( pData ); printf( "\n" );

   __dump_data_desc( pData->desc.desc, pData->desc.used );
   if( pData->opt_desc.desc != NULL ) {
      printf( "Optimized description \n" );
      __dump_data_desc( pData->opt_desc.desc, pData->opt_desc.used );
   }
}
|
||||
/* Generates dump_<TYPENAME>(): a conversion-function look-alike that only
 * prints what it was asked to convert.
 *
 * When the source buffer (from_len bytes) cannot hold `count` items of TYPE,
 * count is reduced to the number of complete items; left-over bytes are
 * reported and make the result negative.  *used receives count * to_extent.
 * Returns the (possibly reduced) count, negated when bytes were left over.
 *
 * count / from_len / to_len are unsigned and therefore printed with %u;
 * pointers are cast to void* as %p requires.
 */
#define DUMP_TYPE( TYPENAME, TYPE ) \
int dump_##TYPENAME( unsigned int count, \
                     char* from, unsigned int from_len, long from_extent, \
                     char* to, unsigned int to_len, long to_extent, \
                     int* used ) \
{ \
   int remote_type_size = sizeof(TYPE), res = 1; \
   if( (remote_type_size * count) > from_len ) { \
      count = from_len / remote_type_size; \
      if( (count * remote_type_size) != from_len ) { \
         printf( "oops should I keep this data somewhere (excedent %u bytes)?\n", \
                 from_len - (count * remote_type_size) ); \
         res = -1; \
      } \
      printf( "correct: %s count %u from %p with length %u to %p space %u\n", \
              #TYPE, count, (void*)from, from_len, (void*)to, to_len ); \
   } else \
      printf( " %s count %u from %p with length %u to %p space %u\n", \
              #TYPE, count, (void*)from, from_len, (void*)to, to_len ); \
   \
   *used = count * to_extent; \
   return res * (int)count; \
}
|
||||
|
||||
DUMP_TYPE( char, char );
|
||||
DUMP_TYPE( short, short );
|
||||
DUMP_TYPE( int, int );
|
||||
DUMP_TYPE( float, float );
|
||||
DUMP_TYPE( long, long );
|
||||
DUMP_TYPE( double, double );
|
||||
DUMP_TYPE( long_long, long long );
|
||||
DUMP_TYPE( long_double, long double );
|
||||
DUMP_TYPE( complex_float, complex_float_t );
|
||||
DUMP_TYPE( complex_double, complex_double_t );
|
||||
|
||||
static convertor_t* pDumpConv = NULL;
|
||||
|
||||
static conversion_fct_t dump_functions[] = {
|
||||
(conversion_fct_t)NULL, /* DT_LOOP */
|
||||
(conversion_fct_t)NULL, /* DT_LB */
|
||||
(conversion_fct_t)NULL, /* DT_UB */
|
||||
(conversion_fct_t)NULL, /* DT_SPACE */
|
||||
(conversion_fct_t)dump_char, /* DT_CHAR */
|
||||
(conversion_fct_t)dump_char, /* DT_BYTE */
|
||||
(conversion_fct_t)dump_short, /* DT_SHORT */
|
||||
(conversion_fct_t)dump_int, /* DT_INT */
|
||||
(conversion_fct_t)dump_float, /* DT_FLOAT */
|
||||
(conversion_fct_t)dump_long, /* DT_LONG */
|
||||
(conversion_fct_t)dump_double, /* DT_DOUBLE */
|
||||
(conversion_fct_t)dump_long_long, /* DT_LONG_LONG */
|
||||
(conversion_fct_t)dump_long_double, /* DT_LONG_DOUBLE */
|
||||
(conversion_fct_t)dump_complex_float, /* DT_COMPLEX_FLOAT */
|
||||
(conversion_fct_t)dump_complex_double, /* DT_COMPLEX_DOUBLE */
|
||||
};
|
||||
|
||||
/* Print the summary of a datatype, then walk it with a convertor whose
 * conversion functions are the dump_*() routines, so that every basic
 * element met during an unpack of one item is printed between braces.
 * If the dump convertor cannot be created only the summary is printed.
 */
void dt_dump_complete( dt_desc_t* data )
{
   dt_desc_t* pData = (dt_desc_t*)data;
   struct iovec fake = { (void*)0, 0 };

   /* %p requires a void* argument, hence the explicit cast */
   printf( "Datatype %p size %d align %d id %d length %d used %d\n"
           "true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n"
           "nbElems %d loops %d flags %X (",
           (void*)pData, pData->size, pData->align, pData->id, pData->desc.length, pData->desc.used,
           pData->true_lb, pData->true_ub, pData->true_ub - pData->true_lb,
           pData->lb, pData->ub, pData->ub - pData->lb,
           pData->nbElems, pData->btypes[DT_LOOP], pData->flags );

   /* dump the flags */
   if( pData->flags == DT_FLAG_BASIC ) printf( "basic datatype " );
   else {
      if( pData->flags & DT_FLAG_DESTROYED ) printf( "destroyed " );
      if( pData->flags & DT_FLAG_COMMITED ) printf( "commited " );
      if( pData->flags & DT_FLAG_CONTIGUOUS) printf( "contiguous " );
      if( pData->flags & DT_FLAG_OVERLAP ) printf( "overlap " );
   }
   printf( ")\n contain " ); __dt_contain_basic_datatypes( pData );
   printf( "\n{\n" );

   if( pDumpConv == NULL ) {
      pDumpConv = convertor_create( 0, 0 );
      if( pDumpConv == NULL ) {
         /* no convertor: close the brace and give up instead of crashing */
         printf( "}\n" );
         return;
      }
   }
   convertor_init_for_recv( pDumpConv, 0, pData, 1, NULL );
   pDumpConv->pFunctions = dump_functions;

   /* pretend to unpack exactly one item from a buffer that is never read */
   fake.iov_len = pData->size;
   convertor_unpack( pDumpConv, &fake, 1 );
   printf( "}\n" );
}
|
15
src/datatype/dt_create_array.c
Обычный файл
15
src/datatype/dt_create_array.c
Обычный файл
@ -0,0 +1,15 @@
|
||||
#include "datatype.h"
|
||||
|
||||
/* Sub-array constructor: not implemented.
 * Ignores every argument, leaves *newType untouched and always returns -1.
 */
int dt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts,
                        int order, dt_desc_t* oldType, dt_desc_t** newType )
{
   (void)ndims;   (void)pSizes;  (void)pSubSizes; (void)pStarts;
   (void)order;   (void)oldType; (void)newType;
   return -1;
}
|
||||
|
||||
/* Distributed-array constructor: not implemented.
 * Ignores every argument, leaves *newType untouched and always returns -1.
 */
int dt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib,
                      int* pDArgs, int* pPSizes, int order, dt_desc_t* oldType,
                      dt_desc_t** newType )
{
   (void)size;    (void)rank;    (void)ndims;   (void)pGSizes; (void)pDistrib;
   (void)pDArgs;  (void)pPSizes; (void)order;   (void)oldType; (void)newType;
   return -1;
}
|
||||
|
23
src/datatype/dt_create_dup.c
Обычный файл
23
src/datatype/dt_create_dup.c
Обычный файл
@ -0,0 +1,23 @@
|
||||
#include "datatype.h"
|
||||
|
||||
/* Create an independent copy of oldType in *newType.
 *
 * All scalar members are copied; id and args are reset.  The copy owns its
 * own element array and, when oldType has one, its own optimized
 * description.  If the latter cannot be allocated the copy simply has none,
 * and users fall back to the plain description.
 * Returns 0 on success, -1 (with *newType set to NULL) when memory for the
 * copy cannot be obtained.
 *
 * NOTE(review): the structure copy also overwrites the object header that
 * dt_create() initialised (oldType's reference count included) and carries
 * over flags such as DT_FLAG_FOREVER -- confirm whether that is intended.
 */
int dt_duplicate( dt_desc_t* oldType, dt_desc_t** newType )
{
   dt_desc_t* pdt = dt_create( oldType->desc.used );
   void* temp;

   if( NULL == pdt ) {
      *newType = NULL;
      return -1;
   }
   temp = pdt->desc.desc; /* temporary copy of the desc pointer */
   if( (NULL == temp) && (oldType->desc.used > 0) ) {
      dt_free( &pdt );
      *newType = NULL;
      return -1;
   }

   memcpy( pdt, oldType, sizeof(dt_desc_t) );
   pdt->desc.desc   = temp;
   /* the copy only has room for `used` elements, not for oldType's capacity */
   pdt->desc.length = oldType->desc.used;
   if( oldType->desc.used > 0 )
      memcpy( pdt->desc.desc, oldType->desc.desc, sizeof(dt_elem_desc_t) * oldType->desc.used );

   /* never share the optimized array: both owners would free it */
   pdt->opt_desc.desc   = NULL;
   pdt->opt_desc.length = 0;
   pdt->opt_desc.used   = 0;
   if( (NULL != oldType->opt_desc.desc) && (oldType->opt_desc.used > 0) ) {
      pdt->opt_desc.desc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * oldType->opt_desc.used );
      if( NULL != pdt->opt_desc.desc ) {
         memcpy( pdt->opt_desc.desc, oldType->opt_desc.desc,
                 sizeof(dt_elem_desc_t) * oldType->opt_desc.used );
         pdt->opt_desc.length = oldType->opt_desc.used;
         pdt->opt_desc.used   = oldType->opt_desc.used;
      }
   }

   pdt->id   = 0;
   pdt->args = NULL;
   *newType  = pdt;
   return 0;
}
|
||||
|
||||
/* Build a datatype made of `count` copies of oldType laid out back to back
 * (each copy is one extent of oldType after the previous one).
 * The new type is stored in *newType; always returns 0.
 */
int dt_create_contiguous( size_t count, dt_desc_t* oldType, dt_desc_t** newType )
{
   dt_desc_t* pContig = dt_create( oldType->desc.used + 2 );
   long step = oldType->ub - oldType->lb;  /* extent of one copy */

   dt_add( pContig, oldType, count, 0, step );
   *newType = pContig;
   return 0;
}
|
90
src/datatype/dt_create_indexed.c
Обычный файл
90
src/datatype/dt_create_indexed.c
Обычный файл
@ -0,0 +1,90 @@
|
||||
#include "datatype.h"
|
||||
|
||||
/* We try to merge together data that are contiguous */
|
||||
/* Build an indexed datatype: `count` blocks, block i holding
 * pBlockLength[i] copies of oldType and starting pDisp[i] extents of
 * oldType from the origin.  Blocks that start exactly where the previous
 * one ends are merged into a single run.
 *
 * count == 0 yields an empty, zero-extent type (the arrays are not read).
 * Returns 0 on success, -1 if the empty type cannot be allocated.
 */
int dt_create_indexed( size_t count, int* pBlockLength, int* pDisp,
                       dt_desc_t* oldType, dt_desc_t** newType )
{
   dt_desc_t* pdt;
   size_t i;
   int dLength, endat, disp;
   long extent = oldType->ub - oldType->lb;

   if( 0 == count ) {
      pdt = dt_create( 0 );
      if( NULL != pdt ) pdt->lb = pdt->ub = pdt->true_lb = pdt->true_ub = 0;
      *newType = pdt;
      return (NULL == pdt) ? -1 : 0;
   }

   pdt = dt_create( count * (2 + oldType->desc.used) );
   disp    = pDisp[0];
   dLength = pBlockLength[0];
   endat   = disp + dLength;
   for( i = 1; i < count; i++ ) {
      if( endat == pDisp[i] ) {
         /* contiguous with the previous block: extend the current run */
         dLength += pBlockLength[i];
         endat   += pBlockLength[i];
      } else {
         /* flush the current run and start a new one */
         dt_add( pdt, oldType, dLength, disp * extent, extent );
         disp    = pDisp[i];
         dLength = pBlockLength[i];
         endat   = disp + pBlockLength[i];
      }
   }
   dt_add( pdt, oldType, dLength, disp * extent, extent );

   *newType = pdt;
   return 0;
}
|
||||
|
||||
/* Same as the indexed constructor, except that pDisp[i] is a displacement
 * in bytes instead of a multiple of oldType's extent.  Blocks that start
 * exactly where the previous one ends are merged into a single run.
 *
 * count == 0 yields an empty, zero-extent type (the arrays are not read).
 * Returns 0 on success, -1 if the empty type cannot be allocated.
 */
int dt_create_hindexed( size_t count, int* pBlockLength, long* pDisp,
                        dt_desc_t* oldType, dt_desc_t** newType )
{
   dt_desc_t* pdt;
   size_t i;
   int dLength;
   long extent = oldType->ub - oldType->lb;
   long disp, endat;

   if( 0 == count ) {
      pdt = dt_create( 0 );
      if( NULL != pdt ) pdt->lb = pdt->ub = pdt->true_lb = pdt->true_ub = 0;
      *newType = pdt;
      return (NULL == pdt) ? -1 : 0;
   }

   pdt = dt_create( count * (2 + oldType->desc.used) );
   disp    = pDisp[0];
   dLength = pBlockLength[0];
   endat   = disp + dLength * extent;
   for( i = 1; i < count; i++ ) {
      if( endat == pDisp[i] ) {
         /* contiguous with the previous block: extend the current run */
         dLength += pBlockLength[i];
         endat   += pBlockLength[i] * extent;
      } else {
         /* flush the current run and start a new one */
         dt_add( pdt, oldType, dLength, disp, extent );
         disp    = pDisp[i];
         dLength = pBlockLength[i];
         endat   = disp + pBlockLength[i] * extent;
      }
   }
   dt_add( pdt, oldType, dLength, disp, extent );

   *newType = pdt;
   return 0;
}
|
||||
|
||||
/* Indexed datatype whose blocks all hold bLength copies of oldType; block i
 * starts pDisp[i] extents of oldType from the origin.  Blocks that start
 * exactly where the previous one ends are merged into a single run.
 *
 * count == 0 yields an empty, zero-extent type (pDisp is not read).
 * Returns 0 on success, -1 if the empty type cannot be allocated.
 */
int dt_create_indexed_block( size_t count, int bLength, int* pDisp,
                             dt_desc_t* oldType, dt_desc_t** newType )
{
   dt_desc_t* pdt;
   size_t i;
   int dLength, endat, disp;
   long extent = oldType->ub - oldType->lb;

   if( 0 == count ) {
      pdt = dt_create( 0 );
      if( NULL != pdt ) pdt->lb = pdt->ub = pdt->true_lb = pdt->true_ub = 0;
      *newType = pdt;
      return (NULL == pdt) ? -1 : 0;
   }

   pdt = dt_create( count * (2 + oldType->desc.used) );
   disp    = pDisp[0];
   dLength = bLength;
   endat   = disp + dLength;
   for( i = 1; i < count; i++ ) {
      if( endat == pDisp[i] ) {
         /* contiguous with the previous block: extend the current run */
         dLength += bLength;
         endat   += bLength;
      } else {
         /* flush the current run and start a new one */
         dt_add( pdt, oldType, dLength, disp * extent, extent );
         disp    = pDisp[i];
         dLength = bLength;
         endat   = disp + bLength;
      }
   }
   dt_add( pdt, oldType, dLength, disp * extent, extent );

   *newType = pdt;
   return 0;
}
|
42
src/datatype/dt_create_struct.c
Обычный файл
42
src/datatype/dt_create_struct.c
Обычный файл
@ -0,0 +1,42 @@
|
||||
#include "datatype.h"
|
||||
|
||||
/* Build a structure datatype: field i holds pBlockLength[i] copies of
 * pTypes[i] at byte displacement pDisp[i].  Consecutive fields of the same
 * type, where the next one starts exactly at the end of the previous one,
 * are merged into one run.
 *
 * count == 0 yields an empty, zero-extent type (the arrays are not read).
 * Returns 0 on success, -1 if the empty type cannot be allocated.
 */
int dt_create_struct( size_t count, size_t* pBlockLength, long* pDisp,
                      dt_desc_t** pTypes, dt_desc_t** newType )
{
   size_t i;
   long disp, endto, lastExtent, lastDisp;
   size_t lastBlock;
   dt_desc_t *pdt, *lastType;

   if( 0 == count ) {
      pdt = dt_create( 0 );
      if( NULL != pdt ) pdt->lb = pdt->ub = pdt->true_lb = pdt->true_ub = 0;
      *newType = pdt;
      return (NULL == pdt) ? -1 : 0;
   }

   /* if we compute the total number of elements before we can
    * avoid increasing the size of the desc array often.
    * (disp is used as the element counter here.)
    */
   for( disp = 0, i = 0; i < count; i++ ) {
      disp += pTypes[i]->desc.used;
      if( pBlockLength[i] != 1 ) disp += 2;
   }
   lastType   = pTypes[0];
   lastBlock  = pBlockLength[0];
   lastExtent = lastType->ub - lastType->lb;
   lastDisp   = pDisp[0];
   endto      = pDisp[0] + lastExtent * lastBlock;

   pdt = dt_create( disp );

   for( i = 1; i < count; i++ ) {
      if( (pTypes[i] == lastType) && (pDisp[i] == endto) ) {
         /* same type, directly behind the current run: extend it */
         lastBlock += pBlockLength[i];
         endto = lastDisp + lastBlock * lastExtent;
      } else {
         /* flush the current run and start a new one */
         dt_add( pdt, lastType, lastBlock, lastDisp, lastExtent );
         lastType   = pTypes[i];
         lastExtent = lastType->ub - lastType->lb;
         lastBlock  = pBlockLength[i];
         lastDisp   = pDisp[i];
         endto      = lastDisp + lastExtent * lastBlock;
      }
   }
   dt_add( pdt, lastType, lastBlock, lastDisp, lastExtent );

   *newType = pdt;
   return 0;
}
|
64
src/datatype/dt_create_vector.c
Обычный файл
64
src/datatype/dt_create_vector.c
Обычный файл
@ -0,0 +1,64 @@
|
||||
#include "datatype.h"
|
||||
|
||||
/* Open questions ...
|
||||
* - how to improve the handling of these vectors (creating a temporary datatype
|
||||
* can ONLY be an initial solution).
|
||||
*
|
||||
*/
|
||||
|
||||
/* Build a vector datatype: `count` blocks of bLength copies of oldType,
 * the start of consecutive blocks being `stride` extents of oldType apart.
 *
 * When bLength == stride the blocks touch and one contiguous run is
 * created.  Otherwise the first count-1 blocks are added with the stride as
 * their spacing, and the last block is added separately so that the type
 * ends right after its data.
 * count == 0 yields an empty, zero-extent type: without this guard
 * `count - 1` would wrap around and a block would be added at a
 * meaningless displacement.
 * Returns 0 on success, -1 if the empty type cannot be allocated.
 */
int dt_create_vector( size_t count, int bLength, long stride,
                      dt_desc_t* oldType, dt_desc_t** newType )
{
   long extent = oldType->ub - oldType->lb;
   dt_desc_t *pTempData, *pData;

   if( 0 == count ) {
      pData = dt_create( 0 );
      if( NULL != pData ) pData->lb = pData->ub = pData->true_lb = pData->true_ub = 0;
      *newType = pData;
      return (NULL == pData) ? -1 : 0;
   }

   if( bLength == stride ) {
      /* the elements are contiguous */
      pData = dt_create( oldType->desc.used + 2 );
      dt_add( pData, oldType, count * bLength, 0, extent );
   } else {
      if( count > 1 ) {
         if( bLength == 1 ) {
            pData = dt_create( oldType->desc.used + 2 );
            dt_add( pData, oldType, count - 1, 0, stride * extent );
         } else {
            /* one block as a temporary type, repeated count-1 times */
            pTempData = dt_create( oldType->desc.used + 2 );
            pData = dt_create( oldType->desc.used + 2 + 2 );
            dt_add( pTempData, oldType, bLength, 0, extent );
            dt_add( pData, pTempData, count - 1, 0, stride * extent );
            dt_free( &pTempData );
         }
      } else {
         pData = dt_create( oldType->desc.used + 2 );
      }
      /* last block; signed arithmetic so that negative strides work */
      dt_add( pData, oldType, bLength, (long)(count - 1) * extent * stride, extent );
   }
   *newType = pData;
   return 0;
}
|
||||
|
||||
/* Same as the vector constructor, except that `stride` is given in bytes.
 *
 * When one block (bLength extents) is exactly `stride` bytes long the
 * blocks touch and one contiguous run is created.  Otherwise the first
 * count-1 blocks are added with the stride as their spacing and the last
 * block is added separately.
 * count == 0 yields an empty, zero-extent type: without this guard
 * `count - 1` would wrap around and a block would be added at a
 * meaningless displacement.
 * Returns 0 on success, -1 if the empty type cannot be allocated.
 */
int dt_create_hvector( size_t count, int bLength, long stride,
                       dt_desc_t* oldType, dt_desc_t** newType )
{
   long extent = oldType->ub - oldType->lb;
   dt_desc_t *pTempData, *pData;

   if( 0 == count ) {
      pData = dt_create( 0 );
      if( NULL != pData ) pData->lb = pData->ub = pData->true_lb = pData->true_ub = 0;
      *newType = pData;
      return (NULL == pData) ? -1 : 0;
   }

   if( (extent * bLength) == stride ) {
      /* contiguous */
      pData = dt_create( oldType->desc.used + 2 );
      dt_add( pData, oldType, count * bLength, 0, extent );
   } else {
      if( count > 1 ) {
         /* one block as a temporary type, repeated count-1 times */
         pTempData = dt_create( oldType->desc.used + 2 );
         pData = dt_create( oldType->desc.used + 2 + 2 );
         dt_add( pTempData, oldType, bLength, 0, extent );
         dt_add( pData, pTempData, count - 1, 0, stride );
         dt_free( &pTempData );
      } else {
         pData = dt_create( oldType->desc.used + 2 );
      }
      /* last block; signed arithmetic so that negative strides work */
      dt_add( pData, oldType, bLength, (long)(count - 1) * stride, extent );
   }
   *newType = pData;
   return 0;
}
|
21
src/datatype/dt_destroy.c
Обычный файл
21
src/datatype/dt_destroy.c
Обычный файл
@ -0,0 +1,21 @@
|
||||
#include "datatype.h"
|
||||
|
||||
/* This function should never be called directly. It's called by the dt_decrease_ref
|
||||
* when the number of references on the data reach ZERO.
|
||||
*/
|
||||
/* Release the storage attached to a datatype descriptor.
 *
 * Datatypes flagged DT_FLAG_FOREVER must outlive every reference, so they
 * are refused with LAM_ERROR.  (The previous test was inverted: it refused
 * every ordinary datatype and released only the permanent ones.)
 * A still-attached `args` is reported on stderr but not released here.
 * Both element arrays are freed and their bookkeeping reset, so a second
 * call cannot free them twice.  Returns 0 on success.
 */
int dt_destroy( dt_desc_t** dt )
{
   dt_desc_t* pData = *dt;

   if( pData->flags & DT_FLAG_FOREVER )
      return LAM_ERROR;

   /* I still have the data description ? */
   if( pData->args != NULL ) {
      fprintf( stderr, "Data description has not been removed prior to data destruction\n" );
   }

   free( pData->opt_desc.desc );
   pData->opt_desc.desc   = NULL;
   pData->opt_desc.length = 0;
   pData->opt_desc.used   = 0;

   free( pData->desc.desc );
   pData->desc.desc   = NULL;
   pData->desc.length = 0;
   pData->desc.used   = 0;
   return 0;
}
|
71
src/datatype/dt_module.c
Обычный файл
71
src/datatype/dt_module.c
Обычный файл
@ -0,0 +1,71 @@
|
||||
#include "datatype.h"
|
||||
#include "datatype_internal.h"
|
||||
|
||||
/* Probe structures: a single char followed by one member of the probed
 * type.  The offset of that member is the padding the compiler inserts,
 * which dt_load() uses as the alignment of the type.
 */
struct _c_l  { char c; long        l;  };
struct _c_d  { char c; double      d;  };
struct _c_ll { char c; long long   ll; };
struct _c_ld { char c; long double ld; };
struct _c_f  { char c; float       f;  };
|
||||
|
||||
int dt_load( void )
|
||||
{
|
||||
/* we have to compute the correct alignement for several types of basic datatypes */
|
||||
struct _c_f c_f;
|
||||
struct _c_l c_l;
|
||||
struct _c_d c_d;
|
||||
struct _c_ll c_ll;
|
||||
struct _c_ld c_ld;
|
||||
int i;
|
||||
|
||||
basicDatatypes[DT_FLOAT].align = (char*)&(c_f.f) - (char*)&(c_f);
|
||||
basicDatatypes[DT_LONG].align = (char*)&(c_l.l) - (char*)&(c_l);
|
||||
basicDatatypes[DT_DOUBLE].align = (char*)&(c_d.d) - (char*)&(c_d);
|
||||
basicDatatypes[DT_LONG_DOUBLE].align = (char*)&(c_ld.ld) - (char*)&(c_ld);
|
||||
basicDatatypes[DT_LONG_LONG].align = (char*)&(c_ll.ll) - (char*)&(c_ll);
|
||||
|
||||
for( i = 0; i < DT_MAX_PREDEFINED; i++ ) {
|
||||
basicDatatypes[i].desc.desc = (dt_elem_desc_t*)malloc(sizeof(dt_elem_desc_t));
|
||||
basicDatatypes[i].desc.desc->flags = DT_FLAG_BASIC | DT_FLAG_CONTIGUOUS;
|
||||
basicDatatypes[i].desc.desc->type = i;
|
||||
basicDatatypes[i].desc.desc->count = 1;
|
||||
basicDatatypes[i].desc.desc->disp = 0;
|
||||
basicDatatypes[i].desc.desc->extent = basicDatatypes[i].size;
|
||||
basicDatatypes[i].desc.length = 1;
|
||||
basicDatatypes[i].desc.used = 1;
|
||||
basicDatatypes[i].btypes[i] = 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dt_unload( void )
|
||||
{
|
||||
int i;
|
||||
|
||||
for( i =0; i < DT_MAX_PREDEFINED; i++ ) {
|
||||
free( basicDatatypes[i].desc.desc );
|
||||
basicDatatypes[i].desc.desc = NULL;
|
||||
basicDatatypes[i].desc.length = 0;
|
||||
basicDatatypes[i].desc.used = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
48
src/datatype/dt_old_limits.c
Обычный файл
48
src/datatype/dt_old_limits.c
Обычный файл
@ -0,0 +1,48 @@
|
||||
#include "datatype.h"
|
||||
|
||||
/* Store the upper bound of the datatype in *disp.  Always returns 0. */
int dt_type_ub( dt_desc_t* pData, long* disp )
{
   disp[0] = pData->ub;
   return 0;
}
|
||||
|
||||
/* Store the lower bound of the datatype in *disp.  Always returns 0. */
int dt_type_lb( dt_desc_t* pData, long* disp )
{
   disp[0] = pData->lb;
   return 0;
}
|
||||
|
||||
/* Store the extent (upper bound minus lower bound) of the datatype in
 * *extent.  Always returns 0.
 */
int dt_type_extent( dt_desc_t* pData, long* extent )
{
   extent[0] = pData->ub - pData->lb;
   return 0;
}
|
||||
|
||||
/* Store the size member of the datatype in *size.  Always returns 0. */
int dt_type_size ( dt_desc_t* pData, int *size )
{
   size[0] = pData->size;
   return 0;
}
|
||||
|
||||
/* Create in *pNew a copy of pOld with lower bound lb and the given extent.
 *
 * This used to report success without ever writing *pNew, leaving callers
 * with an uninitialised pointer.  It now forwards to dt_create_resized(),
 * which has the same parameters, and returns its result.
 */
int dt_type_resize( dt_desc_t* pOld, long lb, long extent, dt_desc_t** pNew )
{
   return dt_create_resized( pOld, lb, extent, pNew );
}
|
||||
|
||||
/* Store the lower bound of the datatype in *lb and its extent (upper bound
 * minus lower bound) in *extent.  Always returns 0.
 */
int dt_get_extent( dt_desc_t* datatype, long* lb, long* extent)
{
   const long lower = datatype->lb;

   *lb     = lower;
   *extent = datatype->ub - lower;
   return 0;
}
|
||||
|
||||
/* Store the true lower bound of the datatype in *true_lb and its true
 * extent (true upper bound minus true lower bound) in *true_extent.
 * Always returns 0.
 */
int dt_get_true_extent( dt_desc_t* datatype, long* true_lb, long* true_extent)
{
   const long lower = datatype->true_lb;

   *true_lb     = lower;
   *true_extent = datatype->true_ub - lower;
   return 0;
}
|
151
src/datatype/dt_optimize.c
Обычный файл
151
src/datatype/dt_optimize.c
Обычный файл
@ -0,0 +1,151 @@
|
||||
/* -*- Mode: C; c-basic-offset:3 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
#include "datatype_internal.h"
|
||||
|
||||
/* printf( "save in %s:%d at %p DT_BYTE disp %ld count %d\n", __FILE__, __LINE__, (PELEM), (DISP), (COUNT) ); \ */
|
||||
/* Store a run of COUNT bytes at displacement DISP in *PELEM: a DT_BYTE
 * element with flags DT_FLAG_BASIC and extent 1.  Like SAVE_ELEM it
 * advances PELEM and increments the `nbElems` variable of the caller.
 */
#define SAVE_DESC( PELEM, DISP, COUNT ) \
   SAVE_ELEM( PELEM, DT_BYTE, DT_FLAG_BASIC, COUNT, DISP, 1 )

/* Fill the element PELEM points to, then advance PELEM to the next slot and
 * increment `nbElems`, which must be a variable in the scope of the caller.
 * PELEM is evaluated several times and must be a modifiable lvalue.
 */
#define SAVE_ELEM( PELEM, TYPE, FLAGS, COUNT, DISP, EXTENT ) \
   do { \
      (PELEM)->flags  = (FLAGS); \
      (PELEM)->type   = (TYPE); \
      (PELEM)->count  = (COUNT); \
      (PELEM)->disp   = (DISP); \
      (PELEM)->extent = (EXTENT); \
      (PELEM)++; \
      nbElems++; \
   } while(0)
|
||||
|
||||
/* Skip every DT_LOOP element starting at _pElem and return the
 * displacement of the first element that is not a loop start.
 */
static inline long GET_LOOP_DISP( dt_elem_desc_t* _pElem )
{
   dt_elem_desc_t* cursor;

   for( cursor = _pElem; DT_LOOP == cursor->type; cursor++ ) {
      /* nothing: only looking for the first non-loop element */
   }
   return cursor->disp;
}
|
||||
|
||||
/* Build in *pTypeDesc a shorter description of pData in which neighbouring
 * basic elements are merged into runs of DT_BYTE.
 *
 * The description of pData is walked once.  (lastDisp, lastLength) is the
 * byte run being accumulated; it is written out with SAVE_DESC whenever the
 * next piece of data does not start exactly at its end.  Loops flagged
 * DT_FLAG_CONTIGUOUS are either folded into the current run (no gap between
 * iterations) or rewritten as LOOP / one byte run / END_LOOP; other loops
 * are copied and their body is processed element by element.
 *
 * The output array holds 2 * desc.used elements.  Returns 0 on success and
 * -1 if that array cannot be allocated, in which case pTypeDesc->desc is
 * NULL and length/used are 0.
 */
int dt_optimize_short( dt_desc_t* pData, int count, dt_type_desc_t* pTypeDesc )
{
   dt_elem_desc_t* pElemDesc;
   long lastDisp = 0;
   dt_stack_t* pStack;   /* pointer to the position on the stack */
   int pos_desc;         /* actual position in the description of the derived datatype */
   int end_loop;         /* last element in the actual loop */
   int stack_pos = 0;
   int type, lastLength = 0, nbElems = 0, changes = 0;
   long totalDisp;

   pTypeDesc->length = 2 * pData->desc.used;
   pTypeDesc->desc = pElemDesc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * pTypeDesc->length );
   if( NULL == pElemDesc ) {
      /* nothing to write into: report it instead of dereferencing NULL */
      int wanted = pTypeDesc->length;
      pTypeDesc->length = 0;
      pTypeDesc->used   = 0;
      return (0 == wanted) ? 0 : -1;
   }

   pStack = alloca( sizeof(dt_stack_t) * (pData->btypes[DT_LOOP]+1) );
   pStack->count = count;
   pStack->index = -1;
   pStack->end_loop = pData->desc.used - 1;
   pStack->disp = 0;
   pos_desc = 0;

 next_loop:
   end_loop = pStack->end_loop;
   totalDisp = pStack->disp;
   while( pos_desc <= end_loop ) {
      if( pData->desc.desc[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
         dt_elem_desc_t* pStartLoop;
         if( lastLength != 0 ) {
            SAVE_DESC( pElemDesc, lastDisp, lastLength );
            lastDisp += lastLength;
            lastLength = 0;
         }
         /* nbElems counts the elements written since the loop start */
         pStartLoop = (pElemDesc - nbElems);
         SAVE_ELEM( pElemDesc, DT_END_LOOP, pData->desc.desc[pos_desc].flags,
                    nbElems, pData->desc.desc[pos_desc].disp,
                    pData->desc.desc[pos_desc].extent );
         /* the loop start kept the outer element count in its disp field */
         nbElems += pStartLoop->disp;
         pStartLoop->disp = (pElemDesc - 1)->count;
         stack_pos--;
         pStack--;

         pos_desc++;
         goto next_loop;
      }
      if( pData->desc.desc[pos_desc].type == DT_LOOP ) {
         dt_elem_desc_t* pEndLoop = &(pData->desc.desc[pos_desc + pData->desc.desc[pos_desc].disp]);
         long loop_disp = GET_LOOP_DISP( &(pData->desc.desc[pos_desc]) );
         if( pData->desc.desc[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
            /* the loop is contiguous or composed by contiguous elements with a gap */
            if( pData->desc.desc[pos_desc].extent == pEndLoop->extent ) {
               /* the whole loop is contiguous */
               if( (lastDisp + lastLength) != (totalDisp + loop_disp) ) {
                  /* only flush a run that exists: an empty one would add a
                   * zero-length element to the output */
                  if( lastLength != 0 ) {
                     SAVE_DESC( pElemDesc, lastDisp, lastLength );
                  }
                  lastLength = 0;
                  lastDisp = totalDisp + loop_disp;
               }
               lastLength += pData->desc.desc[pos_desc].count * pEndLoop->extent;
            } else {
               int counter = pData->desc.desc[pos_desc].count;
               if( (lastDisp + lastLength) == (totalDisp + loop_disp) ) {
                  /* the first iteration continues the current run */
                  lastLength += pEndLoop->extent;
                  counter--;
               }
               if( lastLength != 0 ) {
                  SAVE_DESC( pElemDesc, lastDisp, lastLength );
                  lastDisp += lastLength;
                  lastLength = 0;
               }
               /* we have a gap in the begining or the end of the loop but the whole
                * loop can be merged in just one memcpy.
                */
               SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags,
                          counter, (long)2, pData->desc.desc[pos_desc].extent );
               SAVE_DESC( pElemDesc, loop_disp, pEndLoop->extent );
               SAVE_ELEM( pElemDesc, DT_END_LOOP, pEndLoop->flags,
                          2, pEndLoop->disp, pEndLoop->extent );
            }
            /* the whole loop has been handled: continue after its END_LOOP */
            pos_desc += pData->desc.desc[pos_desc].disp + 1;
            changes++;
         } else {
            if( lastLength != 0 ) {
               SAVE_DESC( pElemDesc, lastDisp, lastLength );
               lastDisp += lastLength;
               lastLength = 0;
            }
            /* disp temporarily remembers the outer element count */
            SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags,
                       pData->desc.desc[pos_desc].count, (long)nbElems,
                       pData->desc.desc[pos_desc].extent );
            nbElems = 1;
            PUSH_STACK( pStack, stack_pos, pos_desc, pData->desc.desc[pos_desc].count,
                        totalDisp, pos_desc + pData->desc.desc[pos_desc].disp );
            pos_desc++;
            DUMP_STACK( pStack, stack_pos, pData->desc, "advance loops" );
         }
         goto next_loop;
      }
      /* now here we have a basic datatype */
      type = pData->desc.desc[pos_desc].type;
      if( (lastDisp + lastLength) == (totalDisp + pData->desc.desc[pos_desc].disp) ) {
         /* starts where the current run ends: extend it */
         lastLength += pData->desc.desc[pos_desc].count * basicDatatypes[type].size;
      } else {
         if( lastLength != 0 ) {
            SAVE_DESC( pElemDesc, lastDisp, lastLength );
         }
         lastDisp = totalDisp + pData->desc.desc[pos_desc].disp;
         lastLength = pData->desc.desc[pos_desc].count * basicDatatypes[type].size;
      }
      pos_desc++;  /* advance to the next data */
   }

   if( lastLength != 0 ) {
      SAVE_DESC( pElemDesc, lastDisp, lastLength );
   }
   /* cleanup the stack */
   pTypeDesc->used = nbElems;
   return 0;
}
|
478
src/datatype/dt_pack.c
Обычный файл
478
src/datatype/dt_pack.c
Обычный файл
@ -0,0 +1,478 @@
|
||||
/* -*- Mode: C; c-basic-offset:3 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
#include "datatype_internal.h"
|
||||
|
||||
/*
 * Generic pack engine: walks the datatype description one basic element at a
 * time and hands every element to the matching entry of
 * pConvertor->pFunctions. The position reached is saved on
 * pConvertor->pStack so that the next call resumes where this one stopped.
 *
 * Only out[0] is looked at; outCount is merely reported through DUMP.
 *
 * Returns  1 once the loop stack has been completely unwound,
 *          0 when the call stopped early and has to be repeated,
 *         -1 when a per-type function returned a value <= 0.
 */
static int convertor_pack_general( convertor_t* pConvertor, struct iovec* out, unsigned int outCount )
{
   dt_desc_t* pData = pConvertor->pDesc;
   dt_elem_desc_t* elems;                 /* description walked by this call */
   dt_stack_t* pStack;                    /* current top of the loop stack */
   char* pUser = pConvertor->pBaseBuf;    /* user side of the copy */
   int userLen = (pData->ub - pData->lb) * pConvertor->count;
   char* pPacked = out[0].iov_base;       /* iovec side of the copy */
   int packedLeft = out[0].iov_len;       /* bytes still available in out[0] */
   int pos_desc;                          /* index of the current element description */
   int count_desc;                        /* repetitions left for the current element */
   int end_loop;                          /* index closing the innermost loop */
   int type;                              /* basic type of the current element */
   int rc;                                /* elements handled by the last per-type call */
   unsigned int advance;                  /* bytes handled by the last per-type call */
   long disp = 0;                         /* displacement of the enclosing loop */
   long disp_desc = 0;                    /* displacement inside the current element */

   DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor,
         out[0].iov_base, out[0].iov_len, outCount );

   /* prefer the optimized description when the datatype carries one */
   elems = (pData->opt_desc.desc != NULL) ? pData->opt_desc.desc : pData->desc.desc;

   pStack = pConvertor->pStack + pConvertor->stack_pos;
   pos_desc = pStack->index;

   if( pos_desc == -1 ) {
      /* fresh start: begin with the first element of the description */
      pos_desc = 0;
      count_desc = elems[0].count;
      disp_desc = elems[0].disp;
   } else {
      /* resume: the top of the stack remembers a partially handled element */
      count_desc = pStack->count;
      if( elems[pos_desc].type != DT_LOOP ) {
         pConvertor->stack_pos--;
         pStack--;
         disp = pStack->disp;
         /* skip the repetitions that were already handled */
         disp_desc = ( elems[pos_desc].disp +
                       (elems[pos_desc].count - count_desc) * elems[pos_desc].extent );
      }
   }
   DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, elems, "starting" );
   DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
   DUMP( "top stack info {index = %d, count = %d}\n",
         pStack->index, pStack->count );

   end_loop = pStack->end_loop;
   while( pConvertor->stack_pos >= 0 ) {
      if( pos_desc == end_loop ) {
         /* end of the current loop: pop every loop that just ran out of iterations */
         while( --(pStack->count) == 0 ) {
            pConvertor->stack_pos--;
            pStack--;
            if( pConvertor->stack_pos == -1 )
               return 1;  /* completed */
         }
         /* start the next iteration of the loop now on top of the stack */
         pos_desc = pStack->index;
         if( pos_desc == -1 )
            pStack->disp += (pData->ub - pData->lb);
         else
            pStack->disp += elems[pos_desc].extent;
         pos_desc++;
         disp = pStack->disp;
         count_desc = elems[pos_desc].count;
         disp_desc = elems[pos_desc].disp;
         end_loop = pStack->end_loop;
         continue;
      }

      if( elems[pos_desc].type == DT_LOOP ) {
         /* push this loop and every loop nested directly at its start */
         do {
            PUSH_STACK( pStack, pConvertor->stack_pos,
                        pos_desc, elems[pos_desc].count,
                        disp, pos_desc + elems[pos_desc].disp + 1);
            pos_desc++;
         } while( elems[pos_desc].type == DT_LOOP );
         DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, elems, "advance loops" );
         /* update the current state */
         count_desc = elems[pos_desc].count;
         disp_desc = elems[pos_desc].disp;
         end_loop = pStack->end_loop;
         continue;
      }

      /* a basic datatype: let the per-type function move the data */
      /* NOTE(review): both extent arguments are the element extent; confirm
       * that this is what the per-type functions expect on the packed side. */
      type = elems[pos_desc].type;
      rc = pConvertor->pFunctions[type]( count_desc,
                                         pUser + disp + disp_desc, userLen, elems[pos_desc].extent,
                                         pPacked, packedLeft, elems[pos_desc].extent,
                                         &advance );
      if( rc <= 0 ) {
         printf( "trash in the input buffer\n" );
         return -1;
      }
      packedLeft -= advance;             /* less room left in the iovec buffer */
      pPacked += advance;                /* move forward inside the iovec buffer */
      pConvertor->bConverted += advance;

      if( rc != count_desc ) {
         /* only part of the element was handled: remember where to resume */
         PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
                     count_desc - rc,
                     disp + rc * elems[pos_desc].extent,
                     pos_desc );
         if( packedLeft != 0 )
            printf( "there is still room in the input buffer %d bytes\n", packedLeft );
         return 0;
      }

      pConvertor->converted += rc;       /* number of elements converted so far */
      pos_desc++;                        /* advance to the next data */
      count_desc = elems[pos_desc].count;
      disp_desc = elems[pos_desc].disp;
      if( packedLeft == 0 ) break;       /* the iovec buffer is exhausted */
   }

   /* out of the loop: either the conversion is complete or the buffer is
    * exhausted.
    */
   if( pConvertor->pStack[0].count < 0 ) return 1;  /* data successfully converted */

   /* an element was completed: the next call has to start with the following one */
   PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, elems[pos_desc].count,
               disp, pos_desc );

   return 0;
}
|
||||
|
||||
/*
 * Pack for the homogeneous case (plain memory copies, no per-type function).
 * Data is copied from pConv->pBaseBuf into iov[0].iov_base; adjacent regions
 * are merged into a single pending copy (lastDisp / lastLength) before being
 * flushed with MEMCPY. out_size is not used.
 *
 * Returns, for a datatype flagged DT_FLAG_CONTIGUOUS, 1 when
 * pConv->bConverted has reached size * count and 0 otherwise; for every
 * other datatype the function returns 0.
 */
int convertor_pack_homogeneous( convertor_t* pConv, struct iovec* iov, unsigned int out_size )
{
   dt_desc_t* pData = pConv->pDesc;
   dt_elem_desc_t* pElems;          /* description walked by this call */
   dt_stack_t* pStack;              /* current top of the loop stack */
   char* pDestBuf = iov[0].iov_base;
   long lastDisp = 0;               /* start of the pending (not yet copied) region */
   long lastLength = 0;             /* length of the pending region */
   int stack_pos = 0;               /* position on the stack */
   int pos_desc;                    /* index of the current element description */
   int type;                        /* basic type at the current position */
   int i;                           /* iteration counter for contiguous loops */

   if( pData->flags & DT_FLAG_CONTIGUOUS ) {
      long extent = pData->ub - pData->lb;
      int packedBytes = pConv->count * pData->size;

      if( pData->size == extent ) {
         /* no gap between consecutive elements: a single copy is enough */
         char* pSrc = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;

         MEMCPY( pDestBuf, pSrc, iov[0].iov_len );
         pConv->bConverted += iov[0].iov_len;
      } else {
         /* gaps between elements: copy the payload of every element in turn */
         char* pSrcBuf = pConv->pBaseBuf + pData->true_lb;

         for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
            MEMCPY( pDestBuf, pSrcBuf, pData->size );
            pSrcBuf += extent;
            pDestBuf += pData->size;
         }
         pConv->bConverted += packedBytes;
      }
      return (pConv->bConverted == (pData->size * pConv->count));
   }

   /* entry 0 of the stack stands for the repetition of the whole datatype */
   pStack = pConv->pStack;
   pStack->count = pConv->count;
   pStack->index = -1;
   pStack->disp = 0;
   pos_desc = 0;

   /* prefer the optimized description when the datatype carries one */
   if( pData->opt_desc.desc != NULL ) {
      pElems = pData->opt_desc.desc;
      pStack->end_loop = pData->opt_desc.used;
   } else {
      pElems = pData->desc.desc;
      pStack->end_loop = pData->desc.used;
   }

   DUMP_STACK( pStack, stack_pos, pElems, "starting" );
   DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
   DUMP( "top stack info {index = %d, count = %d}\n",
         pStack->index, pStack->count );

   while( pos_desc <= pStack->end_loop ) {
      if( pos_desc == pStack->end_loop ) {
         /* end of the current loop */
         if( --(pStack->count) == 0 ) {
            /* no iteration left: drop the loop */
            pStack--;
            if( --stack_pos == -1 ) break;
         } else {
            /* rewind to the loop start and shift by one loop extent */
            pos_desc = pStack->index;
            if( pos_desc == -1 )
               pStack->disp += (pData->ub - pData->lb);
            else
               pStack->disp += pElems[pos_desc].extent;
         }
         pos_desc++;
         continue;
      }

      if( pElems[pos_desc].type == DT_LOOP ) {
         if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
            dt_elem_desc_t* pLoop = &( pElems[pos_desc] );
            dt_elem_desc_t* pLast = &( pElems[pos_desc + pLoop->disp] );

            /* NOTE(review): unlike dt_unroll, this branch never moves
             * lastDisp to the start of the loop and advances pDestBuf by
             * lastLength only, even when the first iteration was merged
             * into the flush - confirm the intended behaviour. */
            if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
               /* the loop follows the pending region: flush both together */
               MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength + pLast->extent );
               i = 1;
            } else {
               MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
               i = 0;
            }
            pDestBuf += lastLength;
            lastLength = pLast->extent;
            for( ; i < (pLoop->count - 1); i++ ) {
               MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
               pDestBuf += pLast->extent;
               lastDisp += pLoop->extent;
            }
            /* jump over the loop; its last iteration stays pending */
            pos_desc += pLoop->disp + 1;
            continue;
         } else {
            /* push this loop and every loop nested directly at its start */
            do {
               PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
                           pStack->disp, pos_desc + pElems[pos_desc].disp );
               pos_desc++;
            } while( pElems[pos_desc].type == DT_LOOP );
         }
      }

      /* a basic datatype */
      type = pElems[pos_desc].type;
      if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
         /* adjacent to the pending region: just make the region longer */
         lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
      } else {
         /* flush the pending region and start a new one at this element */
         MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
         pDestBuf += lastLength;
         pConv->bConverted += lastLength;
         lastDisp = pStack->disp + pElems[pos_desc].disp;
         lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
      }
      pos_desc++;  /* advance to the next data */
   }

   /* flush whatever is still pending */
   MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
   pConv->bConverted += lastLength;
   return 0;
}
|
||||
|
||||
/*
 * Debug helper: print the copy that would be issued instead of performing it.
 * Expects two int variables, __index (running line number) and __sofar
 * (bytes reported so far), in the scope of the caller and updates both.
 * Wrapped in do { } while (0) so that "PRINT_MEMCPY(...);" is a single
 * statement and stays valid inside an unbraced if / else.
 */
#define PRINT_MEMCPY( DST, SRC, LENGTH ) \
do { \
   printf( "%5d: memcpy dst = %p src %p length %ld bytes (so far %d)[%d]\n", \
           __index++, (void*)(DST), (void*)(SRC), (long)(LENGTH), __sofar, __LINE__ ); \
   __sofar += (LENGTH); \
} while (0)
|
||||
|
||||
/*
 * Debug helper: walk the description of pData, repeated count times, and
 * print through PRINT_MEMCPY the sequence of copies the walk produces.
 * No data is moved; displacements are printed as if they were addresses.
 *
 * Returns, for a datatype flagged DT_FLAG_CONTIGUOUS, 1 when the reported
 * byte count equals size * count and 0 otherwise; for every other datatype
 * the function returns 0.
 */
int dt_unroll( dt_desc_t* pData, int count )
{
   dt_stack_t* pStack;              /* current top of the loop stack */
   dt_elem_desc_t* pElems;          /* description walked by this call */
   char* pDestBuf = NULL;           /* fake destination, only printed */
   long lastDisp = 0;               /* start of the pending region */
   long lastLength = 0;             /* length of the pending region */
   int stack_pos = 0;               /* position on the stack */
   int pos_desc;                    /* index of the current element description */
   int type;                        /* basic type at the current position */
   int i;                           /* iteration counter for contiguous loops */
   int bConverted = 0;
   int __index = 0, __sofar = 0;    /* counters required by PRINT_MEMCPY */

   if( pData->flags & DT_FLAG_CONTIGUOUS ) {
      long extent = pData->ub - pData->lb;
      char* pSrc = (char*)pData->true_lb;

      type = count * pData->size;
      if( pData->size == extent /* true extent at this point */ ) {
         /* we can do it with just one memcpy */
         PRINT_MEMCPY( pDestBuf, pSrc, pData->size * count );
         bConverted += (pData->size * count);
      } else {
         /* one copy per element, the source moves by one extent each time */
         for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
            PRINT_MEMCPY( pDestBuf, pSrc, pData->size );
            pSrc += extent;
            pDestBuf += pData->size;
         }
         bConverted += type;
      }
      return (bConverted == (pData->size * count));
   }

   /* Entry 0 is always written below, even for a datatype without any loop,
    * so the stack must never be empty. Use the same "+ 2" sizing as
    * convertor_init_for_send. */
   pStack = alloca( sizeof(dt_stack_t) * (pData->btypes[DT_LOOP] + 2) );
   pStack->count = count;
   pStack->index = -1;
   pStack->disp = 0;
   pos_desc = 0;

   /* prefer the optimized description when the datatype carries one */
   if( pData->opt_desc.desc != NULL ) {
      pElems = pData->opt_desc.desc;
      pStack->end_loop = pData->opt_desc.used;
   } else {
      pElems = pData->desc.desc;
      pStack->end_loop = pData->desc.used;
   }

   DUMP_STACK( pStack, stack_pos, pElems, "starting" );
   DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
   DUMP( "top stack info {index = %d, count = %d}\n",
         pStack->index, pStack->count );

   while( pos_desc <= pStack->end_loop ) {
      if( pos_desc == pStack->end_loop ) {
         /* end of the current loop */
         if( --(pStack->count) == 0 ) {
            /* no iteration left: drop the loop */
            pStack--;
            if( --stack_pos == -1 ) break;
         } else {
            /* rewind to the loop start and shift by one loop extent */
            pos_desc = pStack->index;
            if( pos_desc == -1 )
               pStack->disp += (pData->ub - pData->lb);
            else
               pStack->disp += pElems[pos_desc].extent;
         }
         pos_desc++;
         continue;
      }

      if( pElems[pos_desc].type == DT_LOOP ) {
         if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
            dt_elem_desc_t* pLoop = &( pElems[pos_desc] );
            dt_elem_desc_t* pLast = &( pElems[pos_desc + pLoop->disp] );

            if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
               /* the loop follows the pending region: report both together */
               PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent );
               lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent;
               i = 1;
            } else {
               PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
               lastDisp = pStack->disp + pElems[pos_desc + 1].disp;
               i = 0;
            }
            lastLength = pLast->extent;
            for( ; i < (pLoop->count - 1); i++ ) {
               PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
               pDestBuf += pLast->extent;
               lastDisp += pLoop->extent;
            }
            /* jump over the loop; its last iteration stays pending */
            pos_desc += pLoop->disp + 1;
            continue;
         } else {
            /* push this loop and every loop nested directly at its start */
            do {
               PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
                           pStack->disp, pos_desc + pElems[pos_desc].disp );
               pos_desc++;
            } while( pElems[pos_desc].type == DT_LOOP );
         }
      }

      /* a basic datatype */
      type = pElems[pos_desc].type;
      if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
         /* adjacent to the pending region: just make the region longer */
         lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
      } else {
         /* report the pending region and start a new one at this element */
         PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
         pDestBuf += lastLength;
         bConverted += lastLength;
         lastDisp = pStack->disp + pElems[pos_desc].disp;
         lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
      }
      pos_desc++;  /* advance to the next data */
   }

   /* report whatever is still pending */
   PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
   return 0;
}
|
||||
|
||||
/* The pack routines should do 2 things:
|
||||
* - first if the provided iovec contains NULL pointers then they should provide
|
||||
* buffer space. If the data is contiguous the it should provide directly pointers
|
||||
* the the user space depending on the iov_len argument. If -1 then all the buffer
|
||||
* can be supplied in one time, if not several steps need to be executed, it should
|
||||
* provide the correct pointer every time. But if the user provide a buffer, then
|
||||
* some parts of the data should be packed inside this buffer, but we still should
|
||||
* able to have pointers to the user buf on the subsequents calls.
|
||||
* Return 0 if everything went OK and if there is still room before the complete
|
||||
* conversion of the data (need additional call with others input buffers )
|
||||
* 1 if everything went fine and the data was completly converted
|
||||
* -1 something wrong occurs.
|
||||
*/
|
||||
int convertor_pack( convertor_t* pConv, struct iovec* out, unsigned int out_size )
|
||||
{
|
||||
dt_desc_t* pData = pConv->pDesc;
|
||||
int extent;
|
||||
|
||||
if( pConv->count == 0 ) return 1; /* nothing to do */
|
||||
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
if( pData->size == (extent = (pData->ub - pData->lb)) ) {
|
||||
if( out[0].iov_base == NULL ) {
|
||||
out[0].iov_base = pConv->pBaseBuf + pData->true_lb;
|
||||
out[0].iov_len = pData->size * pConv->count;
|
||||
} else {
|
||||
/* contiguous data just memcpy the smallest data in the user buffer */
|
||||
out[0].iov_len = IMIN( out[0].iov_len, pData->size * pConv->count );
|
||||
MEMCPY( out[0].iov_base, pConv->pBaseBuf + pData->true_lb, out[0].iov_len);
|
||||
}
|
||||
pConv->bConverted += out[0].iov_len;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if( out[0].iov_base == NULL ) {
|
||||
out[0].iov_len = pConv->count * pData->size;
|
||||
out[0].iov_base = (void*)malloc( out[0].iov_len );
|
||||
pConv->freebuf = out[0].iov_base;
|
||||
}
|
||||
return convertor_progress( pConv, out, out_size );
|
||||
}
|
||||
|
||||
int convertor_init_for_send( convertor_t* pConv, unsigned int flags,
|
||||
dt_desc_t* dt, int count, void* pUserBuf )
|
||||
{
|
||||
dt_increase_ref( dt );
|
||||
pConv->pDesc = dt;
|
||||
pConv->flags = CONVERTOR_SEND;
|
||||
if( pConv->pStack != NULL ) free( pConv->pStack );
|
||||
pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (dt->btypes[DT_LOOP] + 2) );
|
||||
pConv->stack_pos = 0;
|
||||
pConv->pStack[0].index = -1; /* fake entry for the first step */
|
||||
pConv->pStack[0].count = count; /* fake entry for the first step */
|
||||
pConv->pStack[0].disp = 0;
|
||||
/* first hre we should select which data representation will be used for
|
||||
* this operation: normal one or the optimized version ? */
|
||||
pConv->pStack[0].end_loop = dt->desc.used;
|
||||
pConv->pBaseBuf = pUserBuf;
|
||||
pConv->available_space = count * (dt->ub - dt->lb);
|
||||
pConv->count = count;
|
||||
pConv->pFunctions = copy_functions;
|
||||
pConv->converted = 0;
|
||||
pConv->bConverted = 0;
|
||||
if( (dt->flags & DT_FLAG_CONTIGUOUS) && (dt->size == (dt->ub - dt->lb)) )
|
||||
pConv->flags |= DT_FLAG_CONTIGUOUS;
|
||||
pConv->fAdvance = convertor_pack_homogeneous;
|
||||
if( pConv->freebuf != NULL ) {
|
||||
free( pConv->freebuf );
|
||||
pConv->freebuf = NULL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
convertor_t* convertor_create( int remote_arch, int mode )
|
||||
{
|
||||
convertor_t* pConv = (convertor_t*)calloc( 1, sizeof(convertor_t) );
|
||||
|
||||
pConv->pStack = NULL;
|
||||
pConv->remoteArch = remote_arch;
|
||||
pConv->fAdvance = convertor_pack_homogeneous;
|
||||
return pConv;
|
||||
}
|
||||
|
||||
/* Actually we suppose that we can only do receiver side conversion */
|
||||
int convertor_get_packed_size( convertor_t* pConv, unsigned int* pSize )
|
||||
{
|
||||
if( dt_type_size( pConv->pDesc, pSize ) != 0 )
|
||||
return -1;
|
||||
*pSize = (*pSize) * pConv->count;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Store in *pSize the number of bytes the data of the convertor occupies.
 * With no element the size is 0; for the same architecture
 * (remoteArch == 0) it is size * count; otherwise the sizes of all the basic
 * types flagged in bdt_used are summed up (local sizes for now) and
 * multiplied by count.
 *
 * Always returns 0.
 */
int convertor_get_unpacked_size( convertor_t* pConv, unsigned int* pSize )
{
   dt_desc_t* pData = pConv->pDesc;
   int kind;  /* index over the predefined basic types */

   if( pConv->count == 0 ) {
      *pSize = 0;
      return 0;
   }
   if( pConv->remoteArch == 0 ) {  /* same architecture */
      *pSize = pData->size * pConv->count;
      return 0;
   }

   *pSize = 0;
   kind = DT_CHAR;
   while( kind < DT_MAX_PREDEFINED ) {
      if( pData->bdt_used & (1<<kind) ) {
         /* TODO replace with the remote size */
         *pSize += (pData->btypes[kind] * basicDatatypes[kind].size);
      }
      kind++;
   }
   *pSize *= pConv->count;
   return 0;
}
|
617
src/datatype/dt_unpack.c
Обычный файл
617
src/datatype/dt_unpack.c
Обычный файл
@ -0,0 +1,617 @@
|
||||
/* -*- Mode: C; c-basic-offset:3 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
#include "datatype_internal.h"
|
||||
|
||||
/*
 * Print the entries of a convertor stack, from position stack_pos down to 0.
 * For every entry whose index is not -1 the matching element of pDesc
 * (count, disp, extent) is printed as well. name is only used as a label.
 */
void dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDesc, char* name )
{
   printf( "\nStack %p stack_pos %d name %s\n", pStack, stack_pos, name );

   while( stack_pos >= 0 ) {
      dt_stack_t* pEntry = &( pStack[stack_pos] );

      printf( "%d: pos %d count %d disp %ld end_loop %d ", stack_pos, pEntry->index,
              pEntry->count, pEntry->disp, pEntry->end_loop );
      if( pEntry->index == -1 ) {
         /* fake entry, there is no element description to show */
         printf( "\n" );
      } else {
         dt_elem_desc_t* pElem = &( pDesc[pEntry->index] );

         printf( "[desc count %d disp %ld extent %d]\n",
                 pElem->count,
                 pElem->disp,
                 pElem->extent );
      }
      stack_pos--;
   }
   printf( "\n" );
}
|
||||
|
||||
/*
|
||||
* Remember that the first item in the stack (ie. position 0) is the number
|
||||
* of times the datatype is involved in the operation (ie. the count argument
|
||||
* in the MPI_ call).
|
||||
*/
|
||||
/* Convert data from multiple input buffers (as received from the network layer)
|
||||
* to a contiguous output buffer with a predefined size.
|
||||
* Return 0 if everything went OK and if there is still room before the complete
|
||||
* conversion of the data (need additional call with others input buffers )
|
||||
* 1 if everything went fine and the data was completly converted
|
||||
* -1 something wrong occurs.
|
||||
*/
|
||||
static int convertor_unpack_general( convertor_t* pConvertor,
|
||||
struct iovec* pInputv,
|
||||
unsigned int inputCount )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int count_desc; /* the number of items already done in the actual pos_desc */
|
||||
int end_loop; /* last element in the actual loop */
|
||||
int type; /* type at current position */
|
||||
unsigned int advance; /* number of bytes that we should advance the buffer */
|
||||
int rc;
|
||||
long disp_desc = 0; /* compute displacement for truncated data */
|
||||
long disp; /* displacement at the beging of the last loop */
|
||||
dt_desc_t *pData = pConvertor->pDesc;
|
||||
dt_elem_desc_t* pElems;
|
||||
char* pOutput = pConvertor->pBaseBuf;
|
||||
int oCount = (pData->ub - pData->lb) * pConvertor->count;
|
||||
char* pInput = pInputv[0].iov_base;
|
||||
int iCount = pInputv[0].iov_len;
|
||||
|
||||
if( pData->opt_desc.desc != NULL ) pElems = pData->opt_desc.desc;
|
||||
else pElems = pData->desc.desc;
|
||||
|
||||
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor,
|
||||
pInputv[0].iov_base, pInputv[0].iov_len, inputCount );
|
||||
pStack = pConvertor->pStack + pConvertor->stack_pos;
|
||||
pos_desc = pStack->index;
|
||||
disp = 0;
|
||||
if( pos_desc == -1 ) {
|
||||
pos_desc = 0;
|
||||
count_desc = pElems[0].count;
|
||||
disp_desc = pElems[0].disp;
|
||||
} else {
|
||||
count_desc = pStack->count;
|
||||
if( pElems[pos_desc].type != DT_LOOP ) {
|
||||
pConvertor->stack_pos--;
|
||||
pStack--;
|
||||
disp = pStack->disp;
|
||||
disp_desc = ( pElems[pos_desc].disp +
|
||||
(pElems[pos_desc].count - count_desc) * pElems[pos_desc].extent);
|
||||
}
|
||||
}
|
||||
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n",
|
||||
pStack->index, pStack->count );
|
||||
|
||||
next_loop:
|
||||
end_loop = pStack->end_loop;
|
||||
while( pConvertor->stack_pos >= 0 ) {
|
||||
if( pos_desc == end_loop ) { /* end of the current loop */
|
||||
while( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pConvertor->stack_pos--;
|
||||
pStack--;
|
||||
if( pConvertor->stack_pos == -1 )
|
||||
return 1; /* completed */
|
||||
}
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 )
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
pos_desc++;
|
||||
disp = pStack->disp;
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
goto next_loop;
|
||||
}
|
||||
if( pElems[pos_desc].type == DT_LOOP ) {
|
||||
do {
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos,
|
||||
pos_desc, pElems[pos_desc].count,
|
||||
disp, pos_desc + pElems[pos_desc].disp + 1 );
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "advance loops" );
|
||||
/* update the current state */
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
goto next_loop;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems[pos_desc].type;
|
||||
rc = pConvertor->pFunctions[type]( count_desc,
|
||||
pInput, iCount, pElems[pos_desc].extent,
|
||||
pOutput + disp + disp_desc, oCount, pElems[pos_desc].extent,
|
||||
&advance );
|
||||
if( rc <= 0 ) {
|
||||
printf( "trash in the input buffer\n" );
|
||||
return -1;
|
||||
}
|
||||
iCount -= advance; /* decrease the available space in the buffer */
|
||||
pInput += advance; /* increase the pointer to the buffer */
|
||||
pConvertor->bConverted += advance;
|
||||
if( rc != count_desc ) {
|
||||
/* not all data has been converted. Keep the state */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos,
|
||||
pos_desc, count_desc - rc,
|
||||
disp + rc * pElems[pos_desc].extent, pos_desc );
|
||||
if( iCount != 0 )
|
||||
printf( "there is still room in the input buffer %d bytes\n", iCount );
|
||||
return 0;
|
||||
}
|
||||
pConvertor->converted += rc; /* number of elementd converted so far */
|
||||
pos_desc++; /* advance to the next data */
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
if( iCount == 0 ) break; /* break if there is no more data in the buffer */
|
||||
}
|
||||
|
||||
/* out of the loop: we have complete the data conversion or no more space
|
||||
* in the buffer.
|
||||
*/
|
||||
if( pConvertor->pStack[0].count < 0 ) return 1; /* data succesfully converted */
|
||||
|
||||
/* I complete an element, next step I should go to the next one */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||
pElems[pos_desc].count, disp, pos_desc );
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Unpack for the homogeneous case (plain memory copies, no per-type
 * function). Data is copied from iov[0].iov_base into pConv->pBaseBuf;
 * adjacent regions are merged into a single pending copy
 * (lastDisp / lastLength) before being flushed with MEMCPY. out_size is not
 * used.
 *
 * Returns, for a datatype flagged DT_FLAG_CONTIGUOUS, 1 when
 * pConv->bConverted has reached size * count and 0 otherwise; for every
 * other datatype the function returns 0.
 */
int convertor_unpack_homogeneous( convertor_t* pConv, struct iovec* iov, unsigned int out_size )
{
   dt_desc_t* pData = pConv->pDesc;
   dt_elem_desc_t* pElems;          /* description walked by this call */
   dt_stack_t* pStack;              /* current top of the loop stack */
   char* pSrcBuf = iov[0].iov_base; /* packed data still to be consumed */
   long lastDisp = 0;               /* start of the pending (not yet copied) region */
   long lastLength = 0;             /* length of the pending region */
   int stack_pos = 0;               /* position on the stack */
   int pos_desc;                    /* index of the current element description */
   int type;                        /* basic type at the current position */
   int i;                           /* iteration counter for contiguous loops */

   if( pData->flags & DT_FLAG_CONTIGUOUS ) {
      long extent = pData->ub - pData->lb;
      char* pDstBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;

      if( pData->size == extent ) {
         /* contiguous data or basic datatype with count */
         MEMCPY( pDstBuf, pSrcBuf, iov[0].iov_len );
         pConv->bConverted += iov[0].iov_len;
      } else {
         /* gaps between elements: scatter the payload element by element */
         long unpacked = 0;  /* packed bytes consumed by this call */

         for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
            MEMCPY( pDstBuf, pSrcBuf, pData->size );
            pSrcBuf += pData->size;
            pDstBuf += extent;
            unpacked += pData->size;
         }
         /* account for the bytes consumed (not for what is left of iov_len),
          * as convertor_pack_homogeneous does */
         pConv->bConverted += unpacked;
      }
      return (pConv->bConverted == (pData->size * pConv->count));
   }

   /* entry 0 of the stack stands for the repetition of the whole datatype */
   pStack = pConv->pStack;
   pStack->count = pConv->count;
   pStack->index = -1;
   pStack->disp = 0;
   pos_desc = 0;

   /* prefer the optimized description when the datatype carries one */
   if( pData->opt_desc.desc != NULL ) {
      pElems = pData->opt_desc.desc;
      pStack->end_loop = pData->opt_desc.used;
   } else {
      pElems = pData->desc.desc;
      pStack->end_loop = pData->desc.used;
   }

   DUMP_STACK( pStack, stack_pos, pElems, "starting" );
   DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
   DUMP( "top stack info {index = %d, count = %d}\n",
         pStack->index, pStack->count );

   while( pos_desc <= pStack->end_loop ) {
      if( pos_desc == pStack->end_loop ) {
         /* end of the current loop */
         if( --(pStack->count) == 0 ) {
            /* no iteration left: drop the loop */
            pStack--;
            if( --stack_pos == -1 ) break;
         } else {
            /* rewind to the loop start and shift by one loop extent */
            pos_desc = pStack->index;
            if( pos_desc == -1 )
               pStack->disp += (pData->ub - pData->lb);
            else
               pStack->disp += pElems[pos_desc].extent;
         }
         pos_desc++;
         continue;
      }

      if( pElems[pos_desc].type == DT_LOOP ) {
         if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
            dt_elem_desc_t* pLoop = &( pElems[pos_desc] );
            dt_elem_desc_t* pLast = &( pElems[pos_desc + pLoop->disp] );

            /* flush the region that is still pending before the loop starts,
             * otherwise its bytes would never reach the user buffer */
            if( lastLength != 0 ) {
               MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
               pSrcBuf += lastLength;
               pConv->bConverted += lastLength;
            }
            /* NOTE(review): lastDisp is not moved to the start of the loop
             * here, whereas dt_unroll sets it to
             * pStack->disp + pElems[pos_desc+1].disp - confirm which of the
             * two is intended. */
            lastLength = pLast->extent;
            for( i = 0; i < (pLoop->count - 1); i++ ) {
               MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
               pSrcBuf += pLast->extent;
               lastDisp += pLoop->extent;
            }
            /* jump over the loop; its last iteration stays pending */
            pos_desc += pLoop->disp + 1;
            continue;
         } else {
            /* push this loop and every loop nested directly at its start */
            do {
               PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
                           pStack->disp, pos_desc + pElems[pos_desc].disp );
               pos_desc++;
            } while( pElems[pos_desc].type == DT_LOOP );
         }
      }

      /* a basic datatype */
      type = pElems[pos_desc].type;
      if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
         /* adjacent to the pending region: just make the region longer */
         lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
      } else {
         /* flush the pending region and start a new one at this element */
         MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
         pSrcBuf += lastLength;
         pConv->bConverted += lastLength;
         lastDisp = pStack->disp + pElems[pos_desc].disp;
         lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
      }
      pos_desc++;  /* advance to the next data */
   }

   /* flush whatever is still pending */
   MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
   pConv->bConverted += lastLength;
   return 0;
}
|
||||
|
||||
/*
 * Entry point of the unpack side.
 *
 * - Without any element to convert the call returns 1 at once.
 * - For a convertor flagged DT_FLAG_CONTIGUOUS called with a NULL iov_base,
 *   no copy is made: pInputv[0] is pointed straight into the user buffer.
 *   An iov_len of 0 asks for the whole buffer; any other value asks for the
 *   next chunk of at most iov_len bytes.
 * - If the input pointer already lies inside the user buffer the call
 *   returns 1 without copying anything.
 * - Everything else is handed to convertor_progress.
 *
 * Returns 1 when the data is complete, 0 when more calls are needed, or
 * whatever convertor_progress returns.
 */
int convertor_unpack( convertor_t* pConvertor,
                      struct iovec* pInputv,
                      unsigned int inputCount )
{
   dt_desc_t* pData = pConvertor->pDesc;
   char* pUser = pConvertor->pBaseBuf;
   char* pPacked = pInputv[0].iov_base;
   int total;  /* size in bytes of all the data */

   if( pConvertor->count == 0 ) return 1;  /* nothing to do */

   if( (pConvertor->flags & DT_FLAG_CONTIGUOUS) && (pInputv[0].iov_base == NULL) ) {
      total = pConvertor->count * pData->size;

      if( pInputv[0].iov_len == 0 ) {
         /* give me the whole buffer */
         pInputv[0].iov_base = pConvertor->pBaseBuf + pData->true_lb;
         pInputv[0].iov_len = total;
         return 1;
      }

      /* what about the next chunk ? */
      pInputv[0].iov_base = pConvertor->pBaseBuf + pData->true_lb + pConvertor->bConverted;
      if( pInputv[0].iov_len > (total - pConvertor->bConverted) )
         pInputv[0].iov_len = total - pConvertor->bConverted;
      pConvertor->bConverted += pInputv[0].iov_len;
      return (pConvertor->bConverted == total);
   }

   /* the input already lives inside the user buffer: nothing to copy */
   if( (pPacked >= pUser) &&
       (pPacked < (pUser + pConvertor->count * (pData->ub - pData->lb))) ) {
      return 1;
   }

   return convertor_progress( pConvertor, pInputv, inputCount );
}
|
||||
|
||||
/* COPY_TYPE( TYPENAME, TYPE ) defines copy_TYPENAME, which copies up to
 * count items of TYPE from "from" (from_len bytes available, items
 * from_extent bytes apart) to "to" (items to_extent bytes apart) and stores
 * the number of bytes copied in *used. to_len is only reported via DUMP.
 *
 * Return value of the generated function:
 *  0 : nothing has been done
 *  positive value: number of items converted.
 *  negative value: -1 * number of items converted; less data was provided
 *                  than expected and the remainder is smaller than the size
 *                  of the basic datatype on the remote host.
 */
#define COPY_TYPE( TYPENAME, TYPE )                                           \
int copy_##TYPENAME( unsigned int count,                                      \
                     char* from, unsigned int from_len, long from_extent,     \
                     char* to, unsigned int to_len, long to_extent,           \
                     int* used )                                              \
{                                                                             \
   unsigned int remote_TYPE_size = sizeof(TYPE); /* TODO */                   \
   int sign = 1;   /* becomes -1 when the input ends inside an item */        \
   int idx;                                                                   \
                                                                              \
   if( (remote_TYPE_size * count) <= from_len ) {                             \
      DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \
            #TYPE, count, from, from_len, to, to_len );                       \
   } else {                                                                   \
      /* not enough input: only handle the items that are complete */         \
      count = from_len / remote_TYPE_size;                                    \
      if( (count * remote_TYPE_size) != from_len ) {                          \
         DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", \
               from_len - (count * remote_TYPE_size) );                       \
         sign = -1;                                                           \
      }                                                                       \
      DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n", \
            #TYPE, count, from, from_len, to, to_len );                       \
   }                                                                          \
                                                                              \
   if( (from_extent == sizeof(TYPE)) && (to_extent == sizeof(TYPE)) ) {       \
      /* both sides are dense: one copy moves everything */                   \
      MEMCPY( to, from, count * sizeof(TYPE) );                               \
   } else {                                                                   \
      /* strided on at least one side: move the items one by one */           \
      for( idx = 0; idx < count; idx++ ) {                                    \
         MEMCPY( to, from, sizeof(TYPE) );                                    \
         to += to_extent;                                                     \
         from += from_extent;                                                 \
      }                                                                       \
   }                                                                          \
   *used = count * sizeof(TYPE) ;                                             \
   return sign * count;                                                       \
}
|
||||
|
||||
COPY_TYPE( char, char );
|
||||
COPY_TYPE( short, short );
|
||||
COPY_TYPE( int, int );
|
||||
COPY_TYPE( float, float );
|
||||
COPY_TYPE( long, long );
|
||||
/*COPY_TYPE( double, double );*/
|
||||
COPY_TYPE( long_long, long long );
|
||||
COPY_TYPE( long_double, long double );
|
||||
COPY_TYPE( complex_float, complex_float_t );
|
||||
COPY_TYPE( complex_double, complex_double_t );
|
||||
|
||||
/*
 * Copy up to `count` doubles from `from` to `to`.
 *
 * from_len is the number of bytes available in the source; when it cannot
 * hold `count` elements only the whole elements that fit are copied.
 * from_extent / to_extent are the byte strides between consecutive elements
 * on each side.  to_len only appears in the trace output; it does not bound
 * the copy.
 *
 * On return *used holds the number of bytes consumed (count * sizeof(double)).
 * The return value is the number of elements copied, negated when the source
 * was short and left a partial element behind.
 */
int copy_double( unsigned int count,
                 char* from, unsigned int from_len, long from_extent,
                 char* to, unsigned int to_len, long to_extent,
                 int* used )
{
    const unsigned int remote_double_size = sizeof(double); /* TODO */
    unsigned int left;
    int sign = 1;

    if( (remote_double_size * count) <= from_len ) {
        /* the source buffer holds every requested element */
        DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n",
              "double", count, from, from_len, to, to_len );
    } else {
        /* short source buffer: only whole elements are copied */
        count = from_len / remote_double_size;
        if( (count * remote_double_size) != from_len ) {
            DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n",
                  from_len - (count * remote_double_size) );
            sign = -1; /* leftover bytes: report a negative count */
        }
        DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n",
              "double", count, from, from_len, to, to_len );
    }

    if( (from_extent != sizeof(double)) || (to_extent != sizeof(double)) ) {
        /* strided on at least one side: move one element at a time */
        for( left = count; left > 0; left-- ) {
            MEMCPY( to, from, sizeof(double) );
            to += to_extent;
            from += from_extent;
        }
    } else {
        /* packed on both sides: a single block copy is enough */
        MEMCPY( to, from, count * sizeof(double) );
    }
    *used = count * sizeof(double) ;
    return sign * count;
}
|
||||
|
||||
conversion_fct_t copy_functions[DT_MAX_PREDEFINED] = {
|
||||
(conversion_fct_t)NULL, /* DT_LOOP */
|
||||
(conversion_fct_t)NULL, /* DT_LB */
|
||||
(conversion_fct_t)NULL, /* DT_UB */
|
||||
(conversion_fct_t)NULL, /* DT_SPACE */
|
||||
(conversion_fct_t)copy_char, /* DT_CHAR */
|
||||
(conversion_fct_t)copy_char, /* DT_BYTE */
|
||||
(conversion_fct_t)copy_short, /* DT_SHORT */
|
||||
(conversion_fct_t)copy_int, /* DT_INT */
|
||||
(conversion_fct_t)copy_float, /* DT_FLOAT */
|
||||
(conversion_fct_t)copy_long, /* DT_LONG */
|
||||
(conversion_fct_t)copy_double, /* DT_DOUBLE */
|
||||
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG */
|
||||
(conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */
|
||||
(conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */
|
||||
(conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */
|
||||
};
|
||||
|
||||
/* Should we supply buffers to the convertor or can we use directly
|
||||
* the user buffer ?
|
||||
*/
|
||||
int convertor_need_buffers( convertor_t* pConvertor )
{
    /* 0 when the convertor is flagged contiguous, 1 otherwise */
    return (pConvertor->flags & DT_FLAG_CONTIGUOUS) ? 0 : 1;
}
|
||||
|
||||
int convertor_init_for_recv( convertor_t* pConv, unsigned int flags,
|
||||
dt_desc_t* pData, int count, void* pUserBuf )
|
||||
{
|
||||
dt_increase_ref( pData );
|
||||
pConv->pDesc = pData;
|
||||
pConv->flags = CONVERTOR_RECV;
|
||||
if( pConv->pStack != NULL ) free( pConv->pStack );
|
||||
pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (pData->btypes[DT_LOOP] + 2) );
|
||||
pConv->stack_pos = 0;
|
||||
pConv->pStack[0].index = -1; /* fake entry for the first step */
|
||||
pConv->pStack[0].count = count; /* fake entry for the first step */
|
||||
pConv->pStack[0].disp = 0;
|
||||
/* first we should decide which data representation will be used TODO */
|
||||
pConv->pStack[0].end_loop = pData->desc.used;
|
||||
pConv->pBaseBuf = pUserBuf;
|
||||
pConv->available_space = count * (pData->ub - pData->lb);
|
||||
pConv->count = count;
|
||||
pConv->pFunctions = copy_functions;
|
||||
pConv->converted = 0;
|
||||
pConv->bConverted = 0;
|
||||
if( (pData->flags & DT_FLAG_CONTIGUOUS) && (pData->size == (pData->ub - pData->lb)) )
|
||||
pConv->flags |= DT_FLAG_CONTIGUOUS;
|
||||
pConv->fAdvance = convertor_unpack_homogeneous;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Create a new convertor configured like pConvertor but not yet bound to
 * any data.
 *
 * Every field is first copied from pConvertor; then the per-use state
 * (pStack, pDesc, count, converted, bConverted, freebuf) is cleared so the
 * copy does not share the original's stack, datatype or buffer.
 *
 * Returns the new convertor, or NULL when the allocation fails.
 */
convertor_t* convertor_get_copy( convertor_t* pConvertor )
{
    convertor_t* pConv = (convertor_t*)calloc( 1, sizeof(convertor_t) );

    /* Do not copy into a failed allocation. */
    if( pConv == NULL ) return NULL;

    MEMCPY( pConv, pConvertor, sizeof(convertor_t) );
    pConv->pStack = NULL;
    pConv->pDesc = NULL;
    pConv->count = 0;
    pConv->converted = 0;
    pConv->bConverted = 0;
    pConv->freebuf = NULL;
    return pConv;
}
|
||||
|
||||
/*
 * Release the convertor referenced by *ppConv: its stack, its reference on
 * the datatype (when one is attached), its freebuf, and the convertor
 * itself.  *ppConv is set to NULL afterwards.  A NULL *ppConv is accepted
 * and left alone.  Always returns 0.
 */
int convertor_destroy( convertor_t** ppConv )
{
    convertor_t* pDoomed = *ppConv;

    if( pDoomed != NULL ) {
        free( pDoomed->pStack );   /* free(NULL) is a no-op */
        if( pDoomed->pDesc != NULL ) dt_decrease_ref( pDoomed->pDesc );
        free( pDoomed->freebuf );
        free( pDoomed );
        *ppConv = NULL;
    }
    return 0;
}
|
||||
|
||||
/* Get the number of elements from the data associated with this convertor that can be
|
||||
* retrieved from a received buffer with the size iSize.
|
||||
* To speed up this function you should use it with an iSize equal to the modulo
|
||||
* of the original size and the size of the data.
|
||||
* This function should be called with a initialized clean convertor.
|
||||
* Return value:
|
||||
* positive = number of basic elements inside
|
||||
* negative = some error occurs
|
||||
*/
|
||||
int dt_get_element_count( dt_desc_t* pData, size_t iSize )
{
    dt_stack_t* pStack;   /* pointer to the position on the stack */
    int pos_desc;         /* actual position in the description of the derived datatype */
    int end_loop;         /* last element in the actual loop */
    int type;             /* type at current position */
    int rc, nbElems = 0;
    int stack_pos = 0;

    DUMP( "dt_count_elements( %p, %d )\n", pData, iSize );
    /* Size the stack by the entry type, not by the pointer: sizeof(pStack)
     * would reserve only pointer-sized slots and the pushes below would run
     * past the end of the alloca'ed area.
     */
    pStack = alloca( sizeof(*pStack) * (pData->btypes[DT_LOOP] + 2) );
    /* fake bottom entry: one pass over the whole description */
    pStack->count = 1;
    pStack->index = -1;
    pStack->end_loop = pData->desc.used;
    pStack->disp = 0;
    pos_desc = 0;

    DUMP_STACK( pStack, stack_pos, pData->desc.desc, "starting" );
    DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
    DUMP( "top stack info {index = %d, count = %d}\n",
          pStack->index, pStack->count );

 next_loop:
    end_loop = pStack->end_loop;
    while( stack_pos >= 0 ) {
        if( pos_desc == end_loop ) { /* end of the current loop */
            while( --(pStack->count) == 0 ) { /* end of loop */
                stack_pos--;
                pStack--;
                if( stack_pos == -1 )
                    return nbElems;  /* completed */
            }
            /* iterations remain: restart the loop body one extent further */
            pos_desc = pStack->index;
            if( pos_desc == -1 )
                pStack->disp += (pData->ub - pData->lb);
            else
                pStack->disp += pData->desc.desc[pos_desc].extent;
            pos_desc++;
            goto next_loop;
        }
        if( pData->desc.desc[pos_desc].type == DT_LOOP ) {
            do {
                PUSH_STACK( pStack, stack_pos, pos_desc, pData->desc.desc[pos_desc].count,
                            0, pos_desc + pData->desc.desc[pos_desc].disp );
                pos_desc++;
            } while( pData->desc.desc[pos_desc].type == DT_LOOP ); /* let's start another loop */
            DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" );
            goto next_loop;
        }
        /* now here we have a basic datatype */
        type = pData->desc.desc[pos_desc].type;
        rc = pData->desc.desc[pos_desc].count * basicDatatypes[type].size;
        if( rc >= iSize ) {
            /* the remaining bytes end inside this entry: count what fits and stop */
            nbElems += iSize / basicDatatypes[type].size;
            break;
        }
        nbElems += pData->desc.desc[pos_desc].count;
        iSize -= rc;

        pos_desc++;  /* advance to the next data */
    }

    /* the stack lives in alloca'ed memory, nothing to release */
    return nbElems;
}
|
||||
|
||||
/*
 * Copy `count` elements of datatype pData from pSrcBuf to pDestBuf, both
 * buffers using the same layout.
 *
 * Basic datatypes are copied with one MEMCPY of pData->size * count bytes.
 * Otherwise the description is walked (the optimized one when present) and
 * each data entry is copied at the same displacement in both buffers;
 * entries that directly follow each other in memory are merged into one
 * MEMCPY.  Always returns 0.
 */
int dt_copy_content_same_dt( dt_desc_t* pData, int count,
                             char* pDestBuf, char* pSrcBuf )
{
    dt_stack_t* pStack;   /* pointer to the position on the stack */
    int pos_desc;         /* actual position in the description of the derived datatype */
    int type;             /* type at current position */
    int stack_pos = 0;
    long lastDisp = 0, lastLength = 0;  /* pending region not yet copied */
    dt_elem_desc_t* pElems;

    if( (pData->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) {
        /* basic datatype with count */
        MEMCPY( pDestBuf, pSrcBuf, pData->size * count );
        return 0;
    }

    /* Size the stack by the entry type, not by the pointer: sizeof(pStack)
     * would reserve only pointer-sized slots and the pushes below would run
     * past the end of the alloca'ed area.
     */
    pStack = alloca( sizeof(*pStack) * (pData->btypes[DT_LOOP]+1) );
    pStack->count = count;
    pStack->index = -1;
    pStack->disp = 0;
    pos_desc = 0;

    /* prefer the optimized description when one is available */
    if( pData->opt_desc.desc != NULL ) {
        pElems = pData->opt_desc.desc;
        pStack->end_loop = pData->opt_desc.used;
    } else {
        pElems = pData->desc.desc;
        pStack->end_loop = pData->desc.used;
    }

    DUMP_STACK( pStack, stack_pos, pElems, "starting" );
    DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
    DUMP( "top stack info {index = %d, count = %d}\n",
          pStack->index, pStack->count );

 next_loop:
    while( pos_desc <= pStack->end_loop ) {
        if( pos_desc == pStack->end_loop ) { /* end of the current loop */
            if( --(pStack->count) == 0 ) { /* end of loop */
                pStack--;
                if( --stack_pos == -1 ) break;
                /* NOTE(review): pos_desc is not reloaded after the pop, so the
                 * displacement update below uses the entry at the finished
                 * loop's end position - confirm this is intended.
                 */
            } else
                pos_desc = pStack->index;
            if( pos_desc == -1 )
                pStack->disp += (pData->ub - pData->lb);
            else
                pStack->disp += pElems[pos_desc].extent;
            pos_desc++;
            goto next_loop;
        }
        if( pElems[pos_desc].type == DT_LOOP ) {
            do {
                PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
                            pStack->disp, pos_desc + pElems[pos_desc].disp );
                pos_desc++;
            } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
            DUMP_STACK( pStack, stack_pos, pElems, "advance loops" );
            goto next_loop;
        }
        /* now here we have a basic datatype */
        type = pElems[pos_desc].type;
        if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
            /* starts right where the pending region ends: just extend it */
            lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
        } else {
            /* gap: flush the pending region and start a new one */
            MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
            lastDisp = pStack->disp + pElems[pos_desc].disp;
            lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
        }
        pos_desc++;  /* advance to the next data */
    }

    /* flush whatever is still pending */
    MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
    /* the stack lives in alloca'ed memory, nothing to release */
    return 0;
}
|
Загрузка…
x
Ссылка в новой задаче
Block a user