From 5b0c1354664226ffe18643afb489914704c2e206 Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Thu, 18 Mar 2004 18:06:33 +0000 Subject: [PATCH] New datatype engine. This commit was SVN r918. --- src/datatype/Makefile.am | 14 +- src/datatype/datatype.h | 901 +++++++++---------------------- src/datatype/datatype_internal.h | 79 +++ src/datatype/ddt_test.c | 603 +++++++++++++++++++++ src/datatype/dt_add.c | 188 +++++++ src/datatype/dt_create.c | 260 +++++++++ src/datatype/dt_create_array.c | 15 + src/datatype/dt_create_dup.c | 23 + src/datatype/dt_create_indexed.c | 90 +++ src/datatype/dt_create_struct.c | 42 ++ src/datatype/dt_create_vector.c | 64 +++ src/datatype/dt_destroy.c | 21 + src/datatype/dt_module.c | 71 +++ src/datatype/dt_old_limits.c | 48 ++ src/datatype/dt_optimize.c | 151 ++++++ src/datatype/dt_pack.c | 478 ++++++++++++++++ src/datatype/dt_unpack.c | 617 +++++++++++++++++++++ 17 files changed, 3004 insertions(+), 661 deletions(-) create mode 100644 src/datatype/datatype_internal.h create mode 100644 src/datatype/ddt_test.c create mode 100644 src/datatype/dt_add.c create mode 100644 src/datatype/dt_create.c create mode 100644 src/datatype/dt_create_array.c create mode 100644 src/datatype/dt_create_dup.c create mode 100644 src/datatype/dt_create_indexed.c create mode 100644 src/datatype/dt_create_struct.c create mode 100644 src/datatype/dt_create_vector.c create mode 100644 src/datatype/dt_destroy.c create mode 100644 src/datatype/dt_module.c create mode 100644 src/datatype/dt_old_limits.c create mode 100644 src/datatype/dt_optimize.c create mode 100644 src/datatype/dt_pack.c create mode 100644 src/datatype/dt_unpack.c diff --git a/src/datatype/Makefile.am b/src/datatype/Makefile.am index bc93b72333..1700dbe49c 100644 --- a/src/datatype/Makefile.am +++ b/src/datatype/Makefile.am @@ -9,19 +9,13 @@ noinst_LTLIBRARIES = libdatatype.la # Source code files -headers = \ - datatype.h +headers = datatype.h datatype_internal.h libdatatype_la_SOURCES = \ 
$(headers) \ - datatype.c \ - datatype_copy.c \ - datatype_crc32.c \ - datatype_create.c \ - datatype_delete.c \ - datatype_memcpy.c \ - datatype_pack.c \ - datatype_sum32.c + dt_add.c dt_create.c dt_create_array.c dt_create_dup.c dt_create_indexed.c \ + dt_create_struct.c dt_create_vector.c dt_destroy.c dt_module.c dt_old_limits.c \ + dt_optimize.c dt_pack.c dt_unpack.c # Conditionally install the header files diff --git a/src/datatype/datatype.h b/src/datatype/datatype.h index f9693d96c8..2200624cf1 100644 --- a/src/datatype/datatype.h +++ b/src/datatype/datatype.h @@ -1,664 +1,263 @@ -/* - * $HEADER$ - */ +/* -*- Mode: C; c-basic-offset:3 ; -*- */ -/** @file - * - * lam_datatype_t interface for LAM internal data type representation - * - * lam_datatype_t is a class which represents contiguous or - * non-contiguous datat together with constituent type-related - * information. It is the LAM's-eye view of MPI_Datatype. - */ - -#ifndef LAM_DATATYPE_H_INCLUDED -#define LAM_DATATYPE_H_INCLUDED 1 - -#include -#include -#include -#include -#include +#ifndef DATATYPE_H_HAS_BEEN_INCLUDED +#define DATATYPE_H_HAS_BEEN_INCLUDED #include "lam_config.h" #include "constants.h" #include "lfc/lam_object.h" #include "lfc/lam_hash_table.h" #include "types.h" - #include "mpi.h" - -/* fortran sizes and alignments ***************************************/ - -extern int lam_sizeof_f77_integer; -extern int lam_sizeof_f77_real; -extern int lam_sizeof_f77_dblprec; -extern int lam_sizeof_f77_complex; -extern int lam_sizeof_f77_dblcomplex; - -extern int lam_alignment_f77_integer; -extern int lam_alignment_f77_real; -extern int lam_alignment_f77_dblprec; -extern int lam_alignment_f77_complex; -extern int lam_alignment_f77_dblcomplex; - - -/* enums **************************************************************/ - -/** - * Datatype state flags +#define DT_LOOP 0x00 +#define DT_LB 0x01 +#define DT_UB 0x02 +#define DT_SPACE 0x03 +#define DT_CHAR 0x04 +#define DT_BYTE 0x05 +#define DT_SHORT 0x06 
+#define DT_INT 0x07 +#define DT_FLOAT 0x08 +#define DT_LONG 0x09 +#define DT_DOUBLE 0x0A +#define DT_LONG_LONG 0x0B +#define DT_LONG_DOUBLE 0x0C +#define DT_COMPLEX_FLOAT 0x0D +#define DT_COMPLEX_DOUBLE 0x0E +#define DT_END_LOOP 0x0F +/* if there are more basic datatypes than the number of bits in the int type + * the bdt_used field of the data description struct should be changed to long. */ -enum lam_datatype_state_t { - LAM_DATATYPE_STATE_COMMITTED = 1 << 0, - LAM_DATATYPE_STATE_CONTIGUOUS = 1 << 1, - LAM_DATATYPE_STATE_FORTRAN = 1 << 2, - LAM_DATATYPE_STATE_OPTIMIZED = 1 << 3, - LAM_DATATYPE_STATE_DONT_OPTIMIZE = 1 << 4, - LAM_DATATYPE_STATE_XDR = 1 << 5, - /* etc. */ +#define DT_MAX_PREDEFINED 0x10 + +/* flags for the datatypes. */ +#define DT_FLAG_DESTROYED 0x0001 /* user destroyed but some other layers still have a reference */ +#define DT_FLAG_COMMITED 0x0002 /* ready to be used for a send/recv operation */ +#define DT_FLAG_CONTIGUOUS 0x0004 /* contiguous datatype */ +#define DT_FLAG_OVERLAP 0x0008 /* datatype is improper for a recv operation */ +#define DT_FLAG_USER_LB 0x0010 /* has a user defined LB */ +#define DT_FLAG_USER_UB 0x0020 /* has a user defined UB */ +#define DT_FLAG_FOREVER 0x0040 /* cannot be removed: initial and predefined datatypes */ +#define DT_FLAG_IN_LOOP 0x0080 /* we are inside a loop */ +#define DT_FLAG_INITIAL 0x0100 /* one of the initial datatypes */ +#define DT_FLAG_DATA 0x0200 /* data or control structure */ +#define DT_FLAG_BASIC (DT_FLAG_INITIAL | DT_FLAG_COMMITED | DT_FLAG_FOREVER | DT_FLAG_CONTIGUOUS) + +#define DT_INCREASE_STACK 32 + +/* the basic element. A data description is composed + * by a set of basic elements. 
+ */ +typedef struct __dt_elem_desc { + unsigned short flags; /* flags for the record */ + unsigned short type; /* the basic data type id */ + unsigned int count; /* number of elements */ + long disp; /* displacement of the first element */ + unsigned int extent; /* extent of each element */ +} dt_elem_desc_t; + +typedef struct { + float r; + float i; +} complex_float_t; + +typedef struct { + double r; + double i; +} complex_double_t; + +/* The basic memory zone description. The idea is to be able to represent the + * data as a array of zones, thus allowing us to simply find when concatenating + * several data leads to merging contiguous zones of memory. + */ +typedef struct __dt_zone_desc { + int useless; +} dt_zone_desc_t; + +typedef struct __dt_struct_desc { + int length; /* the maximum number of elements in the description array */ + int used; /* the number of used elements in the description array */ + dt_elem_desc_t* desc; +} dt_type_desc_t; + +/* the data description. + */ +typedef struct __dt_desc { + lam_object_t super; + unsigned int size; /* total size in bytes of the memory used by the data if + * the data is put on a contiguous buffer */ + long true_lb; + long true_ub; /* the true ub of the data without user defined lb and ub */ + unsigned int align; /* data should be aligned to */ + long lb; /* lower bound in memory */ + long ub; /* upper bound in memory */ + unsigned short flags; /* the flags */ + unsigned short id; /* data id, normally the index in the data array. 
*/ + unsigned int nbElems; /* total number of elements inside the datatype */ + unsigned int bdt_used; /* which basic datatypes are used in the data description */ + + /* Attribute fields */ + lam_hash_table_t *keyhash; + char name[MPI_MAX_OBJECT_NAME]; + + dt_type_desc_t desc; /* the data description */ + dt_type_desc_t opt_desc; /* short description of the data used when conversion is useless + * or in the send case (without conversion) */ + void* args; /* data description for the user */ + + /* basic elements count used to compute the size of the datatype for + * remote nodes */ + unsigned int btypes[DT_MAX_PREDEFINED]; +} dt_desc_t, lam_datatype_t; + +OBJ_CLASS_DECLARATION( lam_datatype_t ); + +extern dt_desc_t basicDatatypes[]; + +#if defined(__GNUC__) +#define LMAX(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _b : _a); }) +#define LMIN(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _a : _b); }) +#define IMAX(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); }) +#define IMIN(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _a : _b); }) +#else +static long LMAX( long a, long b ) { return ( a < b ? b : a ); } +static long LMIN( long a, long b ) { return ( a < b ? a : b ); } +static int IMAX( int a, int b ) { return ( a < b ? b : a ); } +static int IMIN( int a, int b ) { return ( a < b ? 
a : b ); } +#endif /* __GNUC__ */ + +typedef struct __dt_stack { + int index; + int count; + int end_loop; + long disp; +} dt_stack_t; + +typedef struct __dt_convert { + char* buf; + unsigned int length; + dt_stack_t* pStack; + dt_desc_t* pDesc; +} dt_convert_t; + +int dt_load( void ); +int dt_unload( void ); +dt_desc_t* dt_create( int expectedSize ); +int dt_commit( dt_desc_t** ); +#define dt_free dt_destroy +int dt_free( dt_desc_t** ); +int dt_destroy( dt_desc_t** ); +void dt_dump( dt_desc_t* pData ); +void dt_dump_complete( dt_desc_t* pData ); +/* data creation functions */ +int dt_duplicate( dt_desc_t* oldType, dt_desc_t** newType ); +int dt_create_contiguous( size_t count, dt_desc_t* oldType, dt_desc_t** newType ); +int dt_create_vector( size_t count, int bLength, long stride, + dt_desc_t* oldType, dt_desc_t** newType ); +int dt_create_hvector( size_t count, int bLength, long stride, + dt_desc_t* oldType, dt_desc_t** newType ); +int dt_create_indexed( size_t count, int* pBlockLength, int* pDisp, + dt_desc_t* oldType, dt_desc_t** newType ); +int dt_create_hindexed( size_t count, int* pBlockLength, long* pDisp, + dt_desc_t* oldType, dt_desc_t** newType ); +int dt_create_indexed_block( size_t count, int bLength, int* pDisp, + dt_desc_t* oldType, dt_desc_t** newType ); +int dt_create_struct( size_t count, size_t* pBlockLength, long* pDisp, + dt_desc_t** pTypes, dt_desc_t** newType ); +int dt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t** newType ); +int dt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts, + int order, dt_desc_t* oldType, dt_desc_t** newType ); +int dt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib, + int* pDArgs, int* pPSizes, int order, dt_desc_t* oldType, + dt_desc_t** newType ); + +int dt_add( dt_desc_t* pdtBase, dt_desc_t* pdtNew, unsigned int count, long disp, long extent ); + +int dt_type_lb( dt_desc_t* pData, long* disp ); +int dt_type_ub( dt_desc_t* pData, long* disp ); 
+int dt_type_size ( dt_desc_t* pData, int *size ); +int dt_type_extent( dt_desc_t* pData, long* extent ); + +int dt_type_resize( dt_desc_t* pOld, long lb, long extent, dt_desc_t** pNew ); +int dt_get_extent( dt_desc_t* datatype, long* lb, long* extent); +int dt_get_true_extent( dt_desc_t* datatype, long* true_lb, long* true_extent); +int dt_get_element_count( dt_desc_t* datatype, size_t iSize ); +int dt_copy_content_same_dt( dt_desc_t* pData, int count, char* pDestBuf, char* pSrcBuf ); + +#define dt_increase_ref(PDT) OBJ_RETAIN( PDT ) +#define dt_decrease_ref(PDT) OBJ_RELEASE( PDT ) + +int dt_optimize_short( dt_desc_t* pData, int count, dt_type_desc_t* pTypeDesc ); + +#define REMOVE_FLAG( INT_VALUE, FLAG ) (INT_VALUE) = (INT_VALUE) & (~(FLAG)) /* clears FLAG; the former XOR toggled it, i.e. set it when it was clear */ +#define SET_FLAG( INT_VALUE, FLAG ) (INT_VALUE) = (INT_VALUE) | (FLAG) /* sets FLAG, other bits untouched */ +#define UNSET_FLAG( INT_VALUE, FLAG) (INT_VALUE) = (INT_VALUE) & (~(FLAG)) /* clears FLAG, other bits untouched */ + +#define REMOVE_CONTIGUOUS_FLAG( INT_VALUE ) REMOVE_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS) +#define SET_CONTIGUOUS_FLAG( INT_VALUE ) SET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS) +#define UNSET_CONTIGUOUS_FLAG( INT_VALUE ) UNSET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS) + +/* flags for the datatypes */ + +typedef int (*conversion_fct_t)( unsigned int count, + void* from, unsigned int from_len, long from_extent, + void* to, unsigned int in_length, long to_extent, + unsigned int* used ); + +/* keep the last 16 bits free for data flags */ +#define CONVERTOR_USELESS 0x00010000 +#define CONVERTOR_RECV 0x00020000 +#define CONVERTOR_SEND 0x00040000 + +#define CONVERTOR_STATE_MASK 0xFF000000 +#define CONVERTOR_STATE_START 0x01000000 +#define CONVEROTR_STATE_COMPLETE 0x02000000 /* NOTE(review): name misspells CONVERTOR; kept as-is because renaming would break users */ +#define CONVERTOR_STATE_ALLOC 0x04000000 + +typedef struct __struct_convertor convertor_t; +typedef int (*convertor_advance_fct_t)( convertor_t* pConvertor, + struct iovec* pInputv, + unsigned int inputCount ); + +/* and now the convertor stuff */ +struct __struct_convertor { + dt_desc_t* pDesc; + long remoteArch; + dt_stack_t* 
pStack; + /* the convertor functions pointer */ + /* the local stack for the actual conversion */ + int converted; /* the number of already converted elements */ + int bConverted; /* the size of already converted elements in bytes */ + unsigned int flags; + unsigned int count; + unsigned int stack_pos; + char* pBaseBuf; + unsigned int available_space; + void* freebuf; + convertor_advance_fct_t fAdvance; + conversion_fct_t* pFunctions; }; +extern conversion_fct_t copy_functions[DT_MAX_PREDEFINED]; + +/* call the convertor's fAdvance callback; expands to an expression (no trailing ';') so the int result can be tested or assigned */ +#define convertor_progress( PCONV, IOVEC, COUNT ) \ + ((PCONV)->fAdvance( (PCONV), (IOVEC), (COUNT) )) + +/* and finally the convertor functions */ +convertor_t* convertor_create( int remote_arch, int mode ); +int convertor_init_for_send( convertor_t* pConv, unsigned int flags, + dt_desc_t* pData, int count, void* pUserBuf ); +int convertor_init_for_recv( convertor_t* pConv, unsigned int flags, + dt_desc_t* pData, int count, void* pUserBuf ); +convertor_t* convertor_get_copy( convertor_t* pConvertor ); +int convertor_need_buffers( convertor_t* pConvertor ); +int convertor_pack( convertor_t* pConv, struct iovec* in, unsigned int in_size ); +int convertor_unpack( convertor_t* pConv, struct iovec* out, unsigned int out_size ); +int convertor_destroy( convertor_t** ppConv ); +int convertor_get_packed_size( convertor_t* pConv, unsigned int* pSize ); +int convertor_get_unpacked_size( convertor_t* pConv, unsigned int* pSize ); + +#endif /* DATATYPE_H_HAS_BEEN_INCLUDED */ -enum { - LAM_DATATYPE_PACK = 0, - LAM_DATATYPE_UNPACK, - LAM_DATATYPE_PACK_COMPLETE = 0, - LAM_DATATYPE_PACK_INCOMPLETE, - TYPE_PACK_INCOMPLETE_VECTOR, - TYPE_PACK_INCOMPLETE_DATAVEC_REPEAT, - TYPE_PACK_INCOMPLETE_DATAVEC_ELEMENT, - TYPE_PACK_ERROR = -1 -}; - - -/** - * Enumeration of datatype creation functions - */ -enum lam_datatype_kind_t { - - LAM_DATATYPE_KIND_BASIC = 0, - - LAM_DATATYPE_KIND_CONTIG, - LAM_DATATYPE_KIND_DUP, - LAM_DATATYPE_KIND_HINDEXED, - 
LAM_DATATYPE_KIND_HVECTOR, - LAM_DATATYPE_KIND_INDEXED, - LAM_DATATYPE_KIND_LB, - LAM_DATATYPE_KIND_PACKED, - LAM_DATATYPE_KIND_STRUCT, - LAM_DATATYPE_KIND_UB, - LAM_DATATYPE_KIND_VECTOR, - - LAM_DATATYPE_KIND_CONTIG_FORTRAN, - LAM_DATATYPE_KIND_HINDEXED_FORTRAN, - LAM_DATATYPE_KIND_HVECTOR_FORTRAN, - LAM_DATATYPE_KIND_INDEXED_FORTRAN, - LAM_DATATYPE_KIND_STRUCT_FORTRAN, - LAM_DATATYPE_KIND_VECTOR_FORTRAN -}; - - -typedef enum lam_datatype_state_t lam_datatype_state_t; -typedef enum lam_datatype_kind_t lam_datatype_kind_t; - - -/* types **************************************************************/ - -typedef struct lam_datatype_t lam_datatype_t; -typedef struct lam_datavec_element_t lam_datavec_element_t; -typedef struct lam_datavec_t lam_datavec_t; -typedef struct lam_dataxdr_t lam_dataxdr_t; -typedef struct lam_pack_state_t lam_pack_state_t; -typedef struct lam_memcpy_state_t lam_memcpy_state_t; - -/** - * Function prototype for a generalized memcpy() - */ -typedef void *(lam_memcpy_fn_t) (void *restrict dst, - const void *restrict src, - size_t size, lam_memcpy_state_t *check); - - -/** - * Internal representation of MPI datatype - */ -struct lam_datatype_t { - - lam_object_t super; /**< object super class */ - char name[MPI_MAX_OBJECT_NAME]; /**< object name */ - int flags; /**< bit flags */ - - /* Attributes */ - - lam_hash_table_t *keyhash; - - /* cached information */ - - ssize_t lower_bound; - size_t extent; - size_t packed_size; /**< size in bytes, ignoring gaps */ - int nbasic; /**< number of basic elements */ - - /* optimized representation */ - - size_t datavec_size; /**< size of optimized representation */ - lam_datavec_t *datavec; /**< optimized representation (may be null) */ - - /* XDR representation */ - - size_t dataxdr_size; /**< size of XDR representation */ - lam_dataxdr_t *dataxdr; /**< XDR representation (may be null) */ - - /* full representation (c.f. 
MPI_Type_create_struct) */ - - struct { - lam_datatype_kind_t c_kind; /**< creation function */ - int c_count; /**< number of blocks */ - int *c_blocklengths; /**< number of elements in each block */ - MPI_Aint *c_offset; /**< stride/displacement as appropriate */ - lam_datatype_t **c_types; /**< array of types (array) */ - } creator; -}; - -OBJ_CLASS_DECLARATION(lam_datatype_t); - - -/** - * An optimized representation of noncontiguous data used by packing - * routines - */ -struct lam_datavec_t { - size_t nrepeat; - ssize_t repeat_offset; - size_t nelement; - lam_datavec_element_t *element; -}; - - -/** - * An element of a data type in optimized form - */ -struct lam_datavec_element_t { - size_t size; /**< size in bytes of element */ - ssize_t offset; /**< offset from start of data type */ - ssize_t seq_offset; /**< offset from start of packed data type */ -}; - - -/** - * XDR representation of a datatype - */ -struct lam_dataxdr_element_t { - /* to be done */ - void *xdrs; /**< XDR stream */ -}; - - -/** - * State of incremental memcpy with checksum or CRC - */ -struct lam_memcpy_state_t { - size_t size; /**< total size in bytes of the object being checksummed / CRCed */ - size_t partial_size; /**< size of non- uint32_t to be carried over to next call */ - uint32_t partial_int; /**< value of non- uint32_t to be carried over to next call */ - uint32_t sum; /**< current value of the CRC or checksum */ - bool first_call; /**< is this the first call for this checksum/CRC? */ -}; - - -/** - * Pack state - * - * Structure to store the state of an incremental pack/unpack of a - * datatype. 
- */ -struct lam_pack_state_t { - size_t type_index; /**< current index of datatype */ - size_t repeat_index; /**< current index of datavec repeat */ - size_t element_index; /**< current index of datavec element */ - size_t datavec_offset; /**< current offset into datavec element */ - size_t packed_offset; /**< current offset into packed buffer */ -}; - - -/* interface **********************************************************/ - -BEGIN_C_DECLS - -/** - * Test 32-bit alignment of an address - * - * @param address An address - * @return true if the address is 32-bit aligned - */ -static inline bool lam_aligned32(void *addr) -{ - if (((uintptr_t) addr & (uintptr_t) 3) == (uintptr_t) 0) { - return true; - } else { - return false; - } -} - - -/** - * Test 64-bit alignment of an address - * - * @param address An address - * @return true if the address is 64-bit aligned - */ -static inline bool lam_aligned64(void *addr) -{ - if (((uintptr_t) addr & (uintptr_t) 7) == (uintptr_t) 0) { - return true; - } else { - return false; - } -} - - -/** - * Return a 32-bit checksum of (the contents of) an array of data - * types - * - * @param addr Data type array - * @param count Size of array - * @param datatype Datatype descriptor - * @return Checksum - */ -uint32_t lam_datatype_sum32(const void *addr, - size_t count, - lam_datatype_t *datatype); - - -/** - * Copy (the contents of) an array of data types - * - * @param dst Output data type array - * @param src Input data type array - * @param count Size of array - * @param datatype Datatype descriptor - * @param check Pointer to checksum or CRC - * @return 0 on success, -1 on error - */ -int lam_datatype_copy(void *dst, - const void *src, - size_t count, - lam_datatype_t *datatype, - lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check); - -/** - * Copy (the contents of) an array of data types, and convert to - * another datatype - * - * @param dst Output data type array - * @param dst_count Size of output array - * @param 
dst_datatype Output datatype descriptor - * @param src Input data type array - * @param src_count Size of input array - * @param src_datatype Input datatype descriptor - * @param checksum Checksum - * @return 0 on success, -1 on error - */ -int lam_datatype_convert(void *dst, - lam_datatype_t *dst_datatype, - size_t dst_count, - const void *src, - lam_datatype_t *src_datatype, - size_t src_count, - lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check); - - -/** - * Initialize pack state structure - * - * @param state Pointer to state structure - * @return LAM return code - */ -static inline int lam_pack_state_init(lam_pack_state_t *state) -{ - assert(state); - - state->type_index = 0; - state->repeat_index = 0; - state->element_index = 0; - state->datavec_offset = 0; - state->packed_offset = 0; -} - - -/** - * Incrementally pack or unpack a buffer to/from an array of - * datatypes. - * - * DO NOT USE THIS FUNCTION DIRECTLY: lam_datatype_pack or - * lam_datatype_unpack instead. - * - * @param direction 0 for pack , non-zero for unpack - * @param state current state of the incremental pack/unpack - * @param typebuf array of types - * @param ntype size of type array - * @param buf buffer to pack into/unpack from - * @param bufsize size of buffer - * @param datatype type descriptor - * @param memcpy_fn pointer to memcpy function - * @param check pointer to checksum - * @return 0 complete, non-zero otherwise - * - * Incrementally copy data type arrays to/from a packed buffer by - * iterating over the type and type_map until we finish or run out of - * room. - * - * The state (all members) should be initialized to 0 before the first - * call. - */ -int lam_datatype_packer(lam_pack_state_t *state, - void *buf, - size_t bufsize, - void *typebuf, - size_t ntype, - lam_datatype_t *datatype, - lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check, - int pack_direction); - - -/** - * Incrementally pack a buffer from an array of datatypes. 
- * - * The arguments for this function are the same as for - * lam_datatype_packer except that the last argument (pack_direction) - * is not required. - */ -static inline int lam_datatype_pack(lam_pack_state_t *state, - void *buf, - size_t bufsize, - const void *typebuf, - size_t ntype, - lam_datatype_t *datatype, - lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check) -{ - return lam_datatype_packer(state, buf, bufsize, (void *) typebuf, - ntype, datatype, memcpy_fn, check, - LAM_DATATYPE_PACK); -} - - -/** - * Incrementally unpack a buffer to an array of datatypes. - * - * The arguments for this function are the same as for - * lam_datatype_packer except that the last argument (pack_direction) - * is not required. - */ -static inline int lam_datatype_unpack(lam_pack_state_t *state, - const void *buf, - size_t bufsize, - void *typebuf, - size_t ntype, - lam_datatype_t *datatype, - lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check) -{ - return lam_datatype_packer(state, (void *) buf, bufsize, typebuf, - ntype, datatype, memcpy_fn, check, - LAM_DATATYPE_UNPACK); -} - - -/** - * Incrementally generate an iovec for gathering from an array of - * datatypes - * - * @param state current state of the incremental pack/unpack - * @param base_addr base address for iovec offsets - * @param vec iovec buffer - * @param vec_count maximum length of iovec buffer - * @param max_bytes maximum bytes addressed by iovec - * @param buf buffer to pack into/unpack from - * @param bufsize size of buffer - * @param typebuf array of types - * @param ntype size of type array - * @param type type descriptor - * @return 0 if complete, non-zero otherwise - * - * Incrementally traverse an array of datatypes and generate an iovec - * of at most length vec_count and addressing at most max_bytes. This - * can be used to do a (partial) RDMA gather of the datatype array. - * - * The state (all members) should be initialized to 0 before the first - * call. 
- */ -int lam_datatype_gather_iovec(lam_pack_state_t *state, - void *base_addr, - struct iovec *vec, - size_t vec_count, - size_t max_bytes, - const void *typebuf, - size_t ntype, - lam_datatype_t *datatype, - lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *); - -/** - * Incrementally generate an iovec for scattering from a packed array - * of datatypes - * - * @param state current state of the incremental pack/unpack - * @param base_addr base address for iovec offsets - * @param vec iovec buffer - * @param vec_count maximum length of iovec buffer - * @param max_bytes maximum bytes addressed by iovec - * @param buf packed buffer - * @param bufsize size of buffer - * @param typebuf array of types - * @param ntype size of type array - * @param type type descriptor - * @return 0 if complete, non-zero otherwise - * - * Incrementally copy data type arrays to/from a packed buffer. by - * iterating over the type and type_map until we finish or run out of - * room. - * - * Incrementally traverse a packed array of datatypes and generate an - * iovec of at most length vec_count and addressing at most max_bytes. - * This can be used to do a (partial) RDMA scatter of the datatype - * array. - * - * The state (all members) should be initialized to 0 before the first - * call. 
- */ -int lam_datatype_scatter_iovec(lam_pack_state_t *state, - void *base_addr, - struct iovec *vec, - size_t vec_count, - size_t max_bytes, - const void *buf, - size_t bufsize, - lam_datatype_t *datatype, - lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check); - - - - - -/* - * incremental memcpy with checksum / CRC functions - */ - - -/** - * initialize the state for an incremental memcpy with checksum / CRC - * - * @param state pointer to state object for the current sequence of copies - * @param sum_size the length of the entire buffer to be checksummed - */ -static inline void -lam_memcpy_init(lam_memcpy_state_t *state, size_t sum_size) -{ - state->size = sum_size; - state->first_call = true; -} - - -/** - * Copy data from one buffer to another - * - * @param dst pointer to the destination buffer - * @param src pointer to the source buffer - * @param size size of the buffer - * @param check unused - * @return the original value of dst - */ -static inline void *lam_memcpy(void *dst, const void *src, size_t size, - void *check) -{ - return memcpy(dst, src, size); -} - - -/** - * An alternative version of memcpy that may out-perform the system - * version on some (silly) systems. - * - * @param dst pointer to the destination buffer - * @param src pointer to the source buffer - * @param size size of the buffer - * @param state unused - * @return the original value of dst - */ -void *lam_memcpy_alt(void *dst, const void *src, size_t size, - lam_memcpy_state_t *state); - - -/** - * Generate a 32-bit CRC for a buffer - * - * @param buffer Data buffer - * @param size Size of buffer - * @param initial_crc Initial value of the CRC register - * @return The CRC - * - * Generate a 32-bit for a data buffer starting from a given CRC - * value. 
- */ -uint32_t lam_crc32(const void *buffer, size_t size, - uint32_t initial_crc); - - -/** - * Generate a 32-bit checksum for a buffer - * - * @param buffer Data buffer - * @param size Size of buffer - * @return The CRC - * - * Generate a 32-bit for a data buffer starting from a given CRC - * value. - */ -uint32_t lam_sum32(const void *buffer, size_t size); - - -/** - * Copy data from one buffer to another and calculate a 32-bit CRC - * - * @param dst pointer to the destination buffer - * @param src pointer to the source buffer - * @param size size of the buffer - * @param state pointer to a memcpy with checksum/CRC state structure - * @return the original value of dst - * - * This handles cumulative CRCs for for arbitrary lengths and address - * alignments as best as it can. The initial contents of state->sum is - * used as the starting value of the CRC. The final CRC is placed - * back in state->sum. - */ -void *lam_memcpy_crc32(void *dst, - const void *src, - size_t size, - lam_memcpy_state_t *check); - - -/** - * Copy data from one buffer to another and calculate a 32-bit checksum - * - * @param dst pointer to the destination buffer - * @param src pointer to the source buffer - * @param size size of the buffer - * @param state pointer to a memcpy with checksum/CRC state structure - * @return the original value of dst - * - * This handles cumulative checksumming for arbitrary lengths and - * address alignments as best as it can; the contents of - * lastPartialLong and lastPartialLength are updated to reflected the - * last partial word's value and length (in bytes) -- this should - * allow proper handling of checksumming contiguous or noncontiguous - * buffers via multiple calls of bcopy_csum() - Mitch - */ -void *lam_memcpy_sum32(void *dst, - const void *src, - size_t size, - lam_memcpy_state_t *check); - - -/** - * Copy data from one buffer to another and calculate a 32-bit checksum - * - * @param dst pointer to the destination buffer - * @param src pointer 
to the source buffer - * @param size size of the buffer - * @param state pointer to a memcpy with checksum/CRC state structure - * @return the original value of dst - */ -void *lam_memcpy_sum64(void *dst, - const void *src, - size_t size, - lam_memcpy_state_t *check); - - -/** - * Create a LAM/MPI datatype - * - * @param combiner integer identifying the kind of MPI create function - * @param ninteger number of integers passed to the create function - * @param integer array of integers passed to the create function - * @param naddress number of addresses passed to the create function - * @param address array of addresses passed to the create function - * @param ntype number of data types passed to the create function - * @param type array of data types passed to the create function - * @param newtype pointer to address of new type - * @return LAM_SUCCESS on successful creation, LAM_ERROR otherwise - * - * This is the central location for creation of data types in LAM/MPI. - * All MPI_Type_create functions rely upon this to do the actual type - * creation. - */ -int lam_datatype_create(int combiner, - int nintegers, - int integers[], - int naddresses, - ssize_t addresses[], - int ntypes, - lam_datatype_t *types[], lam_datatype_t **newtype); - - -/** - * Delete a LAM/MPI datatype (actually, just mark it for deletion) - * - * @param type datatype - * @return LAM_SUCCESS on success, LAM_ERROR otherwise - * - * This is the central location for creation of data types in LAM/MPI. - * All MPI_Type_create functions rely upon this to do the actual type - * creation. 
- */ -int lam_datatype_delete(lam_datatype_t *type); - -END_C_DECLS - -#endif /* LAM_DATATYPE_H_INCLUDED */ diff --git a/src/datatype/datatype_internal.h b/src/datatype/datatype_internal.h new file mode 100644 index 0000000000..c0a4563370 --- /dev/null +++ b/src/datatype/datatype_internal.h @@ -0,0 +1,79 @@ +#ifndef DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED +#define DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED + +#if defined(VERBOSE) +# define DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME ) \ + dump_stack( (PSTACK), (STACK_POS), (PDESC), (NAME) ) +# if defined(__GNUC__) +# define DUMP(ARGS...) printf(ARGS) +# else +# if defined(ACCEPT_C99) +# define DUMP( ... ) printf(__VA_ARGS__) /* C99 spelling: __VA_ARGS__ is only valid with an unnamed '...' */ +# else +# define DUMP printf +# endif /* ACCEPT_C99 */ +# endif /* __GNUC__ */ +#else +# define DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME ) +# if defined(__GNUC__) +# define DUMP(ARGS...) +# else +# if defined(ACCEPT_C99) +# define DUMP(...) /* C99 spelling; expands to nothing when VERBOSE is off */ +# else + static void DUMP() { /* empty hopefully removed by the compiler */ } +# endif /* ACCEPT_C99 */ +# endif /* __GNUC__ */ +#endif /* VERBOSE */ + +extern void dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDesc, char* name ); +#define SAVE_STACK( PSTACK, INDEX, COUNT, DISP, END_LOOP) \ +do { \ + (PSTACK)->index = (INDEX); \ + (PSTACK)->count = (COUNT); \ + (PSTACK)->disp = (DISP); \ + (PSTACK)->end_loop = (END_LOOP); \ +} while(0) + +#define PUSH_STACK( PSTACK, STACK_POS, INDEX, COUNT, DISP, END_LOOP) \ +do { \ + dt_stack_t* pTempStack = (PSTACK) + 1; \ + SAVE_STACK( pTempStack, (INDEX), (COUNT), (DISP), (END_LOOP) ); \ + (STACK_POS)++; \ + (PSTACK) = pTempStack; \ +} while(0) + +#define MEMCPY( DST, SRC, BLENGTH ) memcpy( (DST), (SRC), (BLENGTH) ) + +#ifdef USELESS +#define MEMCPY_LIMIT 1 + +#define MEMCPY( DST, SRC, BLENGTH ) \ +do { \ + if( (BLENGTH) < (MEMCPY_LIMIT) ) { \ + long mask = sizeof(int) - 1; \ + char *dst = (char*)(DST), *src = (char*)(SRC); \ + int i; \ + if( ((long)(DST) & mask) == ((long)(SRC) & mask) ) { \ + int 
*idst = (int*)((long)(DST) & (~mask)); \ + int *isrc = (int*)((long)(SRC) & (~mask)); \ + for( i = 0; i < ((long)(DST) & mask); i++ ) { \ + *dst = *src; dst++; src++; \ + } \ + if( ((char*)idst) != dst ) { \ + idst++; isrc++; \ + } \ + for( i = 0; i < ((BLENGTH) >> 2); i++ ) { \ + *idst = *isrc; idst++; isrc++; \ + } \ + } else { \ + for( i = 0; i < (BLENGTH); i++ ) { \ + *dst = *src; dst++; src++; \ + } \ + } \ + } else \ + memcpy( (DST), (SRC), (BLENGTH) ); \ +} while(0) +#endif /* USELESS */ + +#endif /* DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED */ diff --git a/src/datatype/ddt_test.c b/src/datatype/ddt_test.c new file mode 100644 index 0000000000..6becd4372d --- /dev/null +++ b/src/datatype/ddt_test.c @@ -0,0 +1,603 @@ +#include "ddt.h" +#include +#include +#include +#include + +#define TIMER_DATA_TYPE struct timeval +#define GET_TIME(TV) gettimeofday( &(TV), NULL ) +#define ELAPSED_TIME(TSTART, TEND) (((TEND).tv_sec - (TSTART).tv_sec) * 1000000 + ((TEND).tv_usec - (TSTART).tv_usec)) + +int mpich_typeub( void ) +{ + int errs = 0; + long extent, lb, extent1, extent2, extent3; + long displ[2]; + int blens[2]; + dt_desc_t *type1, *type2, *type3, *types[2]; + + dt_create_vector( 2, 1, 4, &(basicDatatypes[DT_INT]), &type1 ); + dt_commit( &type1 ); + dt_get_extent( type1, &lb, &extent ); + extent1 = 5 * sizeof(int); + if (extent != extent1) { + printf("EXTENT 1 %ld != %ld\n",extent,extent1); + errs++; + printf("extent(type1)=%ld\n",(long)extent); + } + + blens[0]=1; + blens[1]=1; + displ[0]=0; + displ[1]=sizeof(int)*4; + types[0]=type1; + types[1]=&(basicDatatypes[DT_UB]); + extent2 = displ[1]; + + /* using MPI_UB and Type_struct, monkey with the extent, making it 16 + */ + dt_create_struct( 2, blens, displ, types, &type2 ); + dt_commit( &type2 ); + dt_get_extent( type2, &lb, &extent ); + if (extent != extent2) { + printf("EXTENT 2 %ld != %ld\n",extent,extent2); + errs++; + printf("extent(type2)=%ld\n",(long)extent); + } + + /* monkey with the extent again, making it 4 + 
* ===> MPICH gives 4 + * ===> MPIF gives 16, the old extent + */ + displ[1]=sizeof(int); + types[0]=type2; + types[1]=&(basicDatatypes[DT_UB]); + extent3 = extent2; + + dt_create_struct( 2, blens, displ, types, &type3 ); + dt_commit( &type3 ); + + dt_get_extent( type3, &lb, &extent ); + if (extent != extent3) { + printf("EXTENT 3 %ld != %ld\n",extent,extent3); + errs++; + printf("extent(type3)=%ld\n",(long)extent); + } + + dt_free( &type1 ); + dt_free( &type2 ); + dt_free( &type3 ); + return errs; +} + +int mpich_typeub2( void ) +{ + int blocklen[3], err = 0, sz1, sz2, sz3; + long disp[3], lb, ub, ex1, ex2, ex3; + dt_desc_t *types[3], *dt1, *dt2, *dt3; + + blocklen[0] = 1; + blocklen[1] = 1; + blocklen[2] = 1; + disp[0] = -3; + disp[1] = 0; + disp[2] = 6; + types[0] = &(basicDatatypes[DT_LB]); + types[1] = &(basicDatatypes[DT_INT]); + types[2] = &(basicDatatypes[DT_UB]); + + dt_create_struct(3,blocklen,disp, types,&dt1); + dt_commit(&dt1); + + dt_type_lb(dt1, &lb); dt_type_ub(dt1, &ub); + dt_type_extent(dt1,&ex1); dt_type_size(dt1,&sz1); + + /* Values should be lb = -3, ub = 6 extent 9; size depends on implementation */ + if (lb != -3 || ub != 6 || ex1 != 9) { + printf("Example 3.26 type1 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex1, sz1); + err++; + } + else + printf("Example 3.26 type1 correct\n" ); + + dt_create_contiguous(2,dt1,&dt2); + dt_type_lb(dt2, &lb); dt_type_ub(dt2, &ub); + dt_type_extent(dt2,&ex2); dt_type_size(dt2,&sz2); + /* Values should be lb = -3, ub = 15, extent = 18, size depends on implementation */ + if (lb != -3 || ub != 15 || ex2 != 18) { + printf("Example 3.26 type2 lb %d ub %d extent %d size %d\n", (int)-3, (int)15, (int)18, 8); + printf("Example 3.26 type2 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex2, sz2); + err++; + } + else + printf("Example 3.26 type1 correct\n" ); + + dt_create_contiguous(2,dt1,&dt2); + dt_type_lb(dt2, &lb); dt_type_ub(dt2, &ub); + dt_type_extent(dt2,&ex2); dt_type_size(dt2,&sz2); + /* 
Values should be lb = -3, ub = 15, extent = 18, size depends on implementation */ + if (lb != -3 || ub != 15 || ex2 != 18) { + printf("Example 3.26 type2 lb %d ub %d extent %d size %d\n", (int)-3, (int)15, (int)18, 8); + printf("Example 3.26 type2 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex2, sz2); + err++; + } + else + printf( "Example 3.26 type2 correct\n" ); + + types[0]=dt1; types[1]=dt1; + blocklen[0]=1; blocklen[1]=1; + disp[0]=0; disp[1]=ex1; + + dt_create_struct(2, blocklen, disp, types, &dt3); + dt_commit(&dt3); + + dt_type_lb(dt3, &lb); dt_type_ub(dt3, &ub); + dt_type_extent(dt3,&ex3); dt_type_size(dt3,&sz3); + /* Another way to express type2 */ + if (lb != -3 || ub != 15 || ex3 != 18) { + printf("type3 lb %d ub %d extent %d size %d\n", (int)-3, (int)15, (int)18, 8); + printf("type3 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex3, sz2); + err++; + } + else + printf( "type3 correct\n" ); + + dt_free( &dt1 ); + dt_free( &dt2 ); + dt_free( &dt3 ); + return err; +} + +int mpich_typeub3( void ) +{ + int blocklen[2], sz, err = 0, idisp[3]; + long disp[3], lb, ub, ex; + dt_desc_t *types[3], *dt1, *dt2, *dt3, *dt4, *dt5; + + /* Create a datatype with explicit LB and UB */ + blocklen[0] = 1; + blocklen[1] = 1; + blocklen[2] = 1; + disp[0] = -3; + disp[1] = 0; + disp[2] = 6; + types[0] = &(basicDatatypes[DT_LB]); + types[1] = &(basicDatatypes[DT_INT]); + types[2] = &(basicDatatypes[DT_UB]); + + /* Generate samples for contiguous, hindexed, hvector, indexed, and vector (struct and contiguous tested in typeub2) */ + dt_create_struct(3,blocklen,disp, types,&dt1); + dt_commit(&dt1); + +/* This type is the same as in typeub2, and is tested there */ + + types[0]=dt1; types[1]=dt1; + blocklen[0]=1; blocklen[1]=1; + disp[0]=-4; disp[1]=7; + idisp[0]=-4; idisp[1]=7; + + dt_create_hindexed( 2, blocklen, disp, dt1, &dt2 ); + dt_commit( &dt2 ); + + dt_type_lb( dt2, &lb ); dt_type_ub( dt2, &ub ); + dt_type_extent( dt2, &ex ); dt_type_size( dt2, &sz 
); + + if (lb != -7 || ub != 13 || ex != 20) { + printf("hindexed lb %d ub %d extent %d size %d\n", (int)-7, (int)13, (int)20, sz); + printf("hindexed lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz); + err++; + } + else + printf( "hindexed ok\n" ); + + dt_create_indexed( 2, blocklen, idisp, dt1, &dt3 ); + dt_commit( &dt3 ); + + dt_type_lb( dt3, &lb ); dt_type_ub( dt3, &ub ); + dt_type_extent( dt3, &ex ); dt_type_size( dt3, &sz ); + + if (lb != -39 || ub != 69 || ex != 108) { + printf("indexed lb %d ub %d extent %d size %d\n", (int)-39, (int)69, (int)108, sz); + printf("indexed lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz); + err++; + } + else + printf( "indexed ok\n" ); + + dt_create_hvector( 2, 1, 14, dt1, &dt4 ); + dt_commit( &dt4 ); + + dt_type_lb( dt4, &lb ); dt_type_ub( dt4, &ub ); + dt_type_extent( dt4, &ex ); dt_type_size( dt4, &sz ); + + if (lb != -3 || ub != 20 || ex != 23) { + printf("hvector lb %d ub %d extent %d size %d\n", (int)-3, (int)20, (int)23, sz); + printf("hvector lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz); + err++; + } + else + printf( "hvector ok\n" ); + + dt_create_vector( 2, 1, 14, dt1, &dt5 ); + dt_commit( &dt5 ); + + dt_type_lb( dt5, &lb ); dt_type_ub( dt5, &ub ); + dt_type_extent( dt5, &ex ); dt_type_size( dt5, &sz ); + + + if (lb != -3 || ub != 132 || ex != 135) { + printf("vector lb %d ub %d extent %d size %d\n", (int)-3, (int)132, (int)135, sz); + printf("vector lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz); + err++; + } + else + printf( "vector ok\n" ); + + dt_free( &dt1 ); + dt_free( &dt2 ); + dt_free( &dt3 ); + dt_free( &dt4 ); + dt_free( &dt5 ); + return err; +} + +void print_double_mat( size_t N, double* mat ) +{ + int i, j; + double* pMat; + + for( i = 0; i < N; i++ ) { + printf( "(%4d) :", i * N * sizeof(double) ); + pMat = mat + i * N; + for( j = 0; j < N; j++ ) { + printf( "%5.1f ", *pMat ); + pMat++; + } + printf( "\n" ); + } +} + +int 
init_random_upper_matrix( size_t N, double* mat ) +{ + int i, j; + + srand( time(NULL) ); + for( i = 0; i < N; i++ ) + for( j = i; j < N; j++ ) { + *mat = (double)random(); + mat++; + } + return 0; +} + +int check_diag_matrix( size_t N, double* mat1, double* mat2 ) +{ + int i, j; + + for( i = 0; i < N; i++ ) { + mat1 += i; + mat2 += i; + for( j = i; j < N; j++ ) { + if( *mat1 != *mat2 ) { + printf( "error in position (%d, %d) expect %f and find %f\n", + i, j, *mat1, *mat2 ); + return -1; + } + mat1++; mat2++; + } + } + return 0; +} + +dt_desc_t* upper_matrix( size_t mat_size ) +{ + int *disp, i; + size_t *blocklen; + dt_desc_t* upper; + + disp = (int*)malloc( sizeof(int) * mat_size ); + blocklen = (size_t*)malloc( sizeof(size_t) * mat_size ); + + for( i = 0; i < mat_size; i++ ) { + disp[i] = i * mat_size + i; + blocklen[i] = mat_size - i; + } + + dt_create_indexed( mat_size, blocklen, disp, &(basicDatatypes[DT_DOUBLE]), + &upper ); + free( disp ); + free( blocklen ); + return upper; +} + +dt_desc_t* lower_matrix( size_t mat_size ) +{ + int *disp, i; + size_t *blocklen; + dt_desc_t* upper; + + disp = (int*)malloc( sizeof(int) * mat_size ); + blocklen = (size_t*)malloc( sizeof(size_t) * mat_size ); + + for( i = 0; i < mat_size; i++ ) { + disp[i] = i * mat_size; + blocklen[i] = i; + } + + dt_create_indexed( mat_size, blocklen, disp, &(basicDatatypes[DT_DOUBLE]), + &upper ); + free( disp ); + free( blocklen ); + return upper; +} + +extern long conversion_elapsed; + +int test_upper( size_t length ) +{ + double *mat1, *mat2, *inbuf; + dt_desc_t *pdt, *pdt1; + convertor_t * pConv; + char *ptr; + int i, j, split_chunk, total_length, rc; + struct iovec a; + TIMER_DATA_TYPE start, end; + long total_time; + + printf( "test upper matrix\n" ); + pdt = upper_matrix( length ); + pdt1 = lower_matrix( length ); + /*dt_dump( pdt );*/ + + mat1 = malloc( length * length * sizeof(double) ); + init_random_upper_matrix( length, mat1 ); + mat2 = calloc( length * length, sizeof(double) ); 
+ + total_length = length * (length + 1) / 2 * sizeof(double); + inbuf = (double*)malloc( total_length ); + ptr = (char*)inbuf; + /* copy upper matrix in the array simulating the input buffer */ + for( i = 0; i < length; i++ ) + for( j = i; j < length; j++ ) { + *inbuf = mat1[i * length + j]; + inbuf++; + } + inbuf = (double*)ptr; + pConv = convertor_create( 0, 0 ); + convertor_init_for_recv( pConv, 0, pdt, 1, mat2 ); + +/* test the automatic destruction pf the data */ + dt_destroy( &pdt ); + dt_destroy( &pdt1 ); + + GET_TIME( start ); + split_chunk = (length + 1) * sizeof(double); +/* split_chunk = (total_length + 1) * sizeof(double); */ + for( i = total_length; i > 0; ) { + if( i < split_chunk ) split_chunk = i; + a.iov_base = ptr; + a.iov_len = split_chunk; + convertor_unpack( pConv, &a, 1 ); + ptr += split_chunk; + i -= split_chunk; + } + GET_TIME( end ); + total_time = ELAPSED_TIME( start, end ); + printf( "complete unpacking in %ld microsec\n", total_time ); +/* printf( "conversion done in %ld microsec\n", conversion_elapsed ); */ +/* printf( "stack management in %ld microsec\n", total_time - conversion_elapsed ); */ + free( inbuf ); + rc = check_diag_matrix( length, mat1, mat2 ); + free( mat1 ); + free( mat2 ); + return rc; +} + +dt_desc_t* test_matrix_borders( unsigned int size, unsigned int width ) +{ + dt_desc_t *pdt, *pdt_line; + int disp[2]; + size_t blocklen[2]; + + disp[0] = 0; + blocklen[0] = width; + disp[1] = (size - width) * sizeof(double); + blocklen[1] = width; + + dt_create_indexed( 2, blocklen, disp, &(basicDatatypes[DT_DOUBLE]), + &pdt_line ); + dt_create_contiguous( size, pdt_line, &pdt ); + dt_destroy( &pdt_line ); + return pdt; +} + +dt_desc_t* test_contiguous( void ) +{ + dt_desc_t *pdt, *pdt1, *pdt2; + + printf( "test contiguous (alignement)\n" ); + pdt1 = dt_create( -1 ); + dt_add( pdt1, &(basicDatatypes[DT_DOUBLE]), 1, 0, -1 ); + dt_dump( pdt1 ); + dt_add( pdt1, &(basicDatatypes[DT_CHAR]), 1, 8, -1 ); + dt_dump( pdt1 ); + 
dt_create_contiguous( 4, pdt1, &pdt2 ); + dt_destroy( &pdt1 ); + dt_dump( pdt2 ); + dt_create_contiguous( 2, pdt2, &pdt ); + dt_destroy( &pdt2 ); + dt_dump( pdt ); + dt_dump_complete( pdt ); + return pdt; +} + +dt_desc_t* test_struct( void ) +{ + dt_desc_t* types[] = { &(basicDatatypes[DT_FLOAT]), + NULL, + &(basicDatatypes[DT_CHAR]) }; + int lengths[] = { 2, 1, 3 }; + long disp[] = { 0, 16, 26 }; + dt_desc_t* pdt, *pdt1; + + printf( "test struct\n" ); + pdt1 = dt_create( -1 ); + dt_add( pdt1, &(basicDatatypes[DT_DOUBLE]), 1, 0, -1 ); + dt_add( pdt1, &(basicDatatypes[DT_CHAR]), 1, 8, -1 ); + dt_dump_complete( pdt1 ); + + types[1] = pdt1; + + dt_create_struct( 3, lengths, disp, types, &pdt ); + dt_destroy( &pdt1 ); + dt_dump_complete( pdt ); + return pdt; +} + +typedef struct { + int i1; + int gap; + int i2; +} sdata_intern; + +typedef struct { + int counter; + sdata_intern v[10]; + int last; +} sstrange; + +#define SSTRANGE_CNT 10 +#define USE_RESIZED + +dt_desc_t* create_strange_dt( void ) +{ + sdata_intern v[2]; + long displ[3]; + dt_desc_t* types[3] = { &(basicDatatypes[DT_INT]) }; + sstrange t[2]; + int pBlock[3] = {1, 10, 1}, dispi[3]; + dt_desc_t *pdt, *pdt1, *pdt2, *pdtTemp; + + dispi[0] = (int)((char*)&(v[0].i1) - (char*)&(v[0])); /* 0 */ + dispi[1] = (int)(((char*)(&(v[0].i2)) - (char*)&(v[0])) / sizeof(int)); /* 2 */ + dt_create_indexed_block( 2, 1, dispi, &(basicDatatypes[DT_INT]), &pdtTemp ); +#ifdef USE_RESIZED + /* optional */ + displ[0] = 0; + displ[1] = (char*)&(v[1]) - (char*)&(v[0]); + dt_create_resized( pdtTemp, displ[0], displ[1], &pdt1 ); + dt_destroy( &pdtTemp ); +#else + pdt1 = pdtTemp; +#endif /* USE_RESIZED */ + + types[1] = pdt1; + types[2] = &(basicDatatypes[DT_INT]); + displ[0] = 0; + displ[1] = (long)((char*)&(t[0].v[0]) - (char*)&(t[0])); + displ[2] = (long)((char*)&(t[0].last) - (char*)&(t[0])); + dt_create_struct( 3, pBlock, displ, types, &pdtTemp ); +#ifdef USE_RESIZED + /* optional */ + displ[1] = (char*)&(t[1]) - (char*)&(t[0]); + 
dt_create_resized( pdtTemp, displ[0], displ[1], &pdt2 ); + dt_destroy( &pdtTemp ); +#else + pdt2 = pdtTemp; +#endif /* USE_RESIZED */ + + dt_create_contiguous( SSTRANGE_CNT, pdt2, &pdt ); + + dt_destroy( &pdt1 ); + dt_destroy( &pdt2 ); + dt_dump( pdt ); + { + dt_elem_desc_t* pElemDesc; + dt_optimize_short( pdt, 1, &pElemDesc ); + + free( pElemDesc ); + } + return pdt; +} + +int local_copy_ddt_count( dt_desc_t* pdt, int count ) +{ + long extent; + void *pdst, *psrc; + dt_type_extent( pdt, &extent ); + + pdst = malloc( extent * count ); + psrc = malloc( extent * count ); + + pdt = create_strange_dt(); + + //dt_copy_content_same_dt( pdt, count, pdst, psrc ); + + free(pdst ); + free( psrc ); + return 0; +} +int main( int argc, char* argv[] ) +{ + dt_desc_t *pdt, *pdt1, *pdt2, *pdt3; + int rc, length = 500; + + pdt = create_strange_dt(); + return 0; + /* + local_copy_ddt_count(pdt, 10); + dt_destroy( &pdt ); + */ + pdt = upper_matrix(100); + local_copy_ddt_count(pdt, 1); + dt_destroy( &pdt ); + return 0; + + return 0; + + mpich_typeub(); + mpich_typeub2(); + mpich_typeub3(); + + rc = test_upper( length ); + if( rc == 0 ) + printf( "decode [PASSED]\n" ); + else + printf( "decode [NOT PASSED]\n" ); + + pdt = test_matrix_borders( length, 100 ); + dt_dump( pdt ); + dt_free( &pdt ); + + printf( ">>--------------------------------------------<<\n" ); + pdt = test_contiguous(); + dt_destroy( &pdt ); + printf( ">>--------------------------------------------<<\n" ); + pdt = test_struct(); + dt_destroy( &pdt ); + printf( ">>--------------------------------------------<<\n" ); + + pdt1 = dt_create( -1 ); + pdt2 = dt_create( -1 ); + pdt3 = dt_create( -1 ); + dt_add( pdt3, &(basicDatatypes[DT_INT]), 10, 0, -1 ); + dt_add( pdt3, &(basicDatatypes[DT_FLOAT]), 5, 10 * sizeof(int), -1 ); + + dt_add( pdt2, &(basicDatatypes[DT_INT]), 1, 0, -1 ); + dt_add( pdt2, pdt3, 3, sizeof(int) * 1, -1 ); + + dt_add( pdt1, &(basicDatatypes[DT_LONG_LONG]), 5, 0, -1 ); + dt_add( pdt1, 
&(basicDatatypes[DT_LONG_DOUBLE]), 2, sizeof(long long) * 5, -1 ); + + printf( ">>--------------------------------------------<<\n" ); + dt_dump( pdt1 ); + printf( ">>--------------------------------------------<<\n" ); + dt_dump( pdt2 ); + printf( ">>--------------------------------------------<<\n" ); + dt_dump( pdt3 ); + + dt_destroy( &pdt1 ); + dt_destroy( &pdt2 ); + dt_destroy( &pdt3 ); + return 0; +} diff --git a/src/datatype/dt_add.c b/src/datatype/dt_add.c new file mode 100644 index 0000000000..ecb803530c --- /dev/null +++ b/src/datatype/dt_add.c @@ -0,0 +1,188 @@ +#include "datatype.h" + +/* When we add a datatype we should update it's definition depending on + * the initial displacement for the whole data, so the displacement of + * all elements inside a datatype depend only on the loop displacement + * and it's own displacement. + */ + +/* we have 3 differents structures to update: + * the first is the real representation of the datatype + * the second is the internal representation using extents + * the last is the representation used for send operations + */ +int dt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd, unsigned int count, long disp, long extent ) +{ + int newLength, place_needed = 0, i; + short localFlags; + dt_elem_desc_t *pLast, *pLoop = NULL; + long lb, ub; + + /* the extent should be always be positive. So a negative + * value here have a special meaning ie. 
default extent as + * computed by ub - lb + */ + if( extent == -1 ) extent = (pdtAdd->ub - pdtAdd->lb); + + /* first make sure that we have enought place to + * put the new element inside */ + if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) { + place_needed = 1; + /* handle special cases for DT_LB and DT_UB */ + if( pdtAdd == &(basicDatatypes[DT_LB]) ) { + pdtBase->bdt_used |= (1<< DT_LB); + if( pdtBase->flags & DT_FLAG_USER_LB ) { + pdtBase->lb = LMIN( pdtBase->lb, disp ); + } else { + pdtBase->lb = disp; + pdtBase->flags |= DT_FLAG_USER_LB; + } + return 0; + } else if( pdtAdd == &(basicDatatypes[DT_UB]) ) { + pdtBase->bdt_used |= (1<< DT_UB); + if( pdtBase->flags & DT_FLAG_USER_UB ) { + pdtBase->ub = LMAX( pdtBase->ub, disp ); + } else { + pdtBase->ub = disp; + pdtBase->flags |= DT_FLAG_USER_UB; + } + return 0; + } + } else { + place_needed = pdtAdd->desc.used; + if( count != 1 ) place_needed += 2; + } + + dt_increase_ref( pdtAdd ); + + /* compute the new memory alignement */ + pdtBase->align = IMAX( pdtBase->align, pdtAdd->align ); + + pdtBase->bdt_used |= pdtAdd->bdt_used; + newLength = pdtBase->desc.used + place_needed; + if( newLength > pdtBase->desc.length ) { + newLength = ((newLength / DT_INCREASE_STACK) + 1 ) * DT_INCREASE_STACK; + printf( "increase the size of the data desc array from %d to %d (old ptr = %p ", + pdtBase->desc.length, newLength, pdtBase->desc.desc ); + pdtBase->desc.desc = (dt_elem_desc_t*)realloc( pdtBase->desc.desc, newLength ); + printf( "new ptr = %p\n", pdtBase->desc.desc ); + pdtBase->desc.length = newLength; + } + pLast = &(pdtBase->desc.desc[pdtBase->desc.used]); + if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) { /* add a basic datatype */ + pLast->type = pdtAdd->id; + pLast->count = count; + pLast->disp = disp; + pLast->extent = extent; + pdtBase->desc.used++; + pdtBase->btypes[pdtAdd->id] += count; + pLast->flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS); + if( extent == 
pdtAdd->size ) + pLast->flags |= DT_FLAG_CONTIGUOUS; + } else { + /* now we add a complex datatype */ + if( disp != pdtBase->ub ) { /* add the initial gap */ + if( disp < pdtBase->ub ) pdtBase->flags |= DT_FLAG_OVERLAP; + } + /* keep trace of the total number of basic datatypes in the datatype definition */ + pdtBase->btypes[DT_LOOP] += pdtAdd->btypes[DT_LOOP]; + for( i = 3; i < DT_END_LOOP; i++ ) + if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]); + pdtBase->btypes[DT_END_LOOP] += pdtAdd->btypes[DT_END_LOOP]; + + /* if the extent of the datatype if the same as the extent of the loop + * description of the datatype then we simply have to update the main loop. + */ + if( count != 1 ) { + pLoop = pLast; + pLast->type = DT_LOOP; + pLast->count = count; + pLast->disp = (long)pdtAdd->desc.used + 1; + pLast->extent = extent; + pLast->flags = (pdtAdd->flags & ~(DT_FLAG_COMMITED | DT_FLAG_FOREVER)); + localFlags = DT_FLAG_IN_LOOP; + pdtBase->btypes[DT_LOOP] += 2; + pdtBase->desc.used += 2; + pLast++; + } + + for( i = 0; i < pdtAdd->desc.used; i++ ) { + pLast->type = pdtAdd->desc.desc[i].type; + pLast->flags = pdtAdd->desc.desc[i].flags | localFlags; + pLast->count = pdtAdd->desc.desc[i].count; + pLast->extent = pdtAdd->desc.desc[i].extent; + pLast->disp = pdtAdd->desc.desc[i].disp; + if( pdtAdd->desc.desc[i].type != DT_LOOP ) + pLast->disp += disp/* + pdtAdd->lb */; + pLast++; + } + pdtBase->desc.used += pdtAdd->desc.used; + if( pLoop != NULL ) { + pLast->type = DT_END_LOOP; + pLast->count = pdtAdd->desc.used + 1; /* where the loop start */ + pLast->disp = disp + (count - 1) * extent + + (pdtAdd->true_ub - pdtAdd->true_lb) ; /* the final extent for the loop */ + pLast->extent = pdtAdd->size; /* the size of the data inside the loop */ + pLast->flags = pLoop->flags; + } + /* should I add some space until the extent of this datatype ? 
*/ + } + + pdtBase->size += count * pdtAdd->size; + pdtBase->true_lb = LMIN( pdtBase->true_lb, pdtAdd->true_lb + disp ); + pdtBase->true_ub = LMAX( pdtBase->true_ub, + disp + pdtAdd->true_lb + + (count - 1) * extent + pdtAdd->true_ub ); + + /* the lower bound should be inherited from the parents if and only + * if the USER has explicitly set it. The result lb is the MIN between + * the all lb + disp if and only if all or nobody flags's contain the LB. + */ + if( (pdtAdd->flags ^ pdtBase->flags) & DT_FLAG_USER_LB ) { + pdtBase->flags |= DT_FLAG_USER_LB; + if( pdtAdd->flags & DT_FLAG_USER_LB ) + lb = pdtAdd->lb + disp; + else + lb = pdtBase->lb; + } else { + lb = LMIN( pdtBase->lb, pdtAdd->lb + disp ); + } + + /* the same apply for the upper bound except for the case where + * either of them has the flag UB, in which case we should + * compute the UB including the natural alignement of the data. + */ + if( (pdtBase->flags ^ pdtAdd->flags) & DT_FLAG_USER_UB ) { + if( pdtBase->flags & DT_FLAG_USER_UB ) + ub = pdtBase->ub; + else { + pdtBase->flags |= DT_FLAG_USER_UB; + ub = disp + pdtAdd->lb + count * extent; + } + } else { + if( pdtBase->flags & DT_FLAG_USER_UB ) + ub = LMAX( pdtBase->ub, disp + pdtAdd->lb + count * (extent) ); + else { + /* we should compute the extent depending on the alignement */ + long ubN = (disp + pdtAdd->lb + count * (extent)); + ub = LMAX( ((pdtBase->ub / pdtBase->align) * pdtBase->align), + (((ubN + pdtBase->align - 1)/ pdtBase->align) * pdtBase->align) ); + } + } + /* update the extent and size */ + pdtBase->lb = lb; + pdtBase->ub = ub; + pdtBase->nbElems += (count * pdtAdd->nbElems); + + /* Is the data still contiguous ? + * The only way for the data to be contiguous is to have the true extent equal to his size. + * In other words to avoid having internal gaps between elements. 
+ */ + if( (pdtBase->size != (pdtBase->true_ub - pdtBase->true_lb)) || + !(pdtBase->flags & DT_FLAG_CONTIGUOUS) || !(pdtAdd->flags & DT_FLAG_CONTIGUOUS) ) + UNSET_CONTIGUOUS_FLAG(pdtBase->flags); + + dt_decrease_ref( pdtAdd ); + + return 0; +} diff --git a/src/datatype/dt_create.c b/src/datatype/dt_create.c new file mode 100644 index 0000000000..07bb54de1a --- /dev/null +++ b/src/datatype/dt_create.c @@ -0,0 +1,260 @@ +/* -*- Mode: C; c-basic-offset:3 ; -*- */ + +#include "datatype.h" +#include "limits.h" + +/* other fields starting after bdt_used (index of DT_LOOP should be ONE) */ +#define EMPTY_DATA NULL, "", {0, 0, NULL}, {0, 0, NULL}, NULL, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +#define BASEOBJ_DATA { NULL, 1 } +dt_desc_t basicDatatypes[] = { + /*super size true_lb true_ub align lb ub flags id nbElems bdt_used others fields */ +/*0x00*/ { BASEOBJ_DATA, 0, 0, 0, 0, 0, 0, DT_FLAG_BASIC, DT_LOOP, 1, (1<size = 0; + pData->id = 0; + pData->nbElems = 0; + pData->bdt_used = 0; + for( i = 0; i < DT_MAX_PREDEFINED; i++ ) + pData->btypes[i] = 0; + pData->btypes[DT_LOOP] = 1; + + pData->opt_desc.desc = NULL; + pData->opt_desc.length = 0; + pData->opt_desc.used = 0; + pData->args = NULL; + pData->align = 1; + pData->flags = DT_FLAG_CONTIGUOUS; + pData->true_lb = LONG_MAX; + pData->true_ub = LONG_MIN; + pData->lb = LONG_MAX; + pData->ub = LONG_MIN; +} +OBJ_CLASS_INSTANCE(lam_datatype_t, lam_object_t, __get_free_dt_struct, dt_destroy ); + +dt_desc_t* dt_create( int expectedSize ) +{ + dt_desc_t* pdt = (dt_desc_t*)OBJ_NEW(lam_datatype_t); + + if( expectedSize == -1 ) expectedSize = DT_INCREASE_STACK; + pdt->desc.length = expectedSize; + pdt->desc.used = 0; + pdt->desc.desc = (dt_elem_desc_t*)calloc(pdt->desc.length, sizeof(dt_elem_desc_t)); + return pdt; +} + +int dt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t** newType ) +{ + dt_duplicate( oldType, newType ); + (*newType)->lb = lb; + (*newType)->ub = lb + extent; + return 0; +} + +int 
dt_commit( dt_desc_t** data ) +{ + dt_desc_t* pData = (dt_desc_t*)*data; + + if( pData->flags & DT_FLAG_COMMITED ) return -1; + pData->flags |= DT_FLAG_COMMITED; + /* If the data is contiguous is useless to generate an optimized version. */ + if( pData->size != (pData->true_ub - pData->true_lb) ) + (void)dt_optimize_short( pData, 1, &(pData->opt_desc) ); + return 0; +} + +static void _dump_data_flags( unsigned short usflags ) +{ + char flags[12] = "-----------"; + + if( usflags & DT_FLAG_DESTROYED ) flags[0] = 'd'; + if( usflags & DT_FLAG_COMMITED ) flags[1] = 'c'; + if( usflags & DT_FLAG_CONTIGUOUS ) flags[2] = 'C'; + if( usflags & DT_FLAG_OVERLAP ) flags[3] = 'o'; + if( usflags & DT_FLAG_USER_LB ) flags[4] = 'l'; + if( usflags & DT_FLAG_USER_UB ) flags[5] = 'u'; + if( usflags & DT_FLAG_FOREVER ) flags[6] = 'F'; + if( usflags & DT_FLAG_IN_LOOP ) flags[7] = 'L'; + if( usflags & DT_FLAG_DATA ) flags[8] = 'D'; + if( usflags & DT_FLAG_INITIAL ) flags[9] = 'I'; + if( (usflags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) flags[10] = 'B'; + flags[11] = 0; + printf( "%s\t", flags ); +} + +int __dump_data_desc( dt_elem_desc_t* pDesc, int nbElems ) +{ + char* dtName; + int i; + + for( i = 0; i < nbElems; i++ ) { + if( pDesc->type > DT_MAX_PREDEFINED ) dtName = basicDatatypeNames[DT_MAX_PREDEFINED]; + else dtName = basicDatatypeNames[pDesc->type]; + _dump_data_flags( pDesc->flags ); + if( pDesc->type == DT_LOOP ) + printf( "%15s %d times the next %d elements extent %d\n", dtName, + pDesc->count, (int)pDesc->disp, pDesc->extent ); + else + printf( "%15s count %d disp 0x%lx (%ld) extent %d\n", dtName, + pDesc->count, pDesc->disp, pDesc->disp, pDesc->extent ); + pDesc++; + } + return 0; +} + +void __dt_contain_basic_datatypes( dt_desc_t* pData ) +{ + int i, mask = 1; + + if( pData->flags & DT_FLAG_USER_LB ) printf( "lb " ); + if( pData->flags & DT_FLAG_USER_UB ) printf( "ub " ); + for( i = 0; i < DT_MAX_PREDEFINED; i++ ) { + if( pData->bdt_used & mask ) + printf( "%s ", 
basicDatatypeNames[i] ); + mask <<= 1; + } +} + +void dt_dump( dt_desc_t* data ) +{ + dt_desc_t* pData = (dt_desc_t*)data; + + printf( "Datatype %p size %d align %d id %d length %d used %d\n\ + true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n\ + nbElems %d loops %d flags %X (", + pData, pData->size, pData->align, pData->id, pData->desc.length, pData->desc.used, + pData->true_lb, pData->true_ub, pData->true_ub - pData->true_lb, + pData->lb, pData->ub, pData->ub - pData->lb, + pData->nbElems, pData->btypes[DT_LOOP], pData->flags ); + /* dump the flags */ + if( pData->flags == DT_FLAG_BASIC ) printf( "basic datatype " ); + else { + if( pData->flags & DT_FLAG_DESTROYED ) printf( "destroyed " ); + if( pData->flags & DT_FLAG_COMMITED ) printf( "commited " ); + if( pData->flags & DT_FLAG_CONTIGUOUS) printf( "contiguous " ); + } + printf( ")" ); _dump_data_flags( pData->flags ); + printf( "\n contain " ); __dt_contain_basic_datatypes( pData ); printf( "\n" ); + __dump_data_desc( pData->desc.desc, pData->desc.used ); + if( pData->opt_desc.desc != NULL ) { + printf( "Optimized description \n" ); + __dump_data_desc( pData->opt_desc.desc, pData->opt_desc.used ); + } +} +#define DUMP_TYPE( TYPENAME, TYPE ) \ +int dump_##TYPENAME( unsigned int count, \ + char* from, unsigned int from_len, long from_extent, \ + char* to, unsigned int to_len, long to_extent, \ + int* used ) \ +{ \ + int remote_type_size = sizeof(TYPE), res = 1; \ + if( (remote_type_size * count) > from_len ) { \ + count = from_len / remote_type_size; \ + if( (count * remote_type_size) != from_len ) { \ + printf( "oops should I keep this data somewhere (excedent %d bytes)?\n", \ + from_len - (count * remote_type_size) ); \ + res = -1; \ + } \ + printf( "correct: %s count %d from %p with length %d to %p space %d\n", \ + #TYPE, count, from, from_len, to, to_len ); \ + } else \ + printf( " %s count %d from %p with length %d to %p space %d\n", \ + #TYPE, count, from, from_len, to, to_len ); \ + \ 
+ *used = count * to_extent; \ + return res * count; \ +} + +DUMP_TYPE( char, char ); +DUMP_TYPE( short, short ); +DUMP_TYPE( int, int ); +DUMP_TYPE( float, float ); +DUMP_TYPE( long, long ); +DUMP_TYPE( double, double ); +DUMP_TYPE( long_long, long long ); +DUMP_TYPE( long_double, long double ); +DUMP_TYPE( complex_float, complex_float_t ); +DUMP_TYPE( complex_double, complex_double_t ); + +static convertor_t* pDumpConv = NULL; + +static conversion_fct_t dump_functions[] = { + (conversion_fct_t)NULL, /* DT_LOOP */ + (conversion_fct_t)NULL, /* DT_LB */ + (conversion_fct_t)NULL, /* DT_UB */ + (conversion_fct_t)NULL, /* DT_SPACE */ + (conversion_fct_t)dump_char, /* DT_CHAR */ + (conversion_fct_t)dump_char, /* DT_BYTE */ + (conversion_fct_t)dump_short, /* DT_SHORT */ + (conversion_fct_t)dump_int, /* DT_INT */ + (conversion_fct_t)dump_float, /* DT_FLOAT */ + (conversion_fct_t)dump_long, /* DT_LONG */ + (conversion_fct_t)dump_double, /* DT_DOUBLE */ + (conversion_fct_t)dump_long_long, /* DT_LONG_LONG */ + (conversion_fct_t)dump_long_double, /* DT_LONG_DOUBLE */ + (conversion_fct_t)dump_complex_float, /* DT_COMPLEX_FLOAT */ + (conversion_fct_t)dump_complex_double, /* DT_COMPLEX_DOUBLE */ +}; + +void dt_dump_complete( dt_desc_t* data ) +{ + dt_desc_t* pData = (dt_desc_t*)data; + struct iovec fake = { (void*)0, 0 }; + + printf( "Datatype %p size %d align %d id %d length %d used %d\n\ + true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n\ + nbElems %d loops %d flags %X (", + pData, pData->size, pData->align, pData->id, pData->desc.length, pData->desc.used, + pData->true_lb, pData->true_ub, pData->true_ub - pData->true_lb, + pData->lb, pData->ub, pData->ub - pData->lb, + pData->nbElems, pData->btypes[DT_LOOP], pData->flags ); + + /* dump the flags */ + if( pData->flags == DT_FLAG_BASIC ) printf( "basic datatype " ); + else { + if( pData->flags & DT_FLAG_DESTROYED ) printf( "destroyed " ); + if( pData->flags & DT_FLAG_COMMITED ) printf( "commited " ); + if( 
pData->flags & DT_FLAG_CONTIGUOUS) printf( "contiguous " ); + if( pData->flags & DT_FLAG_OVERLAP ) printf( "overlap " ); + } + printf( ")\n contain " ); __dt_contain_basic_datatypes( pData ); + printf( "\n{\n" ); + if( pDumpConv == NULL ) { + pDumpConv = convertor_create( 0, 0 ); + } + convertor_init_for_recv( pDumpConv, 0, pData, 1, NULL ); + pDumpConv->pFunctions = dump_functions; + + fake.iov_len = pData->size; + convertor_unpack( pDumpConv, &fake, 1 ); + printf( "}\n" ); +} diff --git a/src/datatype/dt_create_array.c b/src/datatype/dt_create_array.c new file mode 100644 index 0000000000..b4e8a18bd9 --- /dev/null +++ b/src/datatype/dt_create_array.c @@ -0,0 +1,15 @@ +#include "datatype.h" + +int dt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts, + int order, dt_desc_t* oldType, dt_desc_t** newType ) +{ + return -1; +} + +int dt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib, + int* pDArgs, int* pPSizes, int order, dt_desc_t* oldType, + dt_desc_t** newType ) +{ + return -1; +} + diff --git a/src/datatype/dt_create_dup.c b/src/datatype/dt_create_dup.c new file mode 100644 index 0000000000..a0ea41ba57 --- /dev/null +++ b/src/datatype/dt_create_dup.c @@ -0,0 +1,23 @@ +#include "datatype.h" + +int dt_duplicate( dt_desc_t* oldType, dt_desc_t** newType ) +{ + dt_desc_t* pdt = dt_create( oldType->desc.used ); + void* temp = pdt->desc.desc; /* temporary copy of the desc pointer */ + + memcpy( pdt, oldType, sizeof(dt_desc_t) ); + pdt->desc.desc = temp; + memcpy( pdt->desc.desc, oldType->desc.desc, sizeof(dt_elem_desc_t) * oldType->desc.used ); + pdt->id = 0; + pdt->args = NULL; + *newType = pdt; + return 0; +} + +int dt_create_contiguous( size_t count, dt_desc_t* oldType, dt_desc_t** newType ) +{ + dt_desc_t* pdt = dt_create( oldType->desc.used + 2 ); + dt_add( pdt, oldType, count, 0, (oldType->ub - oldType->lb) ); + *newType = pdt; + return 0; +} diff --git a/src/datatype/dt_create_indexed.c 
b/src/datatype/dt_create_indexed.c new file mode 100644 index 0000000000..a05f3ba8ab --- /dev/null +++ b/src/datatype/dt_create_indexed.c @@ -0,0 +1,90 @@ +#include "datatype.h" + +/* Build an indexed datatype from count blocks of oldType. A block that starts exactly where the previous one ends is merged with it, so only one dt_add call is issued per contiguous run. Displacements are multiplied by the extent of oldType (ub - lb) before being handed to dt_add. Always returns 0 and stores the result in *newType. */ +int dt_create_indexed( size_t count, int* pBlockLength, int* pDisp, + dt_desc_t* oldType, dt_desc_t** newType ) +{ + dt_desc_t* pdt; + int i, dLength, endat, disp; + long extent = oldType->ub - oldType->lb; + + pdt = dt_create( count * (2 + oldType->desc.used) ); + disp = pDisp[0]; + dLength = pBlockLength[0]; + endat = disp + dLength; + for( i = 1; i < count; i++ ) { + if( endat == pDisp[i] ) { + /* contiguous with the previous block: grow the pending run instead of adding a new entry */ + dLength += pBlockLength[i]; + endat += pBlockLength[i]; + } else { + dt_add( pdt, oldType, dLength, disp * extent, extent ); + disp = pDisp[i]; + dLength = pBlockLength[i]; + endat = disp + pBlockLength[i]; + } + } + dt_add( pdt, oldType, dLength, disp * extent, extent ); + + *newType = pdt; + return 0; +} + +int dt_create_hindexed( size_t count, int* pBlockLength, long* pDisp, + dt_desc_t* oldType, dt_desc_t** newType ) +{ + dt_desc_t* pdt; + int i, dLength; + long extent = oldType->ub - oldType->lb; + long disp, endat; + + pdt = dt_create( count * (2 + oldType->desc.used) ); + disp = pDisp[0]; + dLength = pBlockLength[0]; + endat = disp + dLength * extent; + for( i = 1; i < count; i++ ) { + if( endat == pDisp[i] ) { + /* contiguous with the previous block: grow the pending run. Here the displacements are passed to dt_add unscaled and the end offset is tracked as disp + length * extent */ + dLength += pBlockLength[i]; + endat += pBlockLength[i] * extent; + } else { + dt_add( pdt, oldType, dLength, disp, extent ); + disp = pDisp[i]; + dLength = pBlockLength[i]; + endat = disp + pBlockLength[i] * extent; + } + } + dt_add( pdt, oldType, dLength, disp, extent ); + + *newType = pdt; + return 0; +} + +int dt_create_indexed_block( size_t count, int bLength, int* pDisp, + dt_desc_t* oldType, dt_desc_t** newType ) +{ + dt_desc_t* pdt; + int i, dLength, endat, disp; + long extent = oldType->ub - oldType->lb; + + pdt = dt_create( count * (2 + 
oldType->desc.used) ); + disp = pDisp[0]; + dLength = bLength; + endat = disp + dLength; + for( i = 1; i < count; i++ ) { + if( endat == pDisp[i] ) { + /* contiguous with the previsious */ + dLength += bLength; + endat += bLength; + } else { + dt_add( pdt, oldType, dLength, disp * extent, extent ); + disp = pDisp[i]; + dLength = bLength; + endat = disp + bLength; + } + } + dt_add( pdt, oldType, dLength, disp * extent, extent ); + + *newType = pdt; + return 0; +} diff --git a/src/datatype/dt_create_struct.c b/src/datatype/dt_create_struct.c new file mode 100644 index 0000000000..ec5c21ca3a --- /dev/null +++ b/src/datatype/dt_create_struct.c @@ -0,0 +1,42 @@ +#include "datatype.h" + +int dt_create_struct( size_t count, size_t* pBlockLength, long* pDisp, + dt_desc_t** pTypes, dt_desc_t** newType ) +{ + int i; + long disp, endto, lastExtent, lastDisp; + size_t lastBlock; + dt_desc_t *pdt, *lastType; + /* if we compute the total number of elements before we can + * avoid increasing the size of the desc array often. 
+ */ + for( disp = 0, i = 0; i < count; i++ ) { + disp += pTypes[i]->desc.used; + if( pBlockLength[i] != 1 ) disp += 2; + } + lastType = pTypes[0]; + lastBlock = pBlockLength[0]; + lastExtent = lastType->ub - lastType->lb; + lastDisp = pDisp[0]; + endto = pDisp[0] + lastExtent * lastBlock; + + pdt = dt_create( disp ); + + for( i = 1; i < count; i++ ) { + if( (pTypes[i] == lastType) && (pDisp[i] == endto) ) { + lastBlock += pBlockLength[i]; + endto = lastDisp + lastBlock * lastExtent; + } else { + dt_add( pdt, lastType, lastBlock, lastDisp, lastExtent ); + lastType = pTypes[i]; + lastExtent = lastType->ub - lastType->lb; + lastBlock = pBlockLength[i]; + lastDisp = pDisp[i]; + endto = lastDisp + lastExtent * lastBlock; + } + } + dt_add( pdt, lastType, lastBlock, lastDisp, lastExtent ); + + *newType = pdt; + return 0; +} diff --git a/src/datatype/dt_create_vector.c b/src/datatype/dt_create_vector.c new file mode 100644 index 0000000000..9870d14f6f --- /dev/null +++ b/src/datatype/dt_create_vector.c @@ -0,0 +1,64 @@ +#include "datatype.h" + +/* Open questions ... + * - how to improuve the handling of these vectors (creating a temporary datatype + * can be ONLY a initial solution. 
+ * + */ + +int dt_create_vector( size_t count, int bLength, long stride, + dt_desc_t* oldType, dt_desc_t** newType ) +{ + long extent = oldType->ub - oldType->lb; + dt_desc_t *pTempData, *pData; + + if( bLength == stride ) { + /* the elements are contiguous */ + pData = dt_create( oldType->desc.used + 2 ); + dt_add( pData, oldType, count * bLength, 0, extent ); + } else { + if( count > 1 ) { + if( bLength == 1 ) { + pData = dt_create( oldType->desc.used + 2 ); + dt_add( pData, oldType, count - 1, 0, stride * extent ); + } else { + pTempData = dt_create( oldType->desc.used + 2 ); + pData = dt_create( oldType->desc.used + 2 + 2 ); + dt_add( pTempData, oldType, bLength, 0, extent ); + dt_add( pData, pTempData, count - 1, 0, stride * extent ); + dt_free( &pTempData ); + } + } else { + pData = dt_create( oldType->desc.used + 2 ); + } + dt_add( pData, oldType, bLength, (count - 1) * extent * stride, extent ); + } + *newType = pData; + return 0; +} + +int dt_create_hvector( size_t count, int bLength, long stride, + dt_desc_t* oldType, dt_desc_t** newType ) +{ + long extent = oldType->ub - oldType->lb; + dt_desc_t *pTempData, *pData; + + if( (extent * bLength) == stride ) { + /* contiguous */ + pData = dt_create( oldType->desc.used + 2 ); + dt_add( pData, oldType, count * bLength, 0, extent ); + } else { + if( count > 1 ) { + pTempData = dt_create( oldType->desc.used + 2 ); + pData = dt_create( oldType->desc.used + 2 + 2 ); + dt_add( pTempData, oldType, bLength, 0, extent ); + dt_add( pData, pTempData, count - 1, 0, stride ); + dt_free( &pTempData ); + } else { + pData = dt_create( oldType->desc.used + 2 ); + } + dt_add( pData, oldType, bLength, (count - 1) * stride, extent ); + } + *newType = pData; + return 0; +} diff --git a/src/datatype/dt_destroy.c b/src/datatype/dt_destroy.c new file mode 100644 index 0000000000..753251f45f --- /dev/null +++ b/src/datatype/dt_destroy.c @@ -0,0 +1,21 @@ +#include "datatype.h" + +/* This function should never be called directly. 
It's called by the dt_decrease_ref + * when the number of references on the data reach ZERO. + * Frees the optimized and the normal element descriptions of *dt and returns 0, + * or returns LAM_ERROR without freeing anything when the datatype carries DT_FLAG_FOREVER. + */ +int dt_destroy( dt_desc_t** dt ) +{ + dt_desc_t* pData = *dt; + + /* refuse to destroy datatypes flagged DT_FLAG_FOREVER (the original test was inverted) */ + if( pData->flags & DT_FLAG_FOREVER ) + return LAM_ERROR; + + /* I still have the data description ? */ + if( pData->args != NULL ) { + fprintf( stderr, "Data description has not been removed prior to data destruction" ); + } + + if( pData->opt_desc.desc != NULL ) free( pData->opt_desc.desc ); + if( pData->desc.desc != NULL ) free( pData->desc.desc ); + return 0; +} diff --git a/src/datatype/dt_module.c b/src/datatype/dt_module.c new file mode 100644 index 0000000000..e65591acb2 --- /dev/null +++ b/src/datatype/dt_module.c @@ -0,0 +1,71 @@ +#include "datatype.h" +#include "datatype_internal.h" + +struct _c_l { + char c; + long l; +}; + +struct _c_d { + char c; + double d; +}; + +struct _c_ll { + char c; + long long ll; +}; + +struct _c_ld { + char c; + long double ld; +}; + +struct _c_f { + char c; + float f; +}; + +int dt_load( void ) +{ + /* we have to compute the correct alignement for several types of basic datatypes */ + struct _c_f c_f; + struct _c_l c_l; + struct _c_d c_d; + struct _c_ll c_ll; + struct _c_ld c_ld; + int i; + + basicDatatypes[DT_FLOAT].align = (char*)&(c_f.f) - (char*)&(c_f); + basicDatatypes[DT_LONG].align = (char*)&(c_l.l) - (char*)&(c_l); + basicDatatypes[DT_DOUBLE].align = (char*)&(c_d.d) - (char*)&(c_d); + basicDatatypes[DT_LONG_DOUBLE].align = (char*)&(c_ld.ld) - (char*)&(c_ld); + basicDatatypes[DT_LONG_LONG].align = (char*)&(c_ll.ll) - (char*)&(c_ll); + + for( i = 0; i < DT_MAX_PREDEFINED; i++ ) { + basicDatatypes[i].desc.desc = (dt_elem_desc_t*)malloc(sizeof(dt_elem_desc_t)); + basicDatatypes[i].desc.desc->flags = DT_FLAG_BASIC | DT_FLAG_CONTIGUOUS; + basicDatatypes[i].desc.desc->type = i; + basicDatatypes[i].desc.desc->count = 1; + basicDatatypes[i].desc.desc->disp = 0; + basicDatatypes[i].desc.desc->extent = basicDatatypes[i].size; + basicDatatypes[i].desc.length = 1; 
+ basicDatatypes[i].desc.used = 1; + basicDatatypes[i].btypes[i] = 1; + } + + return 0; +} + +int dt_unload( void ) +{ + int i; + + for( i =0; i < DT_MAX_PREDEFINED; i++ ) { + free( basicDatatypes[i].desc.desc ); + basicDatatypes[i].desc.desc = NULL; + basicDatatypes[i].desc.length = 0; + basicDatatypes[i].desc.used = 0; + } + return 0; +} diff --git a/src/datatype/dt_old_limits.c b/src/datatype/dt_old_limits.c new file mode 100644 index 0000000000..4bbad44880 --- /dev/null +++ b/src/datatype/dt_old_limits.c @@ -0,0 +1,48 @@ +#include "datatype.h" + +int dt_type_ub( dt_desc_t* pData, long* disp ) +{ + *disp = pData->ub; + return 0; +} + +int dt_type_lb( dt_desc_t* pData, long* disp ) +{ + *disp = pData->lb; + return 0; +} + +int dt_type_extent( dt_desc_t* pData, long* extent ) +{ + *extent = pData->ub - pData->lb; + return 0; +} + +int dt_type_size ( dt_desc_t* pData, int *size ) +{ + *size = pData->size; + return 0; +} + +int dt_type_resize( dt_desc_t* pOld, long lb, long extent, dt_desc_t** pNew ) +{ + return 0; +} + +int dt_get_extent( dt_desc_t* datatype, long* lb, long* extent) +{ + dt_desc_t* pData = (dt_desc_t*)datatype; + + *lb = pData->lb; + *extent = pData->ub - pData->lb; + return 0; +} + +int dt_get_true_extent( dt_desc_t* datatype, long* true_lb, long* true_extent) +{ + dt_desc_t* pData = (dt_desc_t*)datatype; + + *true_lb = pData->true_lb; + *true_extent = pData->true_ub - pData->true_lb; + return 0; +} diff --git a/src/datatype/dt_optimize.c b/src/datatype/dt_optimize.c new file mode 100644 index 0000000000..b775653f04 --- /dev/null +++ b/src/datatype/dt_optimize.c @@ -0,0 +1,151 @@ +/* -*- Mode: C; c-basic-offset:3 ; -*- */ + +#include "datatype.h" +#include "datatype_internal.h" + +/* printf( "save in %s:%d at %p DT_BYTE disp %ld count %d\n", __FILE__, __LINE__, (PELEM), (DISP), (COUNT) ); \ */ +#define SAVE_DESC( PELEM, DISP, COUNT ) \ +do { \ + (PELEM)->flags = DT_FLAG_BASIC; \ + (PELEM)->type = DT_BYTE; \ + (PELEM)->count = (COUNT); \ + 
(PELEM)->disp = (DISP); \ + (PELEM)->extent = 1; \ + (PELEM)++; \ + nbElems++; \ +} while(0) + +/* printf( "save in %s:%d type %d flags %x count %d disp %ld extent %d\n", \ */ +/* __FILE__, __LINE__, (TYPE), (FLAGS), (COUNT), (DISP), (EXTENT) ); \ */ +#define SAVE_ELEM( PELEM, TYPE, FLAGS, COUNT, DISP, EXTENT ) \ +do { \ + (PELEM)->flags = (FLAGS); \ + (PELEM)->type = (TYPE); \ + (PELEM)->count = (COUNT); \ + (PELEM)->disp = (DISP); \ + (PELEM)->extent = (EXTENT); \ + (PELEM)++; \ + nbElems++; \ +} while(0) + +static inline long GET_LOOP_DISP( dt_elem_desc_t* _pElem ) +{ + while( _pElem->type == DT_LOOP ) ++_pElem; + return _pElem->disp; +} + +int dt_optimize_short( dt_desc_t* pData, int count, dt_type_desc_t* pTypeDesc ) +{ + dt_elem_desc_t* pElemDesc; + long lastDisp = 0; + dt_stack_t* pStack; /* pointer to the position on the stack */ + int pos_desc; /* actual position in the description of the derived datatype */ + int end_loop; /* last element in the actual loop */ + int stack_pos = 0; + int type, lastLength = 0, nbElems = 0, changes = 0; + long totalDisp; + + pTypeDesc->length = 2 * pData->desc.used; + pTypeDesc->desc = pElemDesc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * pTypeDesc->length ); + + pStack = alloca( sizeof(dt_stack_t) * (pData->btypes[DT_LOOP]+1) ); + pStack->count = count; + pStack->index = -1; + pStack->end_loop = pData->desc.used - 1; + pStack->disp = 0; + pos_desc = 0; + + next_loop: + end_loop = pStack->end_loop; + totalDisp = pStack->disp; + while( pos_desc <= end_loop ) { + if( pData->desc.desc[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */ + dt_elem_desc_t* pStartLoop; + if( lastLength != 0 ) { + SAVE_DESC( pElemDesc, lastDisp, lastLength ); + lastDisp += lastLength; + lastLength = 0; + } + pStartLoop = (pElemDesc - nbElems); + SAVE_ELEM( pElemDesc, DT_END_LOOP, pData->desc.desc[pos_desc].flags, + nbElems, pData->desc.desc[pos_desc].disp, + pData->desc.desc[pos_desc].extent ); + nbElems += pStartLoop->disp; + 
pStartLoop->disp = (pElemDesc - 1)->count; + stack_pos--; + pStack--; + + pos_desc++; + goto next_loop; + } + if( pData->desc.desc[pos_desc].type == DT_LOOP ) { + dt_elem_desc_t* pEndLoop = &(pData->desc.desc[pos_desc + pData->desc.desc[pos_desc].disp]); + long loop_disp = GET_LOOP_DISP( &(pData->desc.desc[pos_desc]) ); + if( pData->desc.desc[pos_desc].flags & DT_FLAG_CONTIGUOUS ) { + /* the loop is contiguous or composed by contiguous elements with a gap */ + if( pData->desc.desc[pos_desc].extent == pEndLoop->extent ) { + /* the whole loop is contiguous */ + if( (lastDisp + lastLength) != (totalDisp + loop_disp) ) { + SAVE_DESC( pElemDesc, lastDisp, lastLength ); + lastLength = 0; + lastDisp = totalDisp + loop_disp; + } + lastLength += pData->desc.desc[pos_desc].count * pEndLoop->extent; + } else { + int counter = pData->desc.desc[pos_desc].count; + if( (lastDisp + lastLength) == (totalDisp + loop_disp) ) { + lastLength += pEndLoop->extent; + counter--; + } + if( lastLength != 0 ) { + SAVE_DESC( pElemDesc, lastDisp, lastLength ); + lastDisp += lastLength; + lastLength = 0; + } + /* we have a gap in the begining or the end of the loop but the whole + * loop can be merged in just one memcpy. 
+ */ + SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags, + counter, (long)2, pData->desc.desc[pos_desc].extent ); + SAVE_DESC( pElemDesc, loop_disp, pEndLoop->extent ); + SAVE_ELEM( pElemDesc, DT_END_LOOP, pEndLoop->flags, + 2, pEndLoop->disp, pEndLoop->extent ); + } + pos_desc += pData->desc.desc[pos_desc].disp + 1; + changes++; + } else { + if( lastLength != 0 ) { + SAVE_DESC( pElemDesc, lastDisp, lastLength ); + lastDisp += lastLength; + lastLength = 0; + } + SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags, + pData->desc.desc[pos_desc].count, (long)nbElems, + pData->desc.desc[pos_desc].extent ); + nbElems = 1; + PUSH_STACK( pStack, stack_pos, pos_desc, pData->desc.desc[pos_desc].count, + totalDisp, pos_desc + pData->desc.desc[pos_desc].disp ); + pos_desc++; + DUMP_STACK( pStack, stack_pos, pData->desc, "advance loops" ); + } + goto next_loop; + } + /* now here we have a basic datatype */ + type = pData->desc.desc[pos_desc].type; + if( (lastDisp + lastLength) == (totalDisp + pData->desc.desc[pos_desc].disp) ) { + lastLength += pData->desc.desc[pos_desc].count * basicDatatypes[type].size; + } else { + if( lastLength != 0 ) + SAVE_DESC( pElemDesc, lastDisp, lastLength ); + lastDisp = totalDisp + pData->desc.desc[pos_desc].disp; + lastLength = pData->desc.desc[pos_desc].count * basicDatatypes[type].size; + } + pos_desc++; /* advance to the next data */ + } + + if( lastLength != 0 ) + SAVE_DESC( pElemDesc, lastDisp, lastLength ); + /* cleanup the stack */ + pTypeDesc->used = nbElems; + return 0; +} diff --git a/src/datatype/dt_pack.c b/src/datatype/dt_pack.c new file mode 100644 index 0000000000..47e2756dbc --- /dev/null +++ b/src/datatype/dt_pack.c @@ -0,0 +1,478 @@ +/* -*- Mode: C; c-basic-offset:3 ; -*- */ + +#include "datatype.h" +#include "datatype_internal.h" + +static int convertor_pack_general( convertor_t* pConvertor, struct iovec* out, unsigned int outCount ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ 
+ int pos_desc; /* actual position in the description of the derived datatype */ + int count_desc; /* the number of items already done in the actual pos_desc */ + int end_loop; /* last element in the actual loop */ + int type; /* type at current position */ + unsigned int advance; /* number of bytes that we should advance the buffer */ + int rc; + long disp_desc = 0; /* compute displacement for truncated data */ + long disp; /* displacement at the beging of the last loop */ + dt_desc_t *pData = pConvertor->pDesc; + dt_elem_desc_t* pElem; + char* pOutput = pConvertor->pBaseBuf; + int oCount = (pData->ub - pData->lb) * pConvertor->count; + char* pInput = out[0].iov_base; + int iCount = out[0].iov_len; + + DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor, + out[0].iov_base, out[0].iov_len, outCount ); + pStack = pConvertor->pStack + pConvertor->stack_pos; + pos_desc = pStack->index; + disp = 0; + + if( pData->opt_desc.desc != NULL ) pElem = pData->opt_desc.desc; + else pElem = pData->desc.desc; + + if( pos_desc == -1 ) { + pos_desc = 0; + count_desc = pElem[0].count; + disp_desc = pElem[0].disp; + } else { + count_desc = pStack->count; + if( pElem[pos_desc].type != DT_LOOP ) { + pConvertor->stack_pos--; + pStack--; + disp = pStack->disp; + disp_desc = ( pElem[pos_desc].disp + + (pElem[pos_desc].count - count_desc) * pElem[pos_desc].extent); + } + } + DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "starting" ); + DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc ); + DUMP( "top stack info {index = %d, count = %d}\n", + pStack->index, pStack->count ); + + next_loop: + end_loop = pStack->end_loop; + while( pConvertor->stack_pos >= 0 ) { + if( pos_desc == end_loop ) { /* end of the current loop */ + while( --(pStack->count) == 0 ) { /* end of loop */ + pConvertor->stack_pos--; + pStack--; + if( pConvertor->stack_pos == -1 ) + return 1; /* completed */ + } + pos_desc = pStack->index; + if( pos_desc == -1 ) + 
pStack->disp += (pData->ub - pData->lb); + else + pStack->disp += pElem[pos_desc].extent; + pos_desc++; + disp = pStack->disp; + count_desc = pElem[pos_desc].count; + disp_desc = pElem[pos_desc].disp; + goto next_loop; + } + if( pElem[pos_desc].type == DT_LOOP ) { + do { + PUSH_STACK( pStack, pConvertor->stack_pos, + pos_desc, pElem[pos_desc].count, + disp, pos_desc + pElem[pos_desc].disp + 1); + pos_desc++; + } while( pElem[pos_desc].type == DT_LOOP ); /* let's start another loop */ + DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loops" ); + /* update the current state */ + count_desc = pElem[pos_desc].count; + disp_desc = pElem[pos_desc].disp; + goto next_loop; + } + /* now here we have a basic datatype */ + type = pElem[pos_desc].type; + rc = pConvertor->pFunctions[type]( count_desc, + pOutput + disp + disp_desc, oCount, pElem[pos_desc].extent, + pInput, iCount, pElem[pos_desc].extent, + &advance ); + if( rc <= 0 ) { + printf( "trash in the input buffer\n" ); + return -1; + } + iCount -= advance; /* decrease the available space in the buffer */ + pInput += advance; /* increase the pointer to the buffer */ + pConvertor->bConverted += advance; + if( rc != count_desc ) { + /* not all data has been converted. Keep the state */ + PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, + count_desc - rc, + disp + rc * pElem[pos_desc].extent, + pos_desc ); + if( iCount != 0 ) + printf( "there is still room in the input buffer %d bytes\n", iCount ); + return 0; + } + pConvertor->converted += rc; /* number of elementd converted so far */ + pos_desc++; /* advance to the next data */ + count_desc = pElem[pos_desc].count; + disp_desc = pElem[pos_desc].disp; + if( iCount == 0 ) break; /* break if there is no more data in the buffer */ + } + + /* out of the loop: we have complete the data conversion or no more space + * in the buffer. 
+ */ + if( pConvertor->pStack[0].count < 0 ) return 1; /* data succesfully converted */ + + /* I complete an element, next step I should go to the next one */ + PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem[pos_desc].count, + disp, pos_desc ); + + return 0; +} + +int convertor_pack_homogeneous( convertor_t* pConv, struct iovec* iov, unsigned int out_size ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + int pos_desc; /* actual position in the description of the derived datatype */ + int type; /* type at current position */ + int i; /* index for basic elements with extent */ + int stack_pos = 0; /* position on the stack */ + long lastDisp = 0, lastLength = 0; + char* pDestBuf; + dt_desc_t* pData = pConv->pDesc; + dt_elem_desc_t* pElems; + + pDestBuf = iov[0].iov_base; + + if( pData->flags & DT_FLAG_CONTIGUOUS ) { + long extent = pData->ub - pData->lb; + char* pSrc = pConv->pBaseBuf + pData->true_lb + pConv->bConverted; + + type = pConv->count * pData->size; + if( pData->size == extent /* true extent at this point */ ) { + /* we can do it with just one memcpy */ + MEMCPY( pDestBuf, pSrc, iov[0].iov_len ); + pConv->bConverted += iov[0].iov_len; + } else { + char* pSrcBuf = pConv->pBaseBuf + pData->true_lb; + long extent = pData->ub - pData->lb; + for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) { + MEMCPY( pDestBuf, pSrcBuf, pData->size ); + pSrcBuf += extent; + pDestBuf += pData->size; + } + pConv->bConverted += type; + } + return (pConv->bConverted == (pData->size * pConv->count)); + } + pStack = pConv->pStack; + pStack->count = pConv->count; + pStack->index = -1; + pStack->disp = 0; + pos_desc = 0; + + if( pData->opt_desc.desc != NULL ) { + pElems = pData->opt_desc.desc; + pStack->end_loop = pData->opt_desc.used; + } else { + pElems = pData->desc.desc; + pStack->end_loop = pData->desc.used; + } + + DUMP_STACK( pStack, stack_pos, pElems, "starting" ); + DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, 
pos_desc ); + DUMP( "top stack info {index = %d, count = %d}\n", + pStack->index, pStack->count ); + next_loop: + while( pos_desc <= pStack->end_loop ) { + if( pos_desc == pStack->end_loop ) { /* end of the current loop */ + if( --(pStack->count) == 0 ) { /* end of loop */ + pStack--; + if( --stack_pos == -1 ) break; + } else { + pos_desc = pStack->index; + if( pos_desc == -1 ) + pStack->disp += (pData->ub - pData->lb); + else + pStack->disp += pElems[pos_desc].extent; + } + pos_desc++; + goto next_loop; + } + if( pElems[pos_desc].type == DT_LOOP ) { + if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) { + dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]); + if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) { + MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength + pLast->extent ); + i = 1; + } else { + MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength ); + i = 0; + } + pDestBuf += lastLength; + lastLength = pLast->extent; + for( ; i < (pElems[pos_desc].count - 1); i++ ) { + MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength ); + pDestBuf += pLast->extent; + lastDisp += pElems[pos_desc].extent; + } + pos_desc += pElems[pos_desc].disp + 1; + goto next_loop; + } else { + do { + PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count, + pStack->disp, pos_desc + pElems[pos_desc].disp ); + pos_desc++; + } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */ + } + } + /* now here we have a basic datatype */ + type = pElems[pos_desc].type; + if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) { + lastLength += pElems[pos_desc].count * basicDatatypes[type].size; + } else { + MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength ); + pDestBuf += lastLength; + pConv->bConverted += lastLength; + lastDisp = pStack->disp + pElems[pos_desc].disp; + lastLength = pElems[pos_desc].count * basicDatatypes[type].size; + } + pos_desc++; /* advance to the next data */ + } + + 
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength ); + pConv->bConverted += lastLength; + /* cleanup the stack */ + return 0; +} + +#define PRINT_MEMCPY( DST, SRC, LENGTH ) \ +{ \ + printf( "%5d: memcpy dst = %p src %p length %ld bytes (so far %d)[%d]\n", \ + __index++, (DST), (SRC), (long)(LENGTH), __sofar, __LINE__ ); \ + __sofar += (LENGTH); \ +} + +/* Debug helper: walks the description of pData for count instances and prints, through PRINT_MEMCPY, the memcpy operations that would be issued. No data is copied. */ +int dt_unroll( dt_desc_t* pData, int count ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + int pos_desc; /* actual position in the description of the derived datatype */ + int type; /* type at current position */ + int i; /* index for basic elements with extent */ + int stack_pos = 0; /* position on the stack */ + long lastDisp = 0, lastLength = 0; + char* pDestBuf; + int bConverted = 0, __index = 0, __sofar = 0; + dt_elem_desc_t* pElems; + + pDestBuf = NULL; + + if( pData->flags & DT_FLAG_CONTIGUOUS ) { + long extent = pData->ub - pData->lb; + char* pSrc = (char*)pData->true_lb; + + type = count * pData->size; + if( pData->size == extent /* true extent at this point */ ) { + /* we can do it with just one memcpy */ + PRINT_MEMCPY( pDestBuf, pSrc, pData->size * count ); + bConverted += (pData->size * count); + } else { + char* pSrcBuf = (char*)pData->true_lb; + long extent = pData->ub - pData->lb; + for( pos_desc = 0; pos_desc < count; pos_desc++ ) { + PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size ); + pSrcBuf += extent; + pDestBuf += pData->size; + } + bConverted += type; + } + return (bConverted == (pData->size * count)); + } + /* the stack needs the base entry written below plus one entry per loop, as in dt_optimize_short; without the +1 a datatype with no loop gets a zero-sized stack */ + pStack = alloca( sizeof(dt_stack_t) * (pData->btypes[DT_LOOP] + 1) ); + pStack->count = count; + pStack->index = -1; + pStack->disp = 0; + pos_desc = 0; + + if( pData->opt_desc.desc != NULL ) { + pElems = pData->opt_desc.desc; + pStack->end_loop = pData->opt_desc.used; + } else { + pElems = pData->desc.desc; + pStack->end_loop = pData->desc.used; + } + + DUMP_STACK( pStack, stack_pos, pElems, "starting" ); + DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, 
stack_pos, pos_desc ); + DUMP( "top stack info {index = %d, count = %d}\n", + pStack->index, pStack->count ); + next_loop: + while( pos_desc <= pStack->end_loop ) { + if( pos_desc == pStack->end_loop ) { /* end of the current loop */ + if( --(pStack->count) == 0 ) { /* end of loop */ + pStack--; + if( --stack_pos == -1 ) break; + } else { + pos_desc = pStack->index; + if( pos_desc == -1 ) + pStack->disp += (pData->ub - pData->lb); + else + pStack->disp += pElems[pos_desc].extent; + } + pos_desc++; + goto next_loop; + } + if( pElems[pos_desc].type == DT_LOOP ) { + if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) { + dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]); + if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) { + PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent ); + lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent; + i = 1; + } else { + PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength ); + lastDisp = pStack->disp + pElems[pos_desc + 1].disp; + i = 0; + } + lastLength = pLast->extent; + for( ; i < (pElems[pos_desc].count - 1); i++ ) { + PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength ); + pDestBuf += pLast->extent; + lastDisp += pElems[pos_desc].extent; + } + pos_desc += pElems[pos_desc].disp + 1; + goto next_loop; + } else { + do { + PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count, + pStack->disp, pos_desc + pElems[pos_desc].disp ); + pos_desc++; + } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */ + } + } + /* now here we have a basic datatype */ + type = pElems[pos_desc].type; + if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) { + lastLength += pElems[pos_desc].count * basicDatatypes[type].size; + } else { + PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength ); + pDestBuf += lastLength; + bConverted += lastLength; + lastDisp = pStack->disp + pElems[pos_desc].disp; + lastLength = pElems[pos_desc].count * 
basicDatatypes[type].size; + } + pos_desc++; /* advance to the next data */ + } + PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength ); + return 0; +} + +/* The pack routines should do 2 things: + * - first if the provided iovec contains NULL pointers then they should provide + * buffer space. If the data is contiguous the it should provide directly pointers + * the the user space depending on the iov_len argument. If -1 then all the buffer + * can be supplied in one time, if not several steps need to be executed, it should + * provide the correct pointer every time. But if the user provide a buffer, then + * some parts of the data should be packed inside this buffer, but we still should + * able to have pointers to the user buf on the subsequents calls. + * Return 0 if everything went OK and if there is still room before the complete + * conversion of the data (need additional call with others input buffers ) + * 1 if everything went fine and the data was completly converted + * -1 something wrong occurs. 
+ */ +int convertor_pack( convertor_t* pConv, struct iovec* out, unsigned int out_size ) +{ + dt_desc_t* pData = pConv->pDesc; + int extent; + + if( pConv->count == 0 ) return 1; /* nothing to do */ + if( pData->flags & DT_FLAG_CONTIGUOUS ) { + if( pData->size == (extent = (pData->ub - pData->lb)) ) { + if( out[0].iov_base == NULL ) { + out[0].iov_base = pConv->pBaseBuf + pData->true_lb; + out[0].iov_len = pData->size * pConv->count; + } else { + /* contiguous data just memcpy the smallest data in the user buffer */ + out[0].iov_len = IMIN( out[0].iov_len, pData->size * pConv->count ); + MEMCPY( out[0].iov_base, pConv->pBaseBuf + pData->true_lb, out[0].iov_len); + } + pConv->bConverted += out[0].iov_len; + return 0; + } + } + if( out[0].iov_base == NULL ) { + out[0].iov_len = pConv->count * pData->size; + out[0].iov_base = (void*)malloc( out[0].iov_len ); + pConv->freebuf = out[0].iov_base; + } + return convertor_progress( pConv, out, out_size ); +} + +int convertor_init_for_send( convertor_t* pConv, unsigned int flags, + dt_desc_t* dt, int count, void* pUserBuf ) +{ + dt_increase_ref( dt ); + pConv->pDesc = dt; + pConv->flags = CONVERTOR_SEND; + if( pConv->pStack != NULL ) free( pConv->pStack ); + pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (dt->btypes[DT_LOOP] + 2) ); + pConv->stack_pos = 0; + pConv->pStack[0].index = -1; /* fake entry for the first step */ + pConv->pStack[0].count = count; /* fake entry for the first step */ + pConv->pStack[0].disp = 0; + /* first hre we should select which data representation will be used for + * this operation: normal one or the optimized version ? 
*/ + pConv->pStack[0].end_loop = dt->desc.used; + pConv->pBaseBuf = pUserBuf; + pConv->available_space = count * (dt->ub - dt->lb); + pConv->count = count; + pConv->pFunctions = copy_functions; + pConv->converted = 0; + pConv->bConverted = 0; + if( (dt->flags & DT_FLAG_CONTIGUOUS) && (dt->size == (dt->ub - dt->lb)) ) + pConv->flags |= DT_FLAG_CONTIGUOUS; + pConv->fAdvance = convertor_pack_homogeneous; + if( pConv->freebuf != NULL ) { + free( pConv->freebuf ); + pConv->freebuf = NULL; + } + return 0; +} + +convertor_t* convertor_create( int remote_arch, int mode ) +{ + convertor_t* pConv = (convertor_t*)calloc( 1, sizeof(convertor_t) ); + + pConv->pStack = NULL; + pConv->remoteArch = remote_arch; + pConv->fAdvance = convertor_pack_homogeneous; + return pConv; +} + +/* Actually we suppose that we can only do receiver side conversion */ +int convertor_get_packed_size( convertor_t* pConv, unsigned int* pSize ) +{ + if( dt_type_size( pConv->pDesc, pSize ) != 0 ) + return -1; + *pSize = (*pSize) * pConv->count; + return 0; +} + +/* Store in *pSize the unpacked size of the convertor data and return 0. With a zero count the size is 0; when remoteArch is 0 it is size * count; otherwise it is the sum, over every basic type whose bit is set in bdt_used, of btypes[i] * basicDatatypes[i].size, multiplied by count. */ +int convertor_get_unpacked_size( convertor_t* pConv, unsigned int* pSize ) +{ + int i; + dt_desc_t* pData = pConv->pDesc; + + if( pConv->count == 0 ) { + *pSize = 0; + return 0; + } + if( pConv->remoteArch == 0 ) { /* same architecture */ + *pSize = pData->size * pConv->count; + return 0; + } + *pSize = 0; + for( i = DT_CHAR; i < DT_MAX_PREDEFINED; i++ ) { + if( pData->bdt_used & (1<<i) ) { + *pSize += (pData->btypes[i] * basicDatatypes[i].size); + } + } + *pSize *= pConv->count; + return 0; +} diff --git a/src/datatype/dt_unpack.c b/src/datatype/dt_unpack.c new file mode 100644 index 0000000000..7a8af2b63f --- /dev/null +++ b/src/datatype/dt_unpack.c @@ -0,0 +1,617 @@ +/* -*- Mode: C; c-basic-offset:3 ; -*- */ + +#include "datatype.h" +#include "datatype_internal.h" + +void dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDesc, char* name ) +{ + printf( "\nStack %p stack_pos %d name %s\n", pStack, stack_pos, name ); + for( ;stack_pos >= 0; stack_pos-- ) { + 
printf( "%d: pos %d count %d disp %ld end_loop %d ", stack_pos, pStack[stack_pos].index, + pStack[stack_pos].count, pStack[stack_pos].disp, pStack[stack_pos].end_loop ); + if( pStack[stack_pos].index != -1 ) + printf( "[desc count %d disp %ld extent %d]\n", + pDesc[pStack[stack_pos].index].count, + pDesc[pStack[stack_pos].index].disp, + pDesc[pStack[stack_pos].index].extent ); + else + printf( "\n" ); + } + printf( "\n" ); +} + +/* + * Remember that the first item in the stack (ie. position 0) is the number + * of times the datatype is involved in the operation (ie. the count argument + * in the MPI_ call). + */ +/* Convert data from multiple input buffers (as received from the network layer) + * to a contiguous output buffer with a predefined size. + * Return 0 if everything went OK and if there is still room before the complete + * conversion of the data (need additional call with others input buffers ) + * 1 if everything went fine and the data was completly converted + * -1 something wrong occurs. 
+ */ +static int convertor_unpack_general( convertor_t* pConvertor, + struct iovec* pInputv, + unsigned int inputCount ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + int pos_desc; /* actual position in the description of the derived datatype */ + int count_desc; /* the number of items already done in the actual pos_desc */ + int end_loop; /* last element in the actual loop */ + int type; /* type at current position */ + unsigned int advance; /* number of bytes that we should advance the buffer */ + int rc; + long disp_desc = 0; /* compute displacement for truncated data */ + long disp; /* displacement at the beging of the last loop */ + dt_desc_t *pData = pConvertor->pDesc; + dt_elem_desc_t* pElems; + char* pOutput = pConvertor->pBaseBuf; + int oCount = (pData->ub - pData->lb) * pConvertor->count; + char* pInput = pInputv[0].iov_base; + int iCount = pInputv[0].iov_len; + + if( pData->opt_desc.desc != NULL ) pElems = pData->opt_desc.desc; + else pElems = pData->desc.desc; + + DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor, + pInputv[0].iov_base, pInputv[0].iov_len, inputCount ); + pStack = pConvertor->pStack + pConvertor->stack_pos; + pos_desc = pStack->index; + disp = 0; + if( pos_desc == -1 ) { + pos_desc = 0; + count_desc = pElems[0].count; + disp_desc = pElems[0].disp; + } else { + count_desc = pStack->count; + if( pElems[pos_desc].type != DT_LOOP ) { + pConvertor->stack_pos--; + pStack--; + disp = pStack->disp; + disp_desc = ( pElems[pos_desc].disp + + (pElems[pos_desc].count - count_desc) * pElems[pos_desc].extent); + } + } + DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "starting" ); + DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc ); + DUMP( "top stack info {index = %d, count = %d}\n", + pStack->index, pStack->count ); + + next_loop: + end_loop = pStack->end_loop; + while( pConvertor->stack_pos >= 0 ) { + if( pos_desc == end_loop ) { /* end of the current loop */ 
+ while( --(pStack->count) == 0 ) { /* end of loop */ + pConvertor->stack_pos--; + pStack--; + if( pConvertor->stack_pos == -1 ) + return 1; /* completed */ + } + pos_desc = pStack->index; + if( pos_desc == -1 ) + pStack->disp += (pData->ub - pData->lb); + else + pStack->disp += pElems[pos_desc].extent; + pos_desc++; + disp = pStack->disp; + count_desc = pElems[pos_desc].count; + disp_desc = pElems[pos_desc].disp; + goto next_loop; + } + if( pElems[pos_desc].type == DT_LOOP ) { + do { + PUSH_STACK( pStack, pConvertor->stack_pos, + pos_desc, pElems[pos_desc].count, + disp, pos_desc + pElems[pos_desc].disp + 1 ); + pos_desc++; + } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */ + DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "advance loops" ); + /* update the current state */ + count_desc = pElems[pos_desc].count; + disp_desc = pElems[pos_desc].disp; + goto next_loop; + } + /* now here we have a basic datatype */ + type = pElems[pos_desc].type; + rc = pConvertor->pFunctions[type]( count_desc, + pInput, iCount, pElems[pos_desc].extent, + pOutput + disp + disp_desc, oCount, pElems[pos_desc].extent, + &advance ); + if( rc <= 0 ) { + printf( "trash in the input buffer\n" ); + return -1; + } + iCount -= advance; /* decrease the available space in the buffer */ + pInput += advance; /* increase the pointer to the buffer */ + pConvertor->bConverted += advance; + if( rc != count_desc ) { + /* not all data has been converted. 
Keep the state */ + PUSH_STACK( pStack, pConvertor->stack_pos, + pos_desc, count_desc - rc, + disp + rc * pElems[pos_desc].extent, pos_desc ); + if( iCount != 0 ) + printf( "there is still room in the input buffer %d bytes\n", iCount ); + return 0; + } + pConvertor->converted += rc; /* number of elementd converted so far */ + pos_desc++; /* advance to the next data */ + count_desc = pElems[pos_desc].count; + disp_desc = pElems[pos_desc].disp; + if( iCount == 0 ) break; /* break if there is no more data in the buffer */ + } + + /* out of the loop: we have complete the data conversion or no more space + * in the buffer. + */ + if( pConvertor->pStack[0].count < 0 ) return 1; /* data succesfully converted */ + + /* I complete an element, next step I should go to the next one */ + PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, + pElems[pos_desc].count, disp, pos_desc ); + + return 0; +} + +int convertor_unpack_homogeneous( convertor_t* pConv, struct iovec* iov, unsigned int out_size ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + int pos_desc; /* actual position in the description of the derived datatype */ + int type; /* type at current position */ + int i; /* counter for basic datatype with extent */ + int stack_pos = 0; /* position on the stack */ + long lastDisp = 0, lastLength = 0; + char* pSrcBuf; + dt_desc_t* pData = pConv->pDesc; + dt_elem_desc_t* pElems; + + pSrcBuf = iov[0].iov_base; + + if( pData->flags & DT_FLAG_CONTIGUOUS ) { + long extent = pData->ub - pData->lb; + char* pDstBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted; + + if( pData->size == extent ) { + /* contiguous data or basic datatype with count */ + MEMCPY( pDstBuf, pSrcBuf, iov[0].iov_len ); + pConv->bConverted += iov[0].iov_len; + } else { + type = iov[0].iov_len; + for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) { + MEMCPY( pDstBuf, pSrcBuf, pData->size ); + pSrcBuf += pData->size; + pDstBuf += extent; + type -= pData->size; + } + 
pConv->bConverted += type; + } + return (pConv->bConverted == (pData->size * pConv->count)); + } + + pStack = pConv->pStack; + pStack->count = pConv->count; + pStack->index = -1; + pStack->disp = 0; + pos_desc = 0; + + if( pData->opt_desc.desc != NULL ) { + pElems = pData->opt_desc.desc; + pStack->end_loop = pData->opt_desc.used; + } else { + pElems = pData->desc.desc; + pStack->end_loop = pData->desc.used; + } + + DUMP_STACK( pStack, stack_pos, pElems, "starting" ); + DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc ); + DUMP( "top stack info {index = %d, count = %d}\n", + pStack->index, pStack->count ); + next_loop: + while( pos_desc <= pStack->end_loop ) { + if( pos_desc == pStack->end_loop ) { /* end of the current loop */ + if( --(pStack->count) == 0 ) { /* end of loop */ + pStack--; + if( --stack_pos == -1 ) break; + } else { + pos_desc = pStack->index; + if( pos_desc == -1 ) + pStack->disp += (pData->ub - pData->lb); + else + pStack->disp += pElems[pos_desc].extent; + } + pos_desc++; + goto next_loop; + } + if( pElems[pos_desc].type == DT_LOOP ) { + if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) { + dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]); + if( lastLength == 0 ) { + MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength ); + pSrcBuf += lastLength; + } + lastLength = pLast->extent; + for( i = 0; i < (pElems[pos_desc].count - 1); i++ ) { + MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength ); + pSrcBuf += pLast->extent; + lastDisp += pElems[pos_desc].extent; + } + pos_desc += pElems[pos_desc].disp + 1; + goto next_loop; + } else { + do { + PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count, + pStack->disp, pos_desc + pElems[pos_desc].disp ); + pos_desc++; + } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */ + } + } + /* now here we have a basic datatype */ + type = pElems[pos_desc].type; + if( (lastDisp + lastLength) == (pStack->disp + 
pElems[pos_desc].disp) ) { + lastLength += pElems[pos_desc].count * basicDatatypes[type].size; + } else { + MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength ); + pSrcBuf += lastLength; + pConv->bConverted += lastLength; + lastDisp = pStack->disp + pElems[pos_desc].disp; + lastLength = pElems[pos_desc].count * basicDatatypes[type].size; + } + pos_desc++; /* advance to the next data */ + } + + MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength ); + pConv->bConverted += lastLength; + + /* cleanup the stack */ + return 0; +} + +int convertor_unpack( convertor_t* pConvertor, + struct iovec* pInputv, + unsigned int inputCount ) +{ + dt_desc_t *pData = pConvertor->pDesc; + char* pOutput = pConvertor->pBaseBuf; + char* pInput = pInputv[0].iov_base; + int rc; + + if( pConvertor->count == 0 ) return 1; /* nothing to do */ + + if( pConvertor->flags & DT_FLAG_CONTIGUOUS ) { + if( pInputv[0].iov_base == NULL ) { + rc = pConvertor->count * pData->size; + if( pInputv[0].iov_len == 0 ) { /* give me the whole buffer */ + pInputv[0].iov_base = pConvertor->pBaseBuf + pData->true_lb; + pInputv[0].iov_len = rc; + return 1; + } else { /* what about the next chunk ? */ + pInputv[0].iov_base = pConvertor->pBaseBuf + pData->true_lb + pConvertor->bConverted; + if( pInputv[0].iov_len > (rc - pConvertor->bConverted) ) + pInputv[0].iov_len = rc - pConvertor->bConverted; + pConvertor->bConverted += pInputv[0].iov_len; + return (pConvertor->bConverted == rc); + } + } + } + if( (pInput >= pOutput) && (pInput < (pOutput + pConvertor->count * (pData->ub - pData->lb))) ) { + return 1; + } + return convertor_progress( pConvertor, pInputv, inputCount ); +} + +/* Return value: + * 0 : nothing has been done + * positive value: number of item converted. + * negative value: -1 * number of items converted, less data provided than expected + * and there are less data than the size on the remote host of the + * basic datatype. 
+ */ +#define COPY_TYPE( TYPENAME, TYPE ) \ +int copy_##TYPENAME( unsigned int count, \ + char* from, unsigned int from_len, long from_extent, \ + char* to, unsigned int to_len, long to_extent, \ + int* used ) \ +{ \ + int i, res = 1; \ + unsigned int remote_TYPE_size = sizeof(TYPE); /* TODO */ \ +\ + if( (remote_TYPE_size * count) > from_len ) { \ + count = from_len / remote_TYPE_size; \ + if( (count * remote_TYPE_size) != from_len ) { \ + DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", \ + from_len - (count * remote_TYPE_size) ); \ + res = -1; \ + } \ + DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n", \ + #TYPE, count, from, from_len, to, to_len ); \ + } else \ + DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \ + #TYPE, count, from, from_len, to, to_len ); \ +\ + if( (from_extent == sizeof(TYPE)) && (to_extent == sizeof(TYPE)) ) { \ + MEMCPY( to, from, count * sizeof(TYPE) ); \ + } else { \ + for( i = 0; i < count; i++ ) { \ + MEMCPY( to, from, sizeof(TYPE) ); \ + to += to_extent; \ + from += from_extent; \ + } \ + } \ + *used = count * sizeof(TYPE) ; \ + return res * count; \ +} + +COPY_TYPE( char, char ); +COPY_TYPE( short, short ); +COPY_TYPE( int, int ); +COPY_TYPE( float, float ); +COPY_TYPE( long, long ); +/*COPY_TYPE( double, double );*/ +COPY_TYPE( long_long, long long ); +COPY_TYPE( long_double, long double ); +COPY_TYPE( complex_float, complex_float_t ); +COPY_TYPE( complex_double, complex_double_t ); + +int copy_double( unsigned int count, + char* from, unsigned int from_len, long from_extent, + char* to, unsigned int to_len, long to_extent, + int* used ) +{ + int i, res = 1; + unsigned int remote_double_size = sizeof(double); /* TODO */ + + if( (remote_double_size * count) > from_len ) { + count = from_len / remote_double_size; + if( (count * remote_double_size) != from_len ) { + DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", + from_len - (count 
* remote_double_size) ); + res = -1; + } + DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n", + "double", count, from, from_len, to, to_len ); + } else + DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", + "double", count, from, from_len, to, to_len ); + + + if( (from_extent == sizeof(double)) && (to_extent == sizeof(double)) ) { + MEMCPY( to, from, count * sizeof(double) ); + } else { + for( i = 0; i < count; i++ ) { + MEMCPY( to, from, sizeof(double) ); + to += to_extent; + from += from_extent; + } + } + *used = count * sizeof(double) ; + return res * count; +} + +conversion_fct_t copy_functions[DT_MAX_PREDEFINED] = { + (conversion_fct_t)NULL, /* DT_LOOP */ + (conversion_fct_t)NULL, /* DT_LB */ + (conversion_fct_t)NULL, /* DT_UB */ + (conversion_fct_t)NULL, /* DT_SPACE */ + (conversion_fct_t)copy_char, /* DT_CHAR */ + (conversion_fct_t)copy_char, /* DT_BYTE */ + (conversion_fct_t)copy_short, /* DT_SHORT */ + (conversion_fct_t)copy_int, /* DT_INT */ + (conversion_fct_t)copy_float, /* DT_FLOAT */ + (conversion_fct_t)copy_long, /* DT_LONG */ + (conversion_fct_t)copy_double, /* DT_DOUBLE */ + (conversion_fct_t)copy_long_long, /* DT_LONG_LONG */ + (conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */ + (conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */ + (conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */ +}; + +/* Should we supply buffers to the convertor or can we use directly + * the user buffer ? 
+ */ +int convertor_need_buffers( convertor_t* pConvertor ) +{ + if( pConvertor->flags & DT_FLAG_CONTIGUOUS ) return 0; + return 1; +} + +int convertor_init_for_recv( convertor_t* pConv, unsigned int flags, + dt_desc_t* pData, int count, void* pUserBuf ) +{ + dt_increase_ref( pData ); + pConv->pDesc = pData; + pConv->flags = CONVERTOR_RECV; + if( pConv->pStack != NULL ) free( pConv->pStack ); + pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (pData->btypes[DT_LOOP] + 2) ); + pConv->stack_pos = 0; + pConv->pStack[0].index = -1; /* fake entry for the first step */ + pConv->pStack[0].count = count; /* fake entry for the first step */ + pConv->pStack[0].disp = 0; + /* first we should decide which data representation will be used TODO */ + pConv->pStack[0].end_loop = pData->desc.used; + pConv->pBaseBuf = pUserBuf; + pConv->available_space = count * (pData->ub - pData->lb); + pConv->count = count; + pConv->pFunctions = copy_functions; + pConv->converted = 0; + pConv->bConverted = 0; + if( (pData->flags & DT_FLAG_CONTIGUOUS) && (pData->size == (pData->ub - pData->lb)) ) + pConv->flags |= DT_FLAG_CONTIGUOUS; + pConv->fAdvance = convertor_unpack_homogeneous; + return 0; +} + +convertor_t* convertor_get_copy( convertor_t* pConvertor ) +{ + convertor_t* pConv = (convertor_t*)calloc( 1, sizeof(convertor_t) ); + MEMCPY( pConv, pConvertor, sizeof(convertor_t) ); + pConv->pStack = NULL; + pConv->pDesc = NULL; + pConv->count = 0; + pConv->converted = 0; + pConv->bConverted = 0; + pConv->freebuf = NULL; + return pConv; +} + +int convertor_destroy( convertor_t** ppConv ) +{ + if( (*ppConv) == NULL ) return 0; + if( (*ppConv)->pStack != NULL ) free( (*ppConv)->pStack ); + if( (*ppConv)->pDesc != NULL ) dt_decrease_ref( (*ppConv)->pDesc ); + if( (*ppConv)->freebuf != NULL ) free( (*ppConv)->freebuf ); + free( (*ppConv) ); + *ppConv = NULL; + return 0; +} + +/* Get the number of elements from the data associated with this convertor that can be + * retrieved from a recevied 
buffer with the size iSize. + * To spped-up this function you should use it with a iSize == to the modulo + * of the original size and the size of the data. + * This function should be called with a initialized clean convertor. + * Return value: + * positive = number of basic elements inside + * negative = some error occurs + */ +int dt_get_element_count( dt_desc_t* pData, size_t iSize ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + int pos_desc; /* actual position in the description of the derived datatype */ + int end_loop; /* last element in the actual loop */ + int type; /* type at current position */ + int rc, nbElems = 0; + int stack_pos = 0; + + DUMP( "dt_count_elements( %p, %d )\n", pData, iSize ); + pStack = alloca( sizeof(pStack) * (pData->btypes[DT_LOOP] + 2) ); + pStack->count = 1; + pStack->index = -1; + pStack->end_loop = pData->desc.used; + pStack->disp = 0; + pos_desc = 0; + + DUMP_STACK( pStack, stack_pos, pElems, "starting" ); + DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc ); + DUMP( "top stack info {index = %d, count = %d}\n", + pStack->index, pStack->count ); + + next_loop: + end_loop = pStack->end_loop; + while( stack_pos >= 0 ) { + if( pos_desc == end_loop ) { /* end of the current loop */ + while( --(pStack->count) == 0 ) { /* end of loop */ + stack_pos--; + pStack--; + if( stack_pos == -1 ) + return nbElems; /* completed */ + } + pos_desc = pStack->index; + if( pos_desc == -1 ) + pStack->disp += (pData->ub - pData->lb); + else + pStack->disp += pData->desc.desc[pos_desc].extent; + pos_desc++; + goto next_loop; + } + if( pData->desc.desc[pos_desc].type == DT_LOOP ) { + do { + PUSH_STACK( pStack, stack_pos, pos_desc, pData->desc.desc[pos_desc].count, + 0, pos_desc + pData->desc.desc[pos_desc].disp ); + pos_desc++; + } while( pData->desc.desc[pos_desc].type == DT_LOOP ); /* let's start another loop */ + DUMP_STACK( pStack, stack_pos, pData->desc, "advance loops" ); + goto next_loop; + } + 
/* now here we have a basic datatype */ + type = pData->desc.desc[pos_desc].type; + rc = pData->desc.desc[pos_desc].count * basicDatatypes[type].size; + if( rc >= iSize ) { + nbElems += iSize / basicDatatypes[type].size; + break; + } + nbElems += pData->desc.desc[pos_desc].count; + iSize -= rc; + + pos_desc++; /* advance to the next data */ + } + + /* cleanup the stack */ + return nbElems; +} + +int dt_copy_content_same_dt( dt_desc_t* pData, int count, + char* pDestBuf, char* pSrcBuf ) +{ + dt_stack_t* pStack; /* pointer to the position on the stack */ + int pos_desc; /* actual position in the description of the derived datatype */ + int type; /* type at current position */ + int stack_pos = 0; + long lastDisp = 0, lastLength = 0; + dt_elem_desc_t* pElems; + + if( (pData->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) { + /* basic datatype with count */ + MEMCPY( pDestBuf, pSrcBuf, pData->size * count ); + return 0; + } + + pStack = alloca( sizeof(pStack) * (pData->btypes[DT_LOOP]+1) ); + pStack->count = count; + pStack->index = -1; + pStack->disp = 0; + pos_desc = 0; + + if( pData->opt_desc.desc != NULL ) { + pElems = pData->opt_desc.desc; + pStack->end_loop = pData->opt_desc.used; + } else { + pElems = pData->desc.desc; + pStack->end_loop = pData->desc.used; + } + + DUMP_STACK( pStack, stack_pos, pElems, "starting" ); + DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc ); + DUMP( "top stack info {index = %d, count = %d}\n", + pStack->index, pStack->count ); + + next_loop: + while( pos_desc <= pStack->end_loop ) { + if( pos_desc == pStack->end_loop ) { /* end of the current loop */ + if( --(pStack->count) == 0 ) { /* end of loop */ + pStack--; + if( --stack_pos == -1 ) break; + } else + pos_desc = pStack->index; + if( pos_desc == -1 ) + pStack->disp += (pData->ub - pData->lb); + else + pStack->disp += pElems[pos_desc].extent; + pos_desc++; + goto next_loop; + } + if( pElems[pos_desc].type == DT_LOOP ) { + do { + PUSH_STACK( pStack, stack_pos, 
pos_desc, pElems[pos_desc].count, + pStack->disp, pos_desc + pElems[pos_desc].disp ); + pos_desc++; + } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */ + DUMP_STACK( pStack, stack_pos, pElems, "advance loops" ); + goto next_loop; + } + /* now here we have a basic datatype */ + type = pElems[pos_desc].type; + if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) { + lastLength += pElems[pos_desc].count * basicDatatypes[type].size; + } else { + MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength ); + lastDisp = pStack->disp + pElems[pos_desc].disp; + lastLength = pElems[pos_desc].count * basicDatatypes[type].size; + } + pos_desc++; /* advance to the next data */ + } + + MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength ); + /* cleanup the stack */ + return 0; +}