1
1
This commit was SVN r918.
Этот коммит содержится в:
George Bosilca 2004-03-18 18:06:33 +00:00
родитель cd6be89d0d
Коммит 5b0c135466
17 изменённых файлов: 3004 добавлений и 661 удалений

Просмотреть файл

@ -9,19 +9,13 @@ noinst_LTLIBRARIES = libdatatype.la
# Source code files
headers = \
datatype.h
headers = datatype.h datatype_internal.h
libdatatype_la_SOURCES = \
$(headers) \
datatype.c \
datatype_copy.c \
datatype_crc32.c \
datatype_create.c \
datatype_delete.c \
datatype_memcpy.c \
datatype_pack.c \
datatype_sum32.c
dt_add.c dt_create.c dt_create_array.c dt_create_dup.c dt_create_indexed.c \
dt_create_struct.c dt_create_vector.c dt_destroy.c dt_module.c dt_old_limits.c \
dt_optimize.c dt_pack.c dt_unpack.c
# Conditionally install the header files

Просмотреть файл

@ -1,664 +1,263 @@
/*
* $HEADER$
*/
/* -*- Mode: C; c-basic-offset:3 ; -*- */
/** @file
*
* lam_datatype_t interface for LAM internal data type representation
*
* lam_datatype_t is a class which represents contiguous or
* non-contiguous data together with constituent type-related
* information. It is the LAM's-eye view of MPI_Datatype.
*/
#ifndef LAM_DATATYPE_H_INCLUDED
#define LAM_DATATYPE_H_INCLUDED 1
#include <assert.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <string.h>
#include <stdlib.h>
#ifndef DATATYPE_H_HAS_BEEN_INCLUDED
#define DATATYPE_H_HAS_BEEN_INCLUDED
#include "lam_config.h"
#include "constants.h"
#include "lfc/lam_object.h"
#include "lfc/lam_hash_table.h"
#include "types.h"
#include "mpi.h"
/* fortran sizes and alignments ***************************************/
extern int lam_sizeof_f77_integer;
extern int lam_sizeof_f77_real;
extern int lam_sizeof_f77_dblprec;
extern int lam_sizeof_f77_complex;
extern int lam_sizeof_f77_dblcomplex;
extern int lam_alignment_f77_integer;
extern int lam_alignment_f77_real;
extern int lam_alignment_f77_dblprec;
extern int lam_alignment_f77_complex;
extern int lam_alignment_f77_dblcomplex;
/* enums **************************************************************/
/**
* Datatype state flags
#define DT_LOOP 0x00
#define DT_LB 0x01
#define DT_UB 0x02
#define DT_SPACE 0x03
#define DT_CHAR 0x04
#define DT_BYTE 0x05
#define DT_SHORT 0x06
#define DT_INT 0x07
#define DT_FLOAT 0x08
#define DT_LONG 0x09
#define DT_DOUBLE 0x0A
#define DT_LONG_LONG 0x0B
#define DT_LONG_DOUBLE 0x0C
#define DT_COMPLEX_FLOAT 0x0D
#define DT_COMPLEX_DOUBLE 0x0E
#define DT_END_LOOP 0x0F
/* if there are more basic datatypes than the number of bytes in the int type
* the bdt_used field of the data description struct should be changed to long.
*/
enum lam_datatype_state_t {
LAM_DATATYPE_STATE_COMMITTED = 1 << 0,
LAM_DATATYPE_STATE_CONTIGUOUS = 1 << 1,
LAM_DATATYPE_STATE_FORTRAN = 1 << 2,
LAM_DATATYPE_STATE_OPTIMIZED = 1 << 3,
LAM_DATATYPE_STATE_DONT_OPTIMIZE = 1 << 4,
LAM_DATATYPE_STATE_XDR = 1 << 5,
/* etc. */
#define DT_MAX_PREDEFINED 0x10
/* Flags describing the state and the properties of a datatype. */
#define DT_FLAG_DESTROYED 0x0001 /* destroyed by the user, but some other layers still hold a reference */
#define DT_FLAG_COMMITED 0x0002 /* committed: ready to be used for a send/recv operation */
#define DT_FLAG_CONTIGUOUS 0x0004 /* contiguous datatype */
#define DT_FLAG_OVERLAP 0x0008 /* datatype is improper for a recv operation */
#define DT_FLAG_USER_LB 0x0010 /* has a user-defined LB */
#define DT_FLAG_USER_UB 0x0020 /* has a user-defined UB */
#define DT_FLAG_FOREVER 0x0040 /* cannot be removed: initial and predefined datatypes */
#define DT_FLAG_IN_LOOP 0x0080 /* we are inside a loop */
#define DT_FLAG_INITIAL 0x0100 /* one of the initial datatypes */
#define DT_FLAG_DATA 0x0200 /* data or control structure */
/* Combination of INITIAL, COMMITED, FOREVER and CONTIGUOUS. */
#define DT_FLAG_BASIC (DT_FLAG_INITIAL | DT_FLAG_COMMITED | DT_FLAG_FOREVER | DT_FLAG_CONTIGUOUS)
#define DT_INCREASE_STACK 32
/* the basic element. A data description is composed
* by a set of basic elements.
*/
typedef struct __dt_elem_desc {
unsigned short flags; /* flags for the record */
unsigned short type; /* the basic data type id */
unsigned int count; /* number of elements */
long disp; /* displacement of the first element */
unsigned int extent; /* extent of each element */
} dt_elem_desc_t;
typedef struct {
float r;
float i;
} complex_float_t;
typedef struct {
double r;
double i;
} complex_double_t;
/* The basic memory zone description. The idea is to be able to represent the
* data as an array of zones, which makes it easy to detect when concatenating
* several data items leads to merging contiguous zones of memory.
*/
typedef struct __dt_zone_desc {
int useless;
} dt_zone_desc_t;
typedef struct __dt_struct_desc {
int length; /* the maximum number of elements in the description array */
int used; /* the number of used elements in the description array */
dt_elem_desc_t* desc;
} dt_type_desc_t;
/* the data description.
*/
typedef struct __dt_desc {
lam_object_t super;
unsigned int size; /* total size in bytes of the memory used by the data if
* the data is put on a contiguous buffer */
long true_lb;
long true_ub; /* the true ub of the data without user defined lb and ub */
unsigned int align; /* data should be aligned to */
long lb; /* lower bound in memory */
long ub; /* upper bound in memory */
unsigned short flags; /* the flags */
unsigned short id; /* data id, normally the index in the data array. */
unsigned int nbElems; /* total number of elements inside the datatype */
unsigned int bdt_used; /* which basic datatypes are used in the data description */
/* Attribute fields */
lam_hash_table_t *keyhash;
char name[MPI_MAX_OBJECT_NAME];
dt_type_desc_t desc; /* the data description */
dt_type_desc_t opt_desc; /* short description of the data used when conversion is useless
* or in the send case (without conversion) */
void* args; /* data description for the user */
/* basic elements count used to compute the size of the datatype for
* remote nodes */
unsigned int btypes[DT_MAX_PREDEFINED];
} dt_desc_t, lam_datatype_t;
OBJ_CLASS_DECLARATION( lam_datatype_t );
extern dt_desc_t basicDatatypes[];
/* Two-argument minimum/maximum helpers: LMAX/LMIN operate on long values,
 * IMAX/IMIN on int values.
 *
 * Compilers that do not define __GNUC__ get plain static functions; the
 * others get statement-expression macros which copy both arguments into
 * temporaries first, so every argument is evaluated exactly once either way.
 */
#if !defined(__GNUC__)
static long LMAX( long x, long y ) { return ( y > x ? y : x ); }
static long LMIN( long x, long y ) { return ( y > x ? x : y ); }
static int IMAX( int x, int y ) { return ( y > x ? y : x ); }
static int IMIN( int x, int y ) { return ( y > x ? x : y ); }
#else
#define LMAX(A,B) ({ long lhs_ = (A), rhs_ = (B); (rhs_ > lhs_ ? rhs_ : lhs_); })
#define LMIN(A,B) ({ long lhs_ = (A), rhs_ = (B); (rhs_ > lhs_ ? lhs_ : rhs_); })
#define IMAX(A,B) ({ int lhs_ = (A), rhs_ = (B); (rhs_ > lhs_ ? rhs_ : lhs_); })
#define IMIN(A,B) ({ int lhs_ = (A), rhs_ = (B); (rhs_ > lhs_ ? lhs_ : rhs_); })
#endif /* !__GNUC__ */
typedef struct __dt_stack {
int index;
int count;
int end_loop;
long disp;
} dt_stack_t;
typedef struct __dt_convert {
char* buf;
unsigned int length;
dt_stack_t* pStack;
dt_desc_t* pDesc;
} dt_convert_t;
int dt_load( void );
int dt_unload( void );
dt_desc_t* dt_create( int expectedSize );
int dt_commit( dt_desc_t** );
#define dt_free dt_destroy
int dt_free( dt_desc_t** );
int dt_destroy( dt_desc_t** );
void dt_dump( dt_desc_t* pData );
void dt_dump_complete( dt_desc_t* pData );
/* data creation functions */
int dt_duplicate( dt_desc_t* oldType, dt_desc_t** newType );
int dt_create_contiguous( size_t count, dt_desc_t* oldType, dt_desc_t** newType );
int dt_create_vector( size_t count, int bLength, long stride,
dt_desc_t* oldType, dt_desc_t** newType );
int dt_create_hvector( size_t count, int bLength, long stride,
dt_desc_t* oldType, dt_desc_t** newType );
int dt_create_indexed( size_t count, int* pBlockLength, int* pDisp,
dt_desc_t* oldType, dt_desc_t** newType );
int dt_create_hindexed( size_t count, int* pBlockLength, long* pDisp,
dt_desc_t* oldType, dt_desc_t** newType );
int dt_create_indexed_block( size_t count, int bLength, int* pDisp,
dt_desc_t* oldType, dt_desc_t** newType );
int dt_create_struct( size_t count, size_t* pBlockLength, long* pDisp,
dt_desc_t** pTypes, dt_desc_t** newType );
int dt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t** newType );
int dt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts,
int order, dt_desc_t* oldType, dt_desc_t** newType );
int dt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib,
int* pDArgs, int* pPSizes, int order, dt_desc_t* oldType,
dt_desc_t** newType );
int dt_add( dt_desc_t* pdtBase, dt_desc_t* pdtNew, unsigned int count, long disp, long extent );
int dt_type_lb( dt_desc_t* pData, long* disp );
int dt_type_ub( dt_desc_t* pData, long* disp );
int dt_type_size ( dt_desc_t* pData, int *size );
int dt_type_extent( dt_desc_t* pData, long* extent );
int dt_type_resize( dt_desc_t* pOld, long lb, long extent, dt_desc_t** pNew );
int dt_get_extent( dt_desc_t* datatype, long* lb, long* extent);
int dt_get_true_extent( dt_desc_t* datatype, long* true_lb, long* true_extent);
int dt_get_element_count( dt_desc_t* datatype, size_t iSize );
int dt_copy_content_same_dt( dt_desc_t* pData, int count, char* pDestBuf, char* pSrcBuf );
#define dt_increase_ref(PDT) OBJ_RETAIN( PDT )
#define dt_decrease_ref(PDT) OBJ_RELEASE( PDT )
int dt_optimize_short( dt_desc_t* pData, int count, dt_type_desc_t* pTypeDesc );
/* Bit-flag helpers.
 *
 * Every macro stores the new value back into INT_VALUE, which therefore has
 * to be a modifiable lvalue and is evaluated twice. The expansions are fully
 * parenthesized so that the result can be used inside a larger expression
 * (comparison, condition, ...) without being re-associated by operator
 * precedence.
 *
 * NOTE: REMOVE_FLAG is implemented with XOR, i.e. it toggles FLAG: the bits
 * are cleared only if they are currently set and become set if they were
 * clear. Use UNSET_FLAG to clear bits unconditionally.
 */
#define REMOVE_FLAG( INT_VALUE, FLAG ) ((INT_VALUE) = (INT_VALUE) ^ (FLAG))
#define SET_FLAG( INT_VALUE, FLAG ) ((INT_VALUE) = (INT_VALUE) | (FLAG))
#define UNSET_FLAG( INT_VALUE, FLAG) ((INT_VALUE) = (INT_VALUE) & (~(FLAG)))
/* Same operations, specialized for the DT_FLAG_CONTIGUOUS bit. */
#define REMOVE_CONTIGUOUS_FLAG( INT_VALUE ) REMOVE_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
#define SET_CONTIGUOUS_FLAG( INT_VALUE ) SET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
#define UNSET_CONTIGUOUS_FLAG( INT_VALUE ) UNSET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
/* flags for the datatypes */
typedef int (*conversion_fct_t)( unsigned int count,
void* from, unsigned int from_len, long from_extent,
void* to, unsigned int in_length, long to_extent,
unsigned int* used );
/* Convertor flags. All values are 0x00010000 or above: the low 16 bits are
 * kept free for data flags.
 */
#define CONVERTOR_USELESS 0x00010000
#define CONVERTOR_RECV 0x00020000
#define CONVERTOR_SEND 0x00040000
/* Convertor state, stored in the top byte of the flags. */
#define CONVERTOR_STATE_MASK 0xFF000000
#define CONVERTOR_STATE_START 0x01000000
#define CONVERTOR_STATE_COMPLETE 0x02000000
/* Historical misspelling of CONVERTOR_STATE_COMPLETE, kept as an alias so
 * that code written against the old name still compiles. */
#define CONVEROTR_STATE_COMPLETE CONVERTOR_STATE_COMPLETE
#define CONVERTOR_STATE_ALLOC 0x04000000
typedef struct __struct_convertor convertor_t;
typedef int (*convertor_advance_fct_t)( convertor_t* pConvertor,
struct iovec* pInputv,
unsigned int inputCount );
/* and now the convertor stuff */
struct __struct_convertor {
dt_desc_t* pDesc;
long remoteArch;
dt_stack_t* pStack;
/* the convertor functions pointer */
/* the local stack for the actual conversion */
int converted; /* the number of already converted elements */
int bConverted; /* the size of already converted elements in bytes */
unsigned int flags;
unsigned int count;
unsigned int stack_pos;
char* pBaseBuf;
unsigned int available_space;
void* freebuf;
convertor_advance_fct_t fAdvance;
conversion_fct_t* pFunctions;
};
extern conversion_fct_t copy_functions[DT_MAX_PREDEFINED];
/* some convertor flags */
/* Advance a conversion by calling the convertor's own fAdvance callback with
 * the convertor itself, the iovec array and the iovec count.
 *
 * The expansion is a single parenthesized expression without a trailing
 * semicolon: the int returned by the callback can be used by the caller and
 * the macro behaves like a function call inside if/else bodies.
 * PCONV is evaluated twice.
 */
#define convertor_progress( PCONV, IOVEC, COUNT ) \
((PCONV)->fAdvance( (PCONV), (IOVEC), (COUNT) ))
/* and finally the convertor functions */
convertor_t* convertor_create( int remote_arch, int mode );
int convertor_init_for_send( convertor_t* pConv, unsigned int flags,
dt_desc_t* pData, int count, void* pUserBuf );
int convertor_init_for_recv( convertor_t* pConv, unsigned int flags,
dt_desc_t* pData, int count, void* pUserBuf );
convertor_t* convertor_get_copy( convertor_t* pConvertor );
int convertor_need_buffers( convertor_t* pConvertor );
int convertor_pack( convertor_t* pConv, struct iovec* in, unsigned int in_size );
int convertor_unpack( convertor_t* pConv, struct iovec* out, unsigned int out_size );
int convertor_destroy( convertor_t** ppConv );
int convertor_get_packed_size( convertor_t* pConv, unsigned int* pSize );
int convertor_get_unpacked_size( convertor_t* pConv, unsigned int* pSize );
#endif /* DATATYPE_H_HAS_BEEN_INCLUDED */
enum {
LAM_DATATYPE_PACK = 0,
LAM_DATATYPE_UNPACK,
LAM_DATATYPE_PACK_COMPLETE = 0,
LAM_DATATYPE_PACK_INCOMPLETE,
TYPE_PACK_INCOMPLETE_VECTOR,
TYPE_PACK_INCOMPLETE_DATAVEC_REPEAT,
TYPE_PACK_INCOMPLETE_DATAVEC_ELEMENT,
TYPE_PACK_ERROR = -1
};
/**
* Enumeration of datatype creation functions
*/
enum lam_datatype_kind_t {
LAM_DATATYPE_KIND_BASIC = 0,
LAM_DATATYPE_KIND_CONTIG,
LAM_DATATYPE_KIND_DUP,
LAM_DATATYPE_KIND_HINDEXED,
LAM_DATATYPE_KIND_HVECTOR,
LAM_DATATYPE_KIND_INDEXED,
LAM_DATATYPE_KIND_LB,
LAM_DATATYPE_KIND_PACKED,
LAM_DATATYPE_KIND_STRUCT,
LAM_DATATYPE_KIND_UB,
LAM_DATATYPE_KIND_VECTOR,
LAM_DATATYPE_KIND_CONTIG_FORTRAN,
LAM_DATATYPE_KIND_HINDEXED_FORTRAN,
LAM_DATATYPE_KIND_HVECTOR_FORTRAN,
LAM_DATATYPE_KIND_INDEXED_FORTRAN,
LAM_DATATYPE_KIND_STRUCT_FORTRAN,
LAM_DATATYPE_KIND_VECTOR_FORTRAN
};
typedef enum lam_datatype_state_t lam_datatype_state_t;
typedef enum lam_datatype_kind_t lam_datatype_kind_t;
/* types **************************************************************/
typedef struct lam_datatype_t lam_datatype_t;
typedef struct lam_datavec_element_t lam_datavec_element_t;
typedef struct lam_datavec_t lam_datavec_t;
typedef struct lam_dataxdr_t lam_dataxdr_t;
typedef struct lam_pack_state_t lam_pack_state_t;
typedef struct lam_memcpy_state_t lam_memcpy_state_t;
/**
* Function prototype for a generalized memcpy()
*/
typedef void *(lam_memcpy_fn_t) (void *restrict dst,
const void *restrict src,
size_t size, lam_memcpy_state_t *check);
/**
* Internal representation of MPI datatype
*/
struct lam_datatype_t {
lam_object_t super; /**< object super class */
char name[MPI_MAX_OBJECT_NAME]; /**< object name */
int flags; /**< bit flags */
/* Attributes */
lam_hash_table_t *keyhash;
/* cached information */
ssize_t lower_bound;
size_t extent;
size_t packed_size; /**< size in bytes, ignoring gaps */
int nbasic; /**< number of basic elements */
/* optimized representation */
size_t datavec_size; /**< size of optimized representation */
lam_datavec_t *datavec; /**< optimized representation (may be null) */
/* XDR representation */
size_t dataxdr_size; /**< size of XDR representation */
lam_dataxdr_t *dataxdr; /**< XDR representation (may be null) */
/* full representation (c.f. MPI_Type_create_struct) */
struct {
lam_datatype_kind_t c_kind; /**< creation function */
int c_count; /**< number of blocks */
int *c_blocklengths; /**< number of elements in each block */
MPI_Aint *c_offset; /**< stride/displacement as appropriate */
lam_datatype_t **c_types; /**< array of types (array) */
} creator;
};
OBJ_CLASS_DECLARATION(lam_datatype_t);
/**
* An optimized representation of noncontiguous data used by packing
* routines
*/
struct lam_datavec_t {
size_t nrepeat;
ssize_t repeat_offset;
size_t nelement;
lam_datavec_element_t *element;
};
/**
* An element of a data type in optimized form
*/
struct lam_datavec_element_t {
size_t size; /**< size in bytes of element */
ssize_t offset; /**< offset from start of data type */
ssize_t seq_offset; /**< offset from start of packed data type */
};
/**
* XDR representation of a datatype
*/
struct lam_dataxdr_element_t {
/* to be done */
void *xdrs; /**< XDR stream */
};
/**
* State of incremental memcpy with checksum or CRC
*/
struct lam_memcpy_state_t {
size_t size; /**< total size in bytes of the object being checksummed / CRCed */
size_t partial_size; /**< size of non- uint32_t to be carried over to next call */
uint32_t partial_int; /**< value of non- uint32_t to be carried over to next call */
uint32_t sum; /**< current value of the CRC or checksum */
bool first_call; /**< is this the first call for this checksum/CRC? */
};
/**
* Pack state
*
* Structure to store the state of an incremental pack/unpack of a
* datatype.
*/
struct lam_pack_state_t {
size_t type_index; /**< current index of datatype */
size_t repeat_index; /**< current index of datavec repeat */
size_t element_index; /**< current index of datavec element */
size_t datavec_offset; /**< current offset into datavec element */
size_t packed_offset; /**< current offset into packed buffer */
};
/* interface **********************************************************/
BEGIN_C_DECLS
/**
 * Test 32-bit alignment of an address
 *
 * @param addr An address
 * @return true if the two low-order bits of the address are zero (the
 *         address is a multiple of 4), false otherwise
 */
static inline bool lam_aligned32(void *addr)
{
    const uintptr_t low_bits = (uintptr_t) addr & (uintptr_t) 3;

    return low_bits == (uintptr_t) 0;
}
/**
 * Test 64-bit alignment of an address
 *
 * @param addr An address
 * @return true if the three low-order bits of the address are zero (the
 *         address is a multiple of 8), false otherwise
 */
static inline bool lam_aligned64(void *addr)
{
    const uintptr_t low_bits = (uintptr_t) addr & (uintptr_t) 7;

    return low_bits == (uintptr_t) 0;
}
/**
* Return a 32-bit checksum of (the contents of) an array of data
* types
*
* @param addr Data type array
* @param count Size of array
* @param datatype Datatype descriptor
* @return Checksum
*/
uint32_t lam_datatype_sum32(const void *addr,
size_t count,
lam_datatype_t *datatype);
/**
* Copy (the contents of) an array of data types
*
* @param dst Output data type array
* @param src Input data type array
* @param count Size of array
* @param datatype Datatype descriptor
* @param check Pointer to checksum or CRC
* @return 0 on success, -1 on error
*/
int lam_datatype_copy(void *dst,
const void *src,
size_t count,
lam_datatype_t *datatype,
lam_memcpy_fn_t *memcpy_fn,
lam_memcpy_state_t *check);
/**
* Copy (the contents of) an array of data types, and convert to
* another datatype
*
* @param dst Output data type array
* @param dst_count Size of output array
* @param dst_datatype Output datatype descriptor
* @param src Input data type array
* @param src_count Size of input array
* @param src_datatype Input datatype descriptor
* @param checksum Checksum
* @return 0 on success, -1 on error
*/
int lam_datatype_convert(void *dst,
lam_datatype_t *dst_datatype,
size_t dst_count,
const void *src,
lam_datatype_t *src_datatype,
size_t src_count,
lam_memcpy_fn_t *memcpy_fn,
lam_memcpy_state_t *check);
/**
 * Initialize pack state structure
 *
 * Resets every index and offset of the state to 0, which is the state
 * expected before the first call of an incremental pack/unpack.
 *
 * @param state Pointer to state structure (must not be NULL; checked with
 *              assert())
 * @return 0; the function cannot fail
 */
static inline int lam_pack_state_init(lam_pack_state_t *state)
{
    assert(state);
    state->type_index = 0;
    state->repeat_index = 0;
    state->element_index = 0;
    state->datavec_offset = 0;
    state->packed_offset = 0;
    /* The function is declared int, so it has to return a value: using the
     * result of a non-void function that ends without a return is undefined.
     * NOTE(review): 0 is presumably LAM_SUCCESS - confirm against constants.h. */
    return 0;
}
/**
* Incrementally pack or unpack a buffer to/from an array of
* datatypes.
*
* DO NOT USE THIS FUNCTION DIRECTLY: use lam_datatype_pack or
* lam_datatype_unpack instead.
*
* @param direction 0 for pack , non-zero for unpack
* @param state current state of the incremental pack/unpack
* @param typebuf array of types
* @param ntype size of type array
* @param buf buffer to pack into/unpack from
* @param bufsize size of buffer
* @param datatype type descriptor
* @param memcpy_fn pointer to memcpy function
* @param check pointer to checksum
* @return 0 complete, non-zero otherwise
*
* Incrementally copy data type arrays to/from a packed buffer by
* iterating over the type and type_map until we finish or run out of
* room.
*
* The state (all members) should be initialized to 0 before the first
* call.
*/
int lam_datatype_packer(lam_pack_state_t *state,
void *buf,
size_t bufsize,
void *typebuf,
size_t ntype,
lam_datatype_t *datatype,
lam_memcpy_fn_t *memcpy_fn,
lam_memcpy_state_t *check,
int pack_direction);
/**
 * Incrementally pack a buffer from an array of datatypes.
 *
 * Thin wrapper around lam_datatype_packer() which supplies
 * LAM_DATATYPE_PACK as the pack direction; all other arguments are
 * forwarded unchanged and the packer's return value is returned as is.
 */
static inline int lam_datatype_pack(lam_pack_state_t *state,
                                    void *buf,
                                    size_t bufsize,
                                    const void *typebuf,
                                    size_t ntype,
                                    lam_datatype_t *datatype,
                                    lam_memcpy_fn_t *memcpy_fn,
                                    lam_memcpy_state_t *check)
{
    /* lam_datatype_packer() takes a non-const type buffer, so the const
     * qualifier has to be cast away before forwarding. */
    void *source_types = (void *) typebuf;

    return lam_datatype_packer(state, buf, bufsize, source_types, ntype,
                               datatype, memcpy_fn, check, LAM_DATATYPE_PACK);
}
/**
 * Incrementally unpack a buffer to an array of datatypes.
 *
 * Thin wrapper around lam_datatype_packer() which supplies
 * LAM_DATATYPE_UNPACK as the pack direction; all other arguments are
 * forwarded unchanged and the packer's return value is returned as is.
 */
static inline int lam_datatype_unpack(lam_pack_state_t *state,
                                      const void *buf,
                                      size_t bufsize,
                                      void *typebuf,
                                      size_t ntype,
                                      lam_datatype_t *datatype,
                                      lam_memcpy_fn_t *memcpy_fn,
                                      lam_memcpy_state_t *check)
{
    /* lam_datatype_packer() takes a non-const packed buffer, so the const
     * qualifier has to be cast away before forwarding. */
    void *packed = (void *) buf;

    return lam_datatype_packer(state, packed, bufsize, typebuf, ntype,
                               datatype, memcpy_fn, check, LAM_DATATYPE_UNPACK);
}
/**
* Incrementally generate an iovec for gathering from an array of
* datatypes
*
* @param state current state of the incremental pack/unpack
* @param base_addr base address for iovec offsets
* @param vec iovec buffer
* @param vec_count maximum length of iovec buffer
* @param max_bytes maximum bytes addressed by iovec
* @param buf buffer to pack into/unpack from
* @param bufsize size of buffer
* @param typebuf array of types
* @param ntype size of type array
* @param type type descriptor
* @return 0 if complete, non-zero otherwise
*
* Incrementally traverse an array of datatypes and generate an iovec
* of at most length vec_count and addressing at most max_bytes. This
* can be used to do a (partial) RDMA gather of the datatype array.
*
* The state (all members) should be initialized to 0 before the first
* call.
*/
int lam_datatype_gather_iovec(lam_pack_state_t *state,
void *base_addr,
struct iovec *vec,
size_t vec_count,
size_t max_bytes,
const void *typebuf,
size_t ntype,
lam_datatype_t *datatype,
lam_memcpy_fn_t *memcpy_fn,
lam_memcpy_state_t *);
/**
* Incrementally generate an iovec for scattering from a packed array
* of datatypes
*
* @param state current state of the incremental pack/unpack
* @param base_addr base address for iovec offsets
* @param vec iovec buffer
* @param vec_count maximum length of iovec buffer
* @param max_bytes maximum bytes addressed by iovec
* @param buf packed buffer
* @param bufsize size of buffer
* @param typebuf array of types
* @param ntype size of type array
* @param type type descriptor
* @return 0 if complete, non-zero otherwise
*
* Incrementally copy data type arrays to/from a packed buffer. by
* iterating over the type and type_map until we finish or run out of
* room.
*
* Incrementally traverse a packed array of datatypes and generate an
* iovec of at most length vec_count and addressing at most max_bytes.
* This can be used to do a (partial) RDMA scatter of the datatype
* array.
*
* The state (all members) should be initialized to 0 before the first
* call.
*/
int lam_datatype_scatter_iovec(lam_pack_state_t *state,
void *base_addr,
struct iovec *vec,
size_t vec_count,
size_t max_bytes,
const void *buf,
size_t bufsize,
lam_datatype_t *datatype,
lam_memcpy_fn_t *memcpy_fn,
lam_memcpy_state_t *check);
/*
* incremental memcpy with checksum / CRC functions
*/
/**
 * Initialize the state for an incremental memcpy with checksum / CRC
 *
 * Only the first_call marker and the total size are written here; the other
 * members of the state (partial_size, partial_int, sum) are left untouched.
 *
 * @param state pointer to state object for the current sequence of copies
 * @param sum_size the length of the entire buffer to be checksummed
 */
static inline void
lam_memcpy_init(lam_memcpy_state_t *state, size_t sum_size)
{
    state->first_call = true;
    state->size = sum_size;
}
/**
 * Copy data from one buffer to another
 *
 * Plain forwarding to the C library memcpy(); no checksum is computed.
 *
 * @param dst pointer to the destination buffer
 * @param src pointer to the source buffer
 * @param size number of bytes to copy
 * @param check unused
 * @return the original value of dst
 */
static inline void *lam_memcpy(void *dst, const void *src, size_t size,
                               void *check)
{
    void *result;

    (void) check;   /* accepted but never read */
    result = memcpy(dst, src, size);
    return result;
}
/**
* An alternative version of memcpy that may out-perform the system
* version on some (silly) systems.
*
* @param dst pointer to the destination buffer
* @param src pointer to the source buffer
* @param size size of the buffer
* @param state unused
* @return the original value of dst
*/
void *lam_memcpy_alt(void *dst, const void *src, size_t size,
lam_memcpy_state_t *state);
/**
* Generate a 32-bit CRC for a buffer
*
* @param buffer Data buffer
* @param size Size of buffer
* @param initial_crc Initial value of the CRC register
* @return The CRC
*
* Generate a 32-bit CRC for a data buffer starting from a given CRC
* value.
*/
uint32_t lam_crc32(const void *buffer, size_t size,
uint32_t initial_crc);
/**
* Generate a 32-bit checksum for a buffer
*
* @param buffer Data buffer
* @param size Size of buffer
* @return The checksum
*
* Generate a 32-bit checksum for a data buffer.
*/
uint32_t lam_sum32(const void *buffer, size_t size);
/**
* Copy data from one buffer to another and calculate a 32-bit CRC
*
* @param dst pointer to the destination buffer
* @param src pointer to the source buffer
* @param size size of the buffer
* @param state pointer to a memcpy with checksum/CRC state structure
* @return the original value of dst
*
* This handles cumulative CRCs for arbitrary lengths and address
* alignments as best as it can. The initial contents of state->sum is
* used as the starting value of the CRC. The final CRC is placed
* back in state->sum.
*/
void *lam_memcpy_crc32(void *dst,
const void *src,
size_t size,
lam_memcpy_state_t *check);
/**
* Copy data from one buffer to another and calculate a 32-bit checksum
*
* @param dst pointer to the destination buffer
* @param src pointer to the source buffer
* @param size size of the buffer
* @param state pointer to a memcpy with checksum/CRC state structure
* @return the original value of dst
*
* This handles cumulative checksumming for arbitrary lengths and
* address alignments as best as it can; the contents of
* lastPartialLong and lastPartialLength are updated to reflect the
* last partial word's value and length (in bytes) -- this should
* allow proper handling of checksumming contiguous or noncontiguous
* buffers via multiple calls of bcopy_csum() - Mitch
*/
void *lam_memcpy_sum32(void *dst,
const void *src,
size_t size,
lam_memcpy_state_t *check);
/**
* Copy data from one buffer to another and calculate a 32-bit checksum
*
* @param dst pointer to the destination buffer
* @param src pointer to the source buffer
* @param size size of the buffer
* @param state pointer to a memcpy with checksum/CRC state structure
* @return the original value of dst
*/
void *lam_memcpy_sum64(void *dst,
const void *src,
size_t size,
lam_memcpy_state_t *check);
/**
* Create a LAM/MPI datatype
*
* @param combiner integer identifying the kind of MPI create function
* @param ninteger number of integers passed to the create function
* @param integer array of integers passed to the create function
* @param naddress number of addresses passed to the create function
* @param address array of addresses passed to the create function
* @param ntype number of data types passed to the create function
* @param type array of data types passed to the create function
* @param newtype pointer to address of new type
* @return LAM_SUCCESS on successful creation, LAM_ERROR otherwise
*
* This is the central location for creation of data types in LAM/MPI.
* All MPI_Type_create functions rely upon this to do the actual type
* creation.
*/
int lam_datatype_create(int combiner,
int nintegers,
int integers[],
int naddresses,
ssize_t addresses[],
int ntypes,
lam_datatype_t *types[], lam_datatype_t **newtype);
/**
* Delete a LAM/MPI datatype (actually, just mark it for deletion)
*
* @param type datatype
* @return LAM_SUCCESS on success, LAM_ERROR otherwise
*
* This is the central location for creation of data types in LAM/MPI.
* All MPI_Type_create functions rely upon this to do the actual type
* creation.
*/
int lam_datatype_delete(lam_datatype_t *type);
END_C_DECLS
#endif /* LAM_DATATYPE_H_INCLUDED */

79
src/datatype/datatype_internal.h Обычный файл
Просмотреть файл

@ -0,0 +1,79 @@
#ifndef DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED
#define DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED
/* Debug tracing helpers.
 *
 * With VERBOSE defined, DUMP() forwards its arguments to printf() and
 * DUMP_STACK() calls dump_stack(). Without it DUMP_STACK() expands to
 * nothing, and so does DUMP() wherever variadic macros are available
 * (GNU named variadics, or C99 variadics when ACCEPT_C99 is defined);
 * otherwise DUMP is an empty function taking unspecified arguments.
 */
#if defined(VERBOSE)
# define DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME ) \
dump_stack( (PSTACK), (STACK_POS), (PDESC), (NAME) )
# if defined(__GNUC__)
# define DUMP(ARGS...) printf(ARGS)
# else
# if defined(ACCEPT_C99)
/* Standard C99 variadic macros take an unnamed "..." and expand it with
 * __VA_ARGS__; the named "ARGS..." form is a GNU extension and cannot be
 * combined with __VA_ARGS__. */
# define DUMP(...) printf(__VA_ARGS__)
# else
# define DUMP printf
# endif /* ACCEPT_C99 */
# endif /* __GNUC__ */
#else
# define DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME )
# if defined(__GNUC__)
# define DUMP(ARGS...)
# else
# if defined(ACCEPT_C99)
# define DUMP(...)
# else
static void DUMP() { /* empty hopefully removed by the compiler */ }
# endif /* ACCEPT_C99 */
# endif /* __GNUC__ */
#endif /* VERBOSE */
extern void dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDesc, char* name );
/* Store one stack frame: writes the loop index, the remaining count, the
 * displacement and the end-of-loop position into *PSTACK.
 * PSTACK is evaluated once per stored field. */
#define SAVE_STACK( PSTACK, INDEX, COUNT, DISP, END_LOOP) \
do { \
    (PSTACK)->index = (INDEX); \
    (PSTACK)->count = (COUNT); \
    (PSTACK)->disp = (DISP); \
    (PSTACK)->end_loop = (END_LOOP); \
} while(0)
/* Push a new frame: fills the slot right after PSTACK, increments STACK_POS
 * and finally makes PSTACK point to the new frame. PSTACK and STACK_POS have
 * to be modifiable lvalues. No bound check is done on the stack here. */
#define PUSH_STACK( PSTACK, STACK_POS, INDEX, COUNT, DISP, END_LOOP) \
do { \
    dt_stack_t* pNextFrame = (PSTACK) + 1; \
    SAVE_STACK( pNextFrame, (INDEX), (COUNT), (DISP), (END_LOOP) ); \
    ++(STACK_POS); \
    (PSTACK) = pNextFrame; \
} while(0)
/* Copy BLENGTH bytes from SRC to DST. The default implementation simply
 * forwards to memcpy(). */
#define MEMCPY( DST, SRC, BLENGTH ) memcpy( (DST), (SRC), (BLENGTH) )
/* Alternative hand-written copy, compiled only when USELESS is defined.
 * Blocks shorter than MEMCPY_LIMIT are copied by hand (int by int when DST
 * and SRC share the same offset inside an int, byte by byte otherwise);
 * everything else goes through memcpy().
 *
 * NOTE(review): before enabling this, be aware that
 *  - MEMCPY is redefined here without an #undef of the definition above;
 *  - MEMCPY_LIMIT is 1, so the hand-written path is only taken for
 *    BLENGTH < 1;
 *  - the int loop runs BLENGTH >> 2 times (BLENGTH / 4) while the alignment
 *    mask is derived from sizeof(int), and no trailing bytes are copied
 *    after that loop.
 */
#ifdef USELESS
#define MEMCPY_LIMIT 1
#define MEMCPY( DST, SRC, BLENGTH ) \
do { \
if( (BLENGTH) < (MEMCPY_LIMIT) ) { \
long mask = sizeof(int) - 1; \
char *dst = (char*)(DST), *src = (char*)(SRC); \
int i; \
if( ((long)(DST) & mask) == ((long)(SRC) & mask) ) { \
int *idst = (int*)((long)(DST) & (~mask)); \
int *isrc = (int*)((long)(SRC) & (~mask)); \
for( i = 0; i < ((long)(DST) & mask); i++ ) { \
*dst = *src; dst++; src++; \
} \
if( ((char*)idst) != dst ) { \
idst++; isrc++; \
} \
for( i = 0; i < ((BLENGTH) >> 2); i++ ) { \
*idst = *isrc; idst++; isrc++; \
} \
} else { \
for( i = 0; i < (BLENGTH); i++ ) { \
*dst = *src; dst++; src++; \
} \
} \
} else \
memcpy( (DST), (SRC), (BLENGTH) ); \
} while(0)
#endif /* USELESS */
#endif /* DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED */

603
src/datatype/ddt_test.c Обычный файл
Просмотреть файл

@ -0,0 +1,603 @@
#include "ddt.h"
#include <time.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#define TIMER_DATA_TYPE struct timeval
#define GET_TIME(TV) gettimeofday( &(TV), NULL )
#define ELAPSED_TIME(TSTART, TEND) (((TEND).tv_sec - (TSTART).tv_sec) * 1000000 + ((TEND).tv_usec - (TSTART).tv_usec))
/* Extent test built around the DT_UB marker datatype.
 *
 * Creates three datatypes in sequence (a vector of ints, then two structs
 * that each combine the previous type with a DT_UB entry), commits each one
 * and compares the extent reported by dt_get_extent() with the expected
 * value. Every mismatch is printed and counted.
 *
 * Return values of the dt_* calls are not checked.
 *
 * @return the number of extent mismatches (0 when all three checks pass)
 */
int mpich_typeub( void )
{
int errs = 0;
long extent, lb, extent1, extent2, extent3;
long displ[2];
/* NOTE(review): datatype.h declares dt_create_struct() with a size_t* block
 * length array while blens is an int array - confirm against the prototype
 * in ddt.h. */
int blens[2];
dt_desc_t *type1, *type2, *type3, *types[2];
/* type1: vector of DT_INT created with the arguments 2, 1, 4; the expected
 * extent is 5 ints. */
dt_create_vector( 2, 1, 4, &(basicDatatypes[DT_INT]), &type1 );
dt_commit( &type1 );
dt_get_extent( type1, &lb, &extent );
extent1 = 5 * sizeof(int);
if (extent != extent1) {
printf("EXTENT 1 %ld != %ld\n",extent,extent1);
errs++;
printf("extent(type1)=%ld\n",(long)extent);
}
/* type2: type1 at displacement 0 plus a DT_UB entry at 4 * sizeof(int). */
blens[0]=1;
blens[1]=1;
displ[0]=0;
displ[1]=sizeof(int)*4;
types[0]=type1;
types[1]=&(basicDatatypes[DT_UB]);
extent2 = displ[1];
/* Using MPI_UB and Type_struct, monkey with the extent, making it
 * 4 * sizeof(int) (16 with 4-byte ints).
 */
dt_create_struct( 2, blens, displ, types, &type2 );
dt_commit( &type2 );
dt_get_extent( type2, &lb, &extent );
if (extent != extent2) {
printf("EXTENT 2 %ld != %ld\n",extent,extent2);
errs++;
printf("extent(type2)=%ld\n",(long)extent);
}
/* Monkey with the extent again, this time with a DT_UB entry at sizeof(int)
 * (4 with 4-byte ints):
 * ===> MPICH gives 4
 * ===> MPIF gives 16, the old extent
 * The expected value below is the old extent (extent3 = extent2).
 */
displ[1]=sizeof(int);
types[0]=type2;
types[1]=&(basicDatatypes[DT_UB]);
extent3 = extent2;
dt_create_struct( 2, blens, displ, types, &type3 );
dt_commit( &type3 );
dt_get_extent( type3, &lb, &extent );
if (extent != extent3) {
printf("EXTENT 3 %ld != %ld\n",extent,extent3);
errs++;
printf("extent(type3)=%ld\n",(long)extent);
}
/* Release the three datatypes created above. */
dt_free( &type1 );
dt_free( &type2 );
dt_free( &type3 );
return errs;
}
/*
 * Bounds check modelled on the MPICH "typeub2" test (MPI example 3.26).
 *
 * type1 = struct { LB at -3, INT at 0, UB at 6 }
 * type2 = 2 contiguous copies of type1
 * type3 = struct { type1 at 0, type1 at extent(type1) }, i.e. another way
 *         to express type2
 *
 * For each type the lower bound, upper bound and extent are compared with
 * the expected values; the size is only reported.
 *
 * Returns the number of types whose bounds were wrong.
 */
int mpich_typeub2( void )
{
    int blocklen[3], err = 0, sz1, sz2, sz3;
    long disp[3], lb, ub, ex1, ex2, ex3;
    dt_desc_t *types[3], *dt1, *dt2, *dt3;

    blocklen[0] = 1;
    blocklen[1] = 1;
    blocklen[2] = 1;
    disp[0] = -3;
    disp[1] = 0;
    disp[2] = 6;
    types[0] = &(basicDatatypes[DT_LB]);
    types[1] = &(basicDatatypes[DT_INT]);
    types[2] = &(basicDatatypes[DT_UB]);

    dt_create_struct(3,blocklen,disp, types,&dt1);
    dt_commit(&dt1);
    dt_type_lb(dt1, &lb); dt_type_ub(dt1, &ub);
    dt_type_extent(dt1,&ex1); dt_type_size(dt1,&sz1);
    /* Values should be lb = -3, ub = 6 extent 9; size depends on implementation */
    if (lb != -3 || ub != 6 || ex1 != 9) {
        printf("Example 3.26 type1 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex1, sz1);
        err++;
    }
    else
        printf("Example 3.26 type1 correct\n" );

    /* The contiguous type is built and checked exactly once: building it
     * twice into the same handle leaked the first instance and reported
     * "type1 correct" for what is really the type2 check. */
    dt_create_contiguous(2,dt1,&dt2);
    dt_type_lb(dt2, &lb); dt_type_ub(dt2, &ub);
    dt_type_extent(dt2,&ex2); dt_type_size(dt2,&sz2);
    /* Values should be lb = -3, ub = 15, extent = 18, size depends on implementation */
    if (lb != -3 || ub != 15 || ex2 != 18) {
        /* first line: expected values, second line: measured values */
        printf("Example 3.26 type2 lb %d ub %d extent %d size %d\n", (int)-3, (int)15, (int)18, 8);
        printf("Example 3.26 type2 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex2, sz2);
        err++;
    }
    else
        printf( "Example 3.26 type2 correct\n" );

    types[0]=dt1; types[1]=dt1;
    blocklen[0]=1; blocklen[1]=1;
    disp[0]=0; disp[1]=ex1;
    dt_create_struct(2, blocklen, disp, types, &dt3);
    dt_commit(&dt3);
    dt_type_lb(dt3, &lb); dt_type_ub(dt3, &ub);
    dt_type_extent(dt3,&ex3); dt_type_size(dt3,&sz3);
    /* Another way to express type2 */
    if (lb != -3 || ub != 15 || ex3 != 18) {
        printf("type3 lb %d ub %d extent %d size %d\n", (int)-3, (int)15, (int)18, 8);
        /* report the size measured for type3 (sz3), not the one of type2 */
        printf("type3 lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex3, sz3);
        err++;
    }
    else
        printf( "type3 correct\n" );

    dt_free( &dt1 );
    dt_free( &dt2 );
    dt_free( &dt3 );
    return err;
}
/*
 * Bounds check modelled on the MPICH "typeub3" test.
 *
 * Starting from the LB/INT/UB struct of typeub2 (lb -3, ub 6), derive an
 * hindexed, an indexed, an hvector and a vector type and compare lb, ub and
 * extent of each against the expected values. On mismatch two lines are
 * printed: the expected values first, then the measured ones.
 *
 * Returns the number of derived types whose bounds were wrong.
 */
int mpich_typeub3( void )
{
    /* three entries: the struct below is built from three members, so the
     * block-length array must hold three values (it used to hold two) */
    int blocklen[3], sz, err = 0, idisp[3];
    long disp[3], lb, ub, ex;
    dt_desc_t *types[3], *dt1, *dt2, *dt3, *dt4, *dt5;

    /* Create a datatype with explicit LB and UB */
    blocklen[0] = 1;
    blocklen[1] = 1;
    blocklen[2] = 1;
    disp[0] = -3;
    disp[1] = 0;
    disp[2] = 6;
    types[0] = &(basicDatatypes[DT_LB]);
    types[1] = &(basicDatatypes[DT_INT]);
    types[2] = &(basicDatatypes[DT_UB]);

    /* Generate samples for contiguous, hindexed, hvector, indexed, and vector (struct and contiguous tested in typeub2) */
    dt_create_struct(3,blocklen,disp, types,&dt1);
    dt_commit(&dt1);

    /* This type is the same as in typeub2, and is tested there */
    types[0]=dt1; types[1]=dt1;
    blocklen[0]=1; blocklen[1]=1;
    disp[0]=-4; disp[1]=7;
    idisp[0]=-4; idisp[1]=7;

    dt_create_hindexed( 2, blocklen, disp, dt1, &dt2 );
    dt_commit( &dt2 );
    dt_type_lb( dt2, &lb ); dt_type_ub( dt2, &ub );
    dt_type_extent( dt2, &ex ); dt_type_size( dt2, &sz );
    if (lb != -7 || ub != 13 || ex != 20) {
        printf("hindexed lb %d ub %d extent %d size %d\n", (int)-7, (int)13, (int)20, sz);
        printf("hindexed lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz);
        err++;
    }
    else
        printf( "hindexed ok\n" );

    dt_create_indexed( 2, blocklen, idisp, dt1, &dt3 );
    dt_commit( &dt3 );
    dt_type_lb( dt3, &lb ); dt_type_ub( dt3, &ub );
    dt_type_extent( dt3, &ex ); dt_type_size( dt3, &sz );
    if (lb != -39 || ub != 69 || ex != 108) {
        printf("indexed lb %d ub %d extent %d size %d\n", (int)-39, (int)69, (int)108, sz);
        printf("indexed lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz);
        err++;
    }
    else
        printf( "indexed ok\n" );

    dt_create_hvector( 2, 1, 14, dt1, &dt4 );
    dt_commit( &dt4 );
    dt_type_lb( dt4, &lb ); dt_type_ub( dt4, &ub );
    dt_type_extent( dt4, &ex ); dt_type_size( dt4, &sz );
    if (lb != -3 || ub != 20 || ex != 23) {
        printf("hvector lb %d ub %d extent %d size %d\n", (int)-3, (int)20, (int)23, sz);
        printf("hvector lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz);
        err++;
    }
    else
        printf( "hvector ok\n" );

    dt_create_vector( 2, 1, 14, dt1, &dt5 );
    dt_commit( &dt5 );
    dt_type_lb( dt5, &lb ); dt_type_ub( dt5, &ub );
    dt_type_extent( dt5, &ex ); dt_type_size( dt5, &sz );
    if (lb != -3 || ub != 132 || ex != 135) {
        printf("vector lb %d ub %d extent %d size %d\n", (int)-3, (int)132, (int)135, sz);
        printf("vector lb %d ub %d extent %d size %d\n", (int)lb, (int)ub, (int)ex, sz);
        err++;
    }
    else
        printf( "vector ok\n" );

    dt_free( &dt1 );
    dt_free( &dt2 );
    dt_free( &dt3 );
    dt_free( &dt4 );
    dt_free( &dt5 );
    return err;
}
/*
 * Print an N x N row-major matrix of doubles, one row per line.
 * Each line starts with the byte offset of the row inside the matrix.
 *
 * N    number of rows and columns
 * mat  pointer to the N * N elements
 */
void print_double_mat( size_t N, double* mat )
{
    size_t i, j;   /* same type as N: no signed/unsigned comparison */
    double* pMat;

    for( i = 0; i < N; i++ ) {
        /* the offset is a size_t, so it needs the z length modifier */
        printf( "(%4zu) :", i * N * sizeof(double) );
        pMat = mat + i * N;
        for( j = 0; j < N; j++ ) {
            printf( "%5.1f ", *pMat );
            pMat++;
        }
        printf( "\n" );
    }
}
/*
 * Fill the upper triangle (diagonal included) of an N x N row-major matrix
 * with pseudo-random values. Elements below the diagonal are left untouched.
 *
 * The values are written at mat[i * N + j] for j >= i, which is where
 * test_upper() and check_diag_matrix() read them; the previous version wrote
 * them packed at the start of the buffer. rand() is used because it is the
 * generator that srand() seeds.
 *
 * Always returns 0.
 */
int init_random_upper_matrix( size_t N, double* mat )
{
    size_t i, j;

    srand( (unsigned int)time(NULL) );
    for( i = 0; i < N; i++ ) {
        double* row = mat + i * N;
        for( j = i; j < N; j++ ) {
            row[j] = (double)rand();
        }
    }
    return 0;
}
/*
 * Compare the upper triangles (diagonal included) of two N x N row-major
 * matrices. mat1 holds the expected values, mat2 the values to verify.
 *
 * Returns 0 when every compared element matches; on the first difference a
 * message with the position and both values is printed and -1 is returned.
 */
int check_diag_matrix( size_t N, double* mat1, double* mat2 )
{
    int row, col;

    for( row = 0; row < N; row++ ) {
        /* start of the current row in both matrices */
        const double* expected = mat1 + row * N;
        const double* found = mat2 + row * N;

        for( col = row; col < N; col++ ) {
            if( expected[col] == found[col] )
                continue;
            printf( "error in position (%d, %d) expect %f and find %f\n",
                    row, col, expected[col], found[col] );
            return -1;
        }
    }
    return 0;
}
/*
 * Build an indexed datatype of doubles describing the upper triangle
 * (diagonal included) of a mat_size x mat_size row-major matrix: row i
 * contributes mat_size - i elements starting at element i * mat_size + i.
 *
 * Returns the new datatype, or NULL when mat_size is 0 or the temporary
 * arrays cannot be allocated.
 */
dt_desc_t* upper_matrix( size_t mat_size )
{
    /* dt_create_indexed() takes both arrays as int*, so the block lengths
     * must be ints as well (a size_t array has a different element size) */
    int *disp, *blocklen;
    size_t i;
    dt_desc_t* upper = NULL;

    if( 0 == mat_size ) return NULL;

    disp = (int*)malloc( sizeof(int) * mat_size );
    blocklen = (int*)malloc( sizeof(int) * mat_size );
    if( (NULL == disp) || (NULL == blocklen) ) {
        free( disp );
        free( blocklen );
        return NULL;
    }

    for( i = 0; i < mat_size; i++ ) {
        disp[i] = (int)(i * mat_size + i);
        blocklen[i] = (int)(mat_size - i);
    }
    dt_create_indexed( mat_size, blocklen, disp, &(basicDatatypes[DT_DOUBLE]),
                       &upper );
    free( disp );
    free( blocklen );
    return upper;
}
/*
 * Build an indexed datatype of doubles describing the strict lower triangle
 * of a mat_size x mat_size row-major matrix: row i contributes i elements
 * starting at element i * mat_size (so row 0 contributes an empty block).
 *
 * Returns the new datatype, or NULL when mat_size is 0 or the temporary
 * arrays cannot be allocated.
 */
dt_desc_t* lower_matrix( size_t mat_size )
{
    /* dt_create_indexed() takes both arrays as int*, so the block lengths
     * must be ints as well (a size_t array has a different element size) */
    int *disp, *blocklen;
    size_t i;
    dt_desc_t* lower = NULL;

    if( 0 == mat_size ) return NULL;

    disp = (int*)malloc( sizeof(int) * mat_size );
    blocklen = (int*)malloc( sizeof(int) * mat_size );
    if( (NULL == disp) || (NULL == blocklen) ) {
        free( disp );
        free( blocklen );
        return NULL;
    }

    for( i = 0; i < mat_size; i++ ) {
        disp[i] = (int)(i * mat_size);
        blocklen[i] = (int)i;
    }
    dt_create_indexed( mat_size, blocklen, disp, &(basicDatatypes[DT_DOUBLE]),
                       &lower );
    free( disp );
    free( blocklen );
    return lower;
}
extern long conversion_elapsed;
/*
 * Unpack test for the upper-triangular datatype.
 *
 * A length x length matrix (mat1) is filled by init_random_upper_matrix(),
 * its upper triangle is copied into a packed buffer, and that buffer is fed
 * chunk by chunk to convertor_unpack() which writes into the zeroed matrix
 * mat2 using the datatype from upper_matrix(). The elapsed time of the
 * unpack loop is printed.
 *
 * Returns the result of check_diag_matrix( length, mat1, mat2 ).
 *
 * NOTE(review): none of the allocations nor the datatype/convertor creation
 * results are checked, and pConv is not released in this function.
 */
int test_upper( size_t length )
{
double *mat1, *mat2, *inbuf;
dt_desc_t *pdt, *pdt1;
convertor_t * pConv;
char *ptr;
int i, j, split_chunk, total_length, rc;
struct iovec a;
TIMER_DATA_TYPE start, end;
long total_time;
printf( "test upper matrix\n" );
pdt = upper_matrix( length );
/* pdt1 is only created and destroyed; it takes no part in the unpack */
pdt1 = lower_matrix( length );
/*dt_dump( pdt );*/
mat1 = malloc( length * length * sizeof(double) );
init_random_upper_matrix( length, mat1 );
mat2 = calloc( length * length, sizeof(double) );
/* number of bytes in the packed upper triangle */
total_length = length * (length + 1) / 2 * sizeof(double);
inbuf = (double*)malloc( total_length );
/* remember the start of the buffer: inbuf is advanced by the copy loop */
ptr = (char*)inbuf;
/* copy upper matrix in the array simulating the input buffer */
for( i = 0; i < length; i++ )
for( j = i; j < length; j++ ) {
*inbuf = mat1[i * length + j];
inbuf++;
}
inbuf = (double*)ptr;
pConv = convertor_create( 0, 0 );
convertor_init_for_recv( pConv, 0, pdt, 1, mat2 );
/* test the automatic destruction of the data: the datatypes are destroyed
 * on purpose before the convertor that was initialised with pdt is used.
 * NOTE(review): presumably the convertor holds its own reference - confirm. */
dt_destroy( &pdt );
dt_destroy( &pdt1 );
GET_TIME( start );
/* unpack in pieces of (length + 1) doubles so that a piece does not match
 * a matrix row; the last piece is shortened to what is left */
split_chunk = (length + 1) * sizeof(double);
/* split_chunk = (total_length + 1) * sizeof(double); */
for( i = total_length; i > 0; ) {
if( i < split_chunk ) split_chunk = i;
a.iov_base = ptr;
a.iov_len = split_chunk;
convertor_unpack( pConv, &a, 1 );
ptr += split_chunk;
i -= split_chunk;
}
GET_TIME( end );
total_time = ELAPSED_TIME( start, end );
printf( "complete unpacking in %ld microsec\n", total_time );
/* printf( "conversion done in %ld microsec\n", conversion_elapsed ); */
/* printf( "stack management in %ld microsec\n", total_time - conversion_elapsed ); */
free( inbuf );
rc = check_diag_matrix( length, mat1, mat2 );
free( mat1 );
free( mat2 );
return rc;
}
/*
 * Build a datatype selecting the left and right borders of a matrix of
 * doubles: every one of the `size` rows contributes its first `width` and
 * its last `width` elements.
 *
 * size   number of rows and of elements per row
 * width  number of elements taken at each end of a row
 *
 * Returns the new datatype (the intermediate per-row type is destroyed).
 */
dt_desc_t* test_matrix_borders( unsigned int size, unsigned int width )
{
    dt_desc_t *pdt, *pdt_line;
    int disp[2];
    /* dt_create_indexed() expects int block lengths */
    int blocklen[2];

    disp[0] = 0;
    blocklen[0] = (int)width;
    /* dt_create_indexed() multiplies the displacement by the extent of the
     * old type, so it is given in elements and not in bytes */
    disp[1] = (int)(size - width);
    blocklen[1] = (int)width;
    dt_create_indexed( 2, blocklen, disp, &(basicDatatypes[DT_DOUBLE]),
                       &pdt_line );
    dt_create_contiguous( size, pdt_line, &pdt );
    dt_destroy( &pdt_line );
    return pdt;
}
/*
 * Alignment test for contiguous types.
 *
 * A record made of one double at displacement 0 and one char at
 * displacement 8 is repeated four times, and the result is repeated twice.
 * Every stage is dumped; the intermediate types are destroyed as soon as
 * the next stage has been built.
 *
 * Returns the outermost datatype; the caller releases it.
 */
dt_desc_t* test_contiguous( void )
{
    dt_desc_t *record, *quad, *result;

    printf( "test contiguous (alignement)\n" );

    /* stage 1: { double, char } */
    record = dt_create( -1 );
    dt_add( record, &(basicDatatypes[DT_DOUBLE]), 1, 0, -1 );
    dt_dump( record );
    dt_add( record, &(basicDatatypes[DT_CHAR]), 1, 8, -1 );
    dt_dump( record );

    /* stage 2: four records */
    dt_create_contiguous( 4, record, &quad );
    dt_destroy( &record );
    dt_dump( quad );

    /* stage 3: two groups of four records */
    dt_create_contiguous( 2, quad, &result );
    dt_destroy( &quad );
    dt_dump( result );
    dt_dump_complete( result );

    return result;
}
/*
 * Struct test: builds a three-member struct made of 2 floats at
 * displacement 0, one { double, char } record at displacement 16 and
 * 3 chars at displacement 26. The inner record and the final type are
 * dumped with dt_dump_complete().
 *
 * Returns the struct datatype; the caller releases it.
 *
 * NOTE(review): the block lengths are passed as int*, while the visible
 * definition of dt_create_struct declares them as size_t* - confirm which
 * prototype this file is compiled against.
 */
dt_desc_t* test_struct( void )
{
    dt_desc_t *inner, *outer;
    dt_desc_t* members[3];
    int counts[3];
    long offsets[3];

    printf( "test struct\n" );

    /* inner record: one double followed by one char */
    inner = dt_create( -1 );
    dt_add( inner, &(basicDatatypes[DT_DOUBLE]), 1, 0, -1 );
    dt_add( inner, &(basicDatatypes[DT_CHAR]), 1, 8, -1 );
    dt_dump_complete( inner );

    members[0] = &(basicDatatypes[DT_FLOAT]);
    counts[0] = 2;
    offsets[0] = 0;

    members[1] = inner;
    counts[1] = 1;
    offsets[1] = 16;

    members[2] = &(basicDatatypes[DT_CHAR]);
    counts[2] = 3;
    offsets[2] = 26;

    dt_create_struct( 3, counts, offsets, members, &outer );
    dt_destroy( &inner );
    dt_dump_complete( outer );
    return outer;
}
/* Element of the sstrange array: two ints separated by an int that
 * create_strange_dt() leaves out of the datatype it builds. */
typedef struct {
int i1;
int gap;
int i2;
} sdata_intern;
/* C layout mirrored by create_strange_dt(): a leading int, ten
 * sdata_intern elements and a trailing int. */
typedef struct {
int counter;
sdata_intern v[10];
int last;
} sstrange;
/* Number of sstrange structures described by the final contiguous type. */
#define SSTRANGE_CNT 10
/* When defined, create_strange_dt() resizes its intermediate types with
 * dt_create_resized(). */
#define USE_RESIZED
/*
 * Build a datatype describing SSTRANGE_CNT consecutive sstrange structures
 * in which only the i1 and i2 members of every sdata_intern element are
 * selected (the gap member is skipped).
 *
 * The displacements are taken from the address differences of the members
 * of local sdata_intern / sstrange arrays. With USE_RESIZED defined, the
 * intermediate types are resized to the distance between two consecutive
 * array elements. The final type is dumped with dt_dump() and run once
 * through dt_optimize_short().
 *
 * Returns the new datatype; the caller releases it.
 */
dt_desc_t* create_strange_dt( void )
{
sdata_intern v[2];
long displ[3];
dt_desc_t* types[3] = { &(basicDatatypes[DT_INT]) };
sstrange t[2];
int pBlock[3] = {1, 10, 1}, dispi[3];
dt_desc_t *pdt, *pdt1, *pdt2, *pdtTemp;
/* dispi[] is handed to dt_create_indexed_block(), which scales it by the
 * extent of the old type. dispi[0] is computed in bytes and dispi[1] in
 * ints; dispi[0] is zero either way. dispi[2] is never used. */
dispi[0] = (int)((char*)&(v[0].i1) - (char*)&(v[0])); /* 0 */
dispi[1] = (int)(((char*)(&(v[0].i2)) - (char*)&(v[0])) / sizeof(int)); /* 2 */
dt_create_indexed_block( 2, 1, dispi, &(basicDatatypes[DT_INT]), &pdtTemp );
#ifdef USE_RESIZED
/* optional */
/* give the two-int type the extent of a whole sdata_intern */
displ[0] = 0;
displ[1] = (char*)&(v[1]) - (char*)&(v[0]);
dt_create_resized( pdtTemp, displ[0], displ[1], &pdt1 );
dt_destroy( &pdtTemp );
#else
pdt1 = pdtTemp;
#endif /* USE_RESIZED */
/* struct { int counter; <10 x pdt1> v; int last; } */
types[1] = pdt1;
types[2] = &(basicDatatypes[DT_INT]);
displ[0] = 0;
displ[1] = (long)((char*)&(t[0].v[0]) - (char*)&(t[0]));
displ[2] = (long)((char*)&(t[0].last) - (char*)&(t[0]));
dt_create_struct( 3, pBlock, displ, types, &pdtTemp );
#ifdef USE_RESIZED
/* optional */
/* give the struct type the extent of a whole sstrange */
displ[1] = (char*)&(t[1]) - (char*)&(t[0]);
dt_create_resized( pdtTemp, displ[0], displ[1], &pdt2 );
dt_destroy( &pdtTemp );
#else
pdt2 = pdtTemp;
#endif /* USE_RESIZED */
dt_create_contiguous( SSTRANGE_CNT, pdt2, &pdt );
dt_destroy( &pdt1 );
dt_destroy( &pdt2 );
dt_dump( pdt );
{
/* NOTE(review): dt_commit() passes the address of a descriptor structure
 * (&pData->opt_desc) as the third argument of dt_optimize_short(), while an
 * uninitialised dt_elem_desc_t* is passed by address here and freed
 * afterwards - confirm the expected argument type. */
dt_elem_desc_t* pElemDesc;
dt_optimize_short( pdt, 1, &pElemDesc );
free( pElemDesc );
}
return pdt;
}
/*
 * Skeleton of a local copy test: allocates a source and a destination
 * buffer large enough for `count` elements of `pdt`, builds the "strange"
 * datatype, and releases everything again. The copy itself is still
 * disabled.
 *
 * pdt    datatype whose extent sizes the buffers
 * count  number of elements the buffers must hold
 *
 * Always returns 0.
 */
int local_copy_ddt_count( dt_desc_t* pdt, int count )
{
    long extent;
    void *pdst, *psrc;
    dt_desc_t* strange;

    dt_type_extent( pdt, &extent );
    pdst = malloc( extent * count );
    psrc = malloc( extent * count );

    /* keep the caller's datatype untouched: the freshly built type used to
     * overwrite the pdt parameter and was never destroyed */
    strange = create_strange_dt();
    /* TODO: dt_copy_content_same_dt( pdt, count, pdst, psrc ); */
    dt_destroy( &strange );

    free( pdst );
    free( psrc );
    return 0;
}
/*
 * Test driver.
 *
 * As written only create_strange_dt() is executed: the unconditional
 * `return 0` that follows it makes every later statement unreachable
 * (the upper_matrix copy test, the mpich_typeub* checks, test_upper,
 * test_matrix_borders, test_contiguous, test_struct and the dt_add dumps).
 * argc and argv are not used.
 */
int main( int argc, char* argv[] )
{
dt_desc_t *pdt, *pdt1, *pdt2, *pdt3;
int rc, length = 500;
pdt = create_strange_dt();
/* NOTE(review): early exit - pdt is not destroyed and nothing below runs.
 * Looks like a debugging leftover; confirm before re-enabling the tests. */
return 0;
/*
local_copy_ddt_count(pdt, 10);
dt_destroy( &pdt );
*/
pdt = upper_matrix(100);
local_copy_ddt_count(pdt, 1);
dt_destroy( &pdt );
return 0;
return 0;
/* bounds checks ported from MPICH; their error counts are ignored */
mpich_typeub();
mpich_typeub2();
mpich_typeub3();
rc = test_upper( length );
if( rc == 0 )
printf( "decode [PASSED]\n" );
else
printf( "decode [NOT PASSED]\n" );
pdt = test_matrix_borders( length, 100 );
dt_dump( pdt );
dt_free( &pdt );
printf( ">>--------------------------------------------<<\n" );
pdt = test_contiguous();
dt_destroy( &pdt );
printf( ">>--------------------------------------------<<\n" );
pdt = test_struct();
dt_destroy( &pdt );
printf( ">>--------------------------------------------<<\n" );
/* build three types by hand with dt_add() and dump them:
 * pdt3 = 10 ints followed by 5 floats
 * pdt2 = 1 int followed by 3 x pdt3
 * pdt1 = 5 long longs followed by 2 long doubles */
pdt1 = dt_create( -1 );
pdt2 = dt_create( -1 );
pdt3 = dt_create( -1 );
dt_add( pdt3, &(basicDatatypes[DT_INT]), 10, 0, -1 );
dt_add( pdt3, &(basicDatatypes[DT_FLOAT]), 5, 10 * sizeof(int), -1 );
dt_add( pdt2, &(basicDatatypes[DT_INT]), 1, 0, -1 );
dt_add( pdt2, pdt3, 3, sizeof(int) * 1, -1 );
dt_add( pdt1, &(basicDatatypes[DT_LONG_LONG]), 5, 0, -1 );
dt_add( pdt1, &(basicDatatypes[DT_LONG_DOUBLE]), 2, sizeof(long long) * 5, -1 );
printf( ">>--------------------------------------------<<\n" );
dt_dump( pdt1 );
printf( ">>--------------------------------------------<<\n" );
dt_dump( pdt2 );
printf( ">>--------------------------------------------<<\n" );
dt_dump( pdt3 );
dt_destroy( &pdt1 );
dt_destroy( &pdt2 );
dt_destroy( &pdt3 );
return 0;
}

188
src/datatype/dt_add.c Обычный файл
Просмотреть файл

@ -0,0 +1,188 @@
#include "datatype.h"
/* When we add a datatype we should update its definition depending on
 * the initial displacement for the whole data, so that the displacement
 * of every element inside a datatype depends only on the loop
 * displacement and on its own displacement.
 */
/* We have 3 different structures to update:
 * the first is the real representation of the datatype,
 * the second is the internal representation using extents,
 * the last is the representation used for send operations.
 */
/*
 * Append `count` repetitions of the datatype pdtAdd to the datatype pdtBase.
 *
 * pdtBase  datatype under construction; its element descriptors, per-type
 *          counters, size, bounds, element count and flags are updated
 * pdtAdd   datatype to insert, either basic or already composed
 * count    number of repetitions of pdtAdd
 * disp     displacement at which the first repetition is placed
 * extent   distance between two repetitions; -1 selects the default,
 *          pdtAdd->ub - pdtAdd->lb
 *
 * The DT_LB and DT_UB markers only move the corresponding user bound of
 * pdtBase and do not add any element descriptor.
 *
 * Returns 0 on success and -1 when the descriptor array cannot be enlarged
 * (pdtBase then still owns its previous, valid array).
 */
int dt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd, unsigned int count, long disp, long extent )
{
    int newLength, place_needed = 0, i;
    /* extra flags OR-ed into every copied element; stays 0 unless the
     * elements end up inside a loop (it used to be read uninitialised
     * when count == 1) */
    short localFlags = 0;
    dt_elem_desc_t *pLast, *pLoop = NULL;
    long lb, ub;

    /* the extent should be always be positive. So a negative
     * value here have a special meaning ie. default extent as
     * computed by ub - lb
     */
    if( extent == -1 ) extent = (pdtAdd->ub - pdtAdd->lb);

    /* first make sure that we have enough place to
     * put the new element inside */
    if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) {
        place_needed = 1;
        /* handle special cases for DT_LB and DT_UB */
        if( pdtAdd == &(basicDatatypes[DT_LB]) ) {
            pdtBase->bdt_used |= (1<< DT_LB);
            if( pdtBase->flags & DT_FLAG_USER_LB ) {
                pdtBase->lb = LMIN( pdtBase->lb, disp );
            } else {
                pdtBase->lb = disp;
                pdtBase->flags |= DT_FLAG_USER_LB;
            }
            return 0;
        } else if( pdtAdd == &(basicDatatypes[DT_UB]) ) {
            pdtBase->bdt_used |= (1<< DT_UB);
            if( pdtBase->flags & DT_FLAG_USER_UB ) {
                pdtBase->ub = LMAX( pdtBase->ub, disp );
            } else {
                pdtBase->ub = disp;
                pdtBase->flags |= DT_FLAG_USER_UB;
            }
            return 0;
        }
    } else {
        /* all the elements of pdtAdd plus, for a repetition, the
         * DT_LOOP / DT_END_LOOP pair around them */
        place_needed = pdtAdd->desc.used;
        if( count != 1 ) place_needed += 2;
    }

    dt_increase_ref( pdtAdd );

    /* grow the descriptor array, in steps of DT_INCREASE_STACK entries */
    newLength = pdtBase->desc.used + place_needed;
    if( newLength > pdtBase->desc.length ) {
        dt_elem_desc_t* pGrown;

        newLength = ((newLength / DT_INCREASE_STACK) + 1 ) * DT_INCREASE_STACK;
        printf( "increase the size of the data desc array from %d to %d (old ptr = %p ",
                pdtBase->desc.length, newLength, (void*)pdtBase->desc.desc );
        /* realloc() takes a size in bytes, not a number of entries; the
         * result goes to a temporary so the old array survives a failure */
        pGrown = (dt_elem_desc_t*)realloc( pdtBase->desc.desc,
                                           newLength * sizeof(dt_elem_desc_t) );
        if( NULL == pGrown ) {
            dt_decrease_ref( pdtAdd );
            return -1;
        }
        pdtBase->desc.desc = pGrown;
        printf( "new ptr = %p\n", (void*)pdtBase->desc.desc );
        pdtBase->desc.length = newLength;
    }

    /* compute the new memory alignement */
    pdtBase->align = IMAX( pdtBase->align, pdtAdd->align );
    pdtBase->bdt_used |= pdtAdd->bdt_used;

    pLast = &(pdtBase->desc.desc[pdtBase->desc.used]);
    if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) { /* add a basic datatype */
        pLast->type = pdtAdd->id;
        pLast->count = count;
        pLast->disp = disp;
        pLast->extent = extent;
        pdtBase->desc.used++;
        pdtBase->btypes[pdtAdd->id] += count;
        pLast->flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
        /* repetitions touch each other only if they are spaced by the size */
        if( extent == pdtAdd->size )
            pLast->flags |= DT_FLAG_CONTIGUOUS;
    } else {
        /* now we add a complex datatype */
        if( disp != pdtBase->ub ) { /* add the initial gap */
            if( disp < pdtBase->ub ) pdtBase->flags |= DT_FLAG_OVERLAP;
        }
        /* keep trace of the total number of basic datatypes in the datatype definition */
        pdtBase->btypes[DT_LOOP] += pdtAdd->btypes[DT_LOOP];
        for( i = 3; i < DT_END_LOOP; i++ )
            if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]);
        pdtBase->btypes[DT_END_LOOP] += pdtAdd->btypes[DT_END_LOOP];

        /* if the extent of the datatype if the same as the extent of the loop
         * description of the datatype then we simply have to update the main loop.
         */
        if( count != 1 ) {
            /* open a loop; for a DT_LOOP entry disp holds the number of
             * entries up to and including the matching DT_END_LOOP */
            pLoop = pLast;
            pLast->type = DT_LOOP;
            pLast->count = count;
            pLast->disp = (long)pdtAdd->desc.used + 1;
            pLast->extent = extent;
            pLast->flags = (pdtAdd->flags & ~(DT_FLAG_COMMITED | DT_FLAG_FOREVER));
            localFlags = DT_FLAG_IN_LOOP;
            pdtBase->btypes[DT_LOOP] += 2;
            pdtBase->desc.used += 2;
            pLast++;
        }

        /* copy the elements of pdtAdd, shifting everything except nested
         * loop headers by the requested displacement */
        for( i = 0; i < pdtAdd->desc.used; i++ ) {
            pLast->type = pdtAdd->desc.desc[i].type;
            pLast->flags = pdtAdd->desc.desc[i].flags | localFlags;
            pLast->count = pdtAdd->desc.desc[i].count;
            pLast->extent = pdtAdd->desc.desc[i].extent;
            pLast->disp = pdtAdd->desc.desc[i].disp;
            if( pdtAdd->desc.desc[i].type != DT_LOOP )
                pLast->disp += disp/* + pdtAdd->lb */;
            pLast++;
        }
        pdtBase->desc.used += pdtAdd->desc.used;

        if( pLoop != NULL ) {
            pLast->type = DT_END_LOOP;
            pLast->count = pdtAdd->desc.used + 1; /* where the loop start */
            pLast->disp = disp + (count - 1) * extent
                + (pdtAdd->true_ub - pdtAdd->true_lb) ; /* the final extent for the loop */
            pLast->extent = pdtAdd->size; /* the size of the data inside the loop */
            pLast->flags = pLoop->flags;
        }
        /* should I add some space until the extent of this datatype ? */
    }

    pdtBase->size += count * pdtAdd->size;
    pdtBase->true_lb = LMIN( pdtBase->true_lb, pdtAdd->true_lb + disp );
    pdtBase->true_ub = LMAX( pdtBase->true_ub,
                             disp + pdtAdd->true_lb +
                             (count - 1) * extent + pdtAdd->true_ub );

    /* the lower bound should be inherited from the parents if and only
     * if the USER has explicitly set it. The result lb is the MIN between
     * the all lb + disp if and only if all or nobody flags's contain the LB.
     */
    if( (pdtAdd->flags ^ pdtBase->flags) & DT_FLAG_USER_LB ) {
        pdtBase->flags |= DT_FLAG_USER_LB;
        if( pdtAdd->flags & DT_FLAG_USER_LB )
            lb = pdtAdd->lb + disp;
        else
            lb = pdtBase->lb;
    } else {
        lb = LMIN( pdtBase->lb, pdtAdd->lb + disp );
    }

    /* the same apply for the upper bound except for the case where
     * either of them has the flag UB, in which case we should
     * compute the UB including the natural alignement of the data.
     */
    if( (pdtBase->flags ^ pdtAdd->flags) & DT_FLAG_USER_UB ) {
        if( pdtBase->flags & DT_FLAG_USER_UB )
            ub = pdtBase->ub;
        else {
            pdtBase->flags |= DT_FLAG_USER_UB;
            ub = disp + pdtAdd->lb + count * extent;
        }
    } else {
        if( pdtBase->flags & DT_FLAG_USER_UB )
            ub = LMAX( pdtBase->ub, disp + pdtAdd->lb + count * (extent) );
        else {
            /* we should compute the extent depending on the alignement */
            long ubN = (disp + pdtAdd->lb + count * (extent));
            ub = LMAX( ((pdtBase->ub / pdtBase->align) * pdtBase->align),
                       (((ubN + pdtBase->align - 1)/ pdtBase->align) * pdtBase->align) );
        }
    }

    /* update the extent and size */
    pdtBase->lb = lb;
    pdtBase->ub = ub;
    pdtBase->nbElems += (count * pdtAdd->nbElems);

    /* Is the data still contiguous ?
     * The only way for the data to be contiguous is to have the true extent equal to his size.
     * In other words to avoid having internal gaps between elements.
     */
    if( (pdtBase->size != (pdtBase->true_ub - pdtBase->true_lb)) ||
        !(pdtBase->flags & DT_FLAG_CONTIGUOUS) || !(pdtAdd->flags & DT_FLAG_CONTIGUOUS) )
        UNSET_CONTIGUOUS_FLAG(pdtBase->flags);

    dt_decrease_ref( pdtAdd );
    return 0;
}

260
src/datatype/dt_create.c Обычный файл
Просмотреть файл

@ -0,0 +1,260 @@
/* -*- Mode: C; c-basic-offset:3 ; -*- */
#include "datatype.h"
#include "limits.h"
/* Initialisers for the fields that follow bdt_used in a dt_desc_t; they are
 * the same for every predefined type.
 * NOTE(review): the original remark says the DT_LOOP counter "should be
 * ONE", but the last initialiser list is all zeros - confirm which one is
 * intended. */
#define EMPTY_DATA NULL, "", {0, 0, NULL}, {0, 0, NULL}, NULL, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
/* Initialiser for the leading "super" (base object) part of a dt_desc_t. */
#define BASEOBJ_DATA { NULL, 1 }
/*
 * Table of the predefined datatypes, indexed by the DT_* identifiers
 * (0x00 DT_LOOP ... 0x0F DT_END_LOOP). The column order is given by the
 * header comment inside the table. DT_LOOP, DT_LB, DT_UB and DT_END_LOOP
 * are markers with size 0; for the data types size, true_ub, align and ub
 * are all set to the sizeof of the matching C type (twice the component
 * size for the two complex types).
 */
dt_desc_t basicDatatypes[] = {
/*super size true_lb true_ub align lb ub flags id nbElems bdt_used others fields */
/*0x00*/ { BASEOBJ_DATA, 0, 0, 0, 0, 0, 0, DT_FLAG_BASIC, DT_LOOP, 1, (1<<DT_LOOP), EMPTY_DATA },
/*0x01*/ { BASEOBJ_DATA, 0, 0, 0, 0, 0, 0, DT_FLAG_BASIC | DT_FLAG_USER_LB, DT_LB, 1, (1<<DT_LB), EMPTY_DATA },
/*0x02*/ { BASEOBJ_DATA, 0, 0, 0, 0, 0, 0, DT_FLAG_BASIC | DT_FLAG_USER_UB, DT_UB, 1, (1<<DT_UB), EMPTY_DATA },
/* now the real basic datatypes */
/*0x03*/ { BASEOBJ_DATA, 1, 0, 1, 1, 0, 1, DT_FLAG_BASIC | DT_FLAG_DATA, DT_SPACE, 1, (1<<DT_SPACE), EMPTY_DATA },
/*0x04*/ { BASEOBJ_DATA, 1, 0, 1, 1, 0, 1, DT_FLAG_BASIC | DT_FLAG_DATA, DT_CHAR, 1, (1<<DT_CHAR), EMPTY_DATA },
/*0x05*/ { BASEOBJ_DATA, 1, 0, 1, 1, 0, 1, DT_FLAG_BASIC | DT_FLAG_DATA, DT_BYTE, 1, (1<<DT_BYTE), EMPTY_DATA },
/*0x06*/ { BASEOBJ_DATA, sizeof(short), 0, sizeof(short), sizeof(short), 0, sizeof(short), DT_FLAG_BASIC | DT_FLAG_DATA, DT_SHORT, 1, (1<<DT_SHORT), EMPTY_DATA },
/*0x07*/ { BASEOBJ_DATA, sizeof(int), 0, sizeof(int), sizeof(int), 0, sizeof(int), DT_FLAG_BASIC | DT_FLAG_DATA, DT_INT, 1, (1<<DT_INT), EMPTY_DATA },
/*0x08*/ { BASEOBJ_DATA, sizeof(float), 0, sizeof(float), sizeof(float), 0, sizeof(float), DT_FLAG_BASIC | DT_FLAG_DATA, DT_FLOAT, 1, (1<<DT_FLOAT), EMPTY_DATA },
/*0x09*/ { BASEOBJ_DATA, sizeof(long), 0, sizeof(long), sizeof(long), 0, sizeof(long), DT_FLAG_BASIC | DT_FLAG_DATA, DT_LONG, 1, (1<<DT_LONG), EMPTY_DATA },
/*0x0A*/ { BASEOBJ_DATA, sizeof(double), 0, sizeof(double), sizeof(double), 0, sizeof(double), DT_FLAG_BASIC | DT_FLAG_DATA, DT_DOUBLE, 1, (1<<DT_DOUBLE), EMPTY_DATA },
/*0x0B*/ { BASEOBJ_DATA, sizeof(long long), 0, sizeof(long long), sizeof(long long), 0, sizeof(long long), DT_FLAG_BASIC | DT_FLAG_DATA, DT_LONG_LONG, 1, (1<<DT_LONG_LONG), EMPTY_DATA },
/*0x0C*/ { BASEOBJ_DATA, sizeof(long double), 0, sizeof(long double), sizeof(long double), 0, sizeof(long double), DT_FLAG_BASIC | DT_FLAG_DATA, DT_LONG_DOUBLE, 1, (1<<DT_LONG_DOUBLE), EMPTY_DATA },
/*0x0D*/ { BASEOBJ_DATA, 2 * sizeof(float), 0, 2 * sizeof(float), 2 * sizeof(float), 0, 2 * sizeof(float), DT_FLAG_BASIC | DT_FLAG_DATA, DT_COMPLEX_FLOAT, 1, (1<<DT_COMPLEX_FLOAT), EMPTY_DATA },
/*0x0E*/ { BASEOBJ_DATA, 2 * sizeof(double), 0, 2 * sizeof(double), 2 * sizeof(double), 0, 2 * sizeof(double), DT_FLAG_BASIC | DT_FLAG_DATA, DT_COMPLEX_DOUBLE, 1, (1<<DT_COMPLEX_DOUBLE), EMPTY_DATA },
/*0x0F*/ { BASEOBJ_DATA, 0, 0, 0, 0, 0, 0, DT_FLAG_BASIC, DT_END_LOOP, 1, (1<<DT_END_LOOP), EMPTY_DATA },
};
/* Printable names of the predefined types, in the same order as
 * basicDatatypes[], followed by one extra "unknown" entry that
 * __dump_data_desc() selects through the index DT_MAX_PREDEFINED. */
static char* basicDatatypeNames[] = { "loop", "lb", "ub", "space", "char", "byte", "short", "int", "float",
"long", "double", "long_long", "long_double", "cfloat", "cdouble", "end_loop", "unknown" };
/* Node of a singly linked list and the head of such a list.
 * NOTE(review): neither is referenced in the visible part of this file;
 * presumably a free list of recycled descriptors - confirm or remove. */
typedef struct __internal_keep_ptr {
struct __internal_keep_ptr* next;
} internal_dt_desc_t;
static internal_dt_desc_t* __free_dt_desc = NULL;
/*
 * Constructor registered for lam_datatype_t: puts a descriptor in the
 * "empty datatype" state.
 *
 * The bounds start at LONG_MAX / LONG_MIN so that the first LMIN / LMAX
 * applied to them takes the value of the added element. The element
 * descriptor array (desc) is not touched here; dt_create() sets it up.
 */
static void __get_free_dt_struct( dt_desc_t* pData )
{
    int slot = DT_MAX_PREDEFINED;

    /* bounds of an empty type */
    pData->lb = LONG_MAX;
    pData->ub = LONG_MIN;
    pData->true_lb = LONG_MAX;
    pData->true_ub = LONG_MIN;

    /* scalar attributes */
    pData->flags = DT_FLAG_CONTIGUOUS;
    pData->align = 1;
    pData->size = 0;
    pData->id = 0;
    pData->nbElems = 0;
    pData->bdt_used = 0;
    pData->args = NULL;

    /* no optimized description yet */
    pData->opt_desc.used = 0;
    pData->opt_desc.length = 0;
    pData->opt_desc.desc = NULL;

    /* per-type counters: everything zero, except the loop counter */
    while( slot-- > 0 )
        pData->btypes[slot] = 0;
    pData->btypes[DT_LOOP] = 1;
}
OBJ_CLASS_INSTANCE(lam_datatype_t, lam_object_t, __get_free_dt_struct, dt_destroy );
/*
 * Allocate an empty datatype with room for expectedSize element
 * descriptors; -1 selects the default capacity DT_INCREASE_STACK.
 * The descriptor array is zero-filled.
 *
 * Returns the new datatype. Neither the object nor the array allocation is
 * checked for failure.
 */
dt_desc_t* dt_create( int expectedSize )
{
    dt_desc_t* fresh = (dt_desc_t*)OBJ_NEW(lam_datatype_t);

    fresh->desc.used = 0;
    fresh->desc.length = (expectedSize == -1) ? DT_INCREASE_STACK : expectedSize;
    fresh->desc.desc = (dt_elem_desc_t*)calloc(fresh->desc.length, sizeof(dt_elem_desc_t));
    return fresh;
}
/*
 * Create a copy of oldType whose lower bound is lb and whose upper bound is
 * lb + extent. The copy is made with dt_duplicate(), whose return value is
 * not examined.
 *
 * Always returns 0; the new datatype is stored in *newType.
 */
int dt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t** newType )
{
    dt_desc_t* resized;

    dt_duplicate( oldType, newType );
    resized = *newType;
    resized->ub = lb + extent;
    resized->lb = lb;
    return 0;
}
/*
 * Mark a datatype as committed.
 *
 * Returns -1 if the datatype already carries DT_FLAG_COMMITED, 0 otherwise.
 * When the size of the data differs from its true extent (the data has
 * gaps), an optimized description is generated into opt_desc; data without
 * gaps does not need one.
 */
int dt_commit( dt_desc_t** data )
{
    dt_desc_t* pType = *data;

    if( (pType->flags & DT_FLAG_COMMITED) != 0 )
        return -1;
    pType->flags |= DT_FLAG_COMMITED;

    /* no gaps: the plain description is already the best one */
    if( pType->size == (pType->true_ub - pType->true_lb) )
        return 0;

    (void)dt_optimize_short( pType, 1, &(pType->opt_desc) );
    return 0;
}
/*
 * Print the flags of a datatype or of an element as a fixed-width string
 * followed by a tab. Every position stands for one flag and shows a letter
 * when the flag is set, '-' otherwise:
 *   d destroyed, c commited, C contiguous, o overlap, l user lb, u user ub,
 *   F forever, L in loop, D data, I initial, B basic.
 */
static void _dump_data_flags( unsigned short usflags )
{
    /* letter shown at position `slot` when any bit of `mask` is set */
    const struct { unsigned int mask; char mark; } legend[] = {
        { DT_FLAG_DESTROYED,  'd' },
        { DT_FLAG_COMMITED,   'c' },
        { DT_FLAG_CONTIGUOUS, 'C' },
        { DT_FLAG_OVERLAP,    'o' },
        { DT_FLAG_USER_LB,    'l' },
        { DT_FLAG_USER_UB,    'u' },
        { DT_FLAG_FOREVER,    'F' },
        { DT_FLAG_IN_LOOP,    'L' },
        { DT_FLAG_DATA,       'D' },
        { DT_FLAG_INITIAL,    'I' },
    };
    char text[12] = "-----------";
    unsigned int slot;

    for( slot = 0; slot < sizeof(legend) / sizeof(legend[0]); slot++ ) {
        if( usflags & legend[slot].mask )
            text[slot] = legend[slot].mark;
    }
    /* "basic" requires every bit of DT_FLAG_BASIC, not just one of them */
    if( (usflags & DT_FLAG_BASIC) == DT_FLAG_BASIC )
        text[10] = 'B';
    text[11] = 0;
    printf( "%s\t", text );
}
/*
 * Print nbElems element descriptors starting at pDesc, one per line: the
 * flag string, the type name and either the loop parameters (for DT_LOOP
 * entries) or the count, displacement and extent.
 * Types above DT_MAX_PREDEFINED are printed with the name stored at index
 * DT_MAX_PREDEFINED of basicDatatypeNames.
 *
 * Always returns 0.
 */
int __dump_data_desc( dt_elem_desc_t* pDesc, int nbElems )
{
    int idx;

    for( idx = 0; idx < nbElems; idx++ ) {
        dt_elem_desc_t* pElem = &pDesc[idx];
        char* label = (pElem->type > DT_MAX_PREDEFINED)
            ? basicDatatypeNames[DT_MAX_PREDEFINED]
            : basicDatatypeNames[pElem->type];

        _dump_data_flags( pElem->flags );
        if( pElem->type != DT_LOOP ) {
            printf( "%15s count %d disp 0x%lx (%ld) extent %d\n", label,
                    pElem->count, pElem->disp, pElem->disp, pElem->extent );
        } else {
            /* for a loop header disp holds the number of enclosed entries */
            printf( "%15s %d times the next %d elements extent %d\n", label,
                    pElem->count, (int)pElem->disp, pElem->extent );
        }
    }
    return 0;
}
/*
 * Print, separated by blanks, "lb" / "ub" when the datatype carries a
 * user-defined bound, then the name of every predefined type whose bit is
 * set in bdt_used.
 */
void __dt_contain_basic_datatypes( dt_desc_t* pData )
{
    int typeId;

    if( pData->flags & DT_FLAG_USER_LB ) printf( "lb " );
    if( pData->flags & DT_FLAG_USER_UB ) printf( "ub " );

    /* bit number typeId of bdt_used stands for predefined type typeId */
    for( typeId = 0; typeId < DT_MAX_PREDEFINED; typeId++ ) {
        if( (pData->bdt_used & (1 << typeId)) == 0 )
            continue;
        printf( "%s ", basicDatatypeNames[typeId] );
    }
}
/*
 * Print a human readable description of a datatype: the general attributes
 * (size, alignment, id, descriptor capacity and usage, true and user
 * bounds, number of elements, loop counter, flags), the predefined types it
 * contains, the list of element descriptors and, when one exists, the
 * optimized description.
 */
void dt_dump( dt_desc_t* data )
{
dt_desc_t* pData = (dt_desc_t*)data;
/* the format string continues over three source lines with
 * backslash-newline, so the text of the continuation lines is part of it */
printf( "Datatype %p size %d align %d id %d length %d used %d\n\
true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n\
nbElems %d loops %d flags %X (",
pData, pData->size, pData->align, pData->id, pData->desc.length, pData->desc.used,
pData->true_lb, pData->true_ub, pData->true_ub - pData->true_lb,
pData->lb, pData->ub, pData->ub - pData->lb,
pData->nbElems, pData->btypes[DT_LOOP], pData->flags );
/* dump the flags */
/* "basic datatype" is printed only when flags is exactly DT_FLAG_BASIC */
if( pData->flags == DT_FLAG_BASIC ) printf( "basic datatype " );
else {
if( pData->flags & DT_FLAG_DESTROYED ) printf( "destroyed " );
if( pData->flags & DT_FLAG_COMMITED ) printf( "commited " );
if( pData->flags & DT_FLAG_CONTIGUOUS) printf( "contiguous " );
}
printf( ")" ); _dump_data_flags( pData->flags );
printf( "\n contain " ); __dt_contain_basic_datatypes( pData ); printf( "\n" );
__dump_data_desc( pData->desc.desc, pData->desc.used );
/* the optimized description only exists once it has been generated */
if( pData->opt_desc.desc != NULL ) {
printf( "Optimized description \n" );
__dump_data_desc( pData->opt_desc.desc, pData->opt_desc.used );
}
}
/*
 * DUMP_TYPE( TYPENAME, TYPE ) defines the function dump_TYPENAME(), which
 * prints what a conversion of `count` elements of TYPE would do instead of
 * moving any data.
 *
 * If count elements do not fit in the from_len bytes of the source, count
 * is reduced to the number of whole elements that fit; when bytes are left
 * over a warning is printed and the result becomes negative. The function
 * stores count * to_extent in *used and returns count (or -count in the
 * left-over case). from_extent is not used.
 */
#define DUMP_TYPE( TYPENAME, TYPE ) \
int dump_##TYPENAME( unsigned int count, \
char* from, unsigned int from_len, long from_extent, \
char* to, unsigned int to_len, long to_extent, \
int* used ) \
{ \
int remote_type_size = sizeof(TYPE), res = 1; \
if( (remote_type_size * count) > from_len ) { \
count = from_len / remote_type_size; \
if( (count * remote_type_size) != from_len ) { \
printf( "oops should I keep this data somewhere (excedent %d bytes)?\n", \
from_len - (count * remote_type_size) ); \
res = -1; \
} \
printf( "correct: %s count %d from %p with length %d to %p space %d\n", \
#TYPE, count, from, from_len, to, to_len ); \
} else \
printf( " %s count %d from %p with length %d to %p space %d\n", \
#TYPE, count, from, from_len, to, to_len ); \
\
*used = count * to_extent; \
return res * count; \
}
/* one dump function per predefined data type */
DUMP_TYPE( char, char );
DUMP_TYPE( short, short );
DUMP_TYPE( int, int );
DUMP_TYPE( float, float );
DUMP_TYPE( long, long );
DUMP_TYPE( double, double );
DUMP_TYPE( long_long, long long );
DUMP_TYPE( long_double, long double );
DUMP_TYPE( complex_float, complex_float_t );
DUMP_TYPE( complex_double, complex_double_t );
/* Convertor used by dt_dump_complete(); created on first use and kept. */
static convertor_t* pDumpConv = NULL;
/*
 * Conversion table installed in pDumpConv: one entry per predefined type
 * identifier, in DT_* order. The markers and DT_SPACE have no function;
 * DT_BYTE shares the char dump function.
 * NOTE(review): the table stops at DT_COMPLEX_DOUBLE and has no entry for
 * DT_END_LOOP - confirm the convertor never indexes it with that id.
 */
static conversion_fct_t dump_functions[] = {
(conversion_fct_t)NULL, /* DT_LOOP */
(conversion_fct_t)NULL, /* DT_LB */
(conversion_fct_t)NULL, /* DT_UB */
(conversion_fct_t)NULL, /* DT_SPACE */
(conversion_fct_t)dump_char, /* DT_CHAR */
(conversion_fct_t)dump_char, /* DT_BYTE */
(conversion_fct_t)dump_short, /* DT_SHORT */
(conversion_fct_t)dump_int, /* DT_INT */
(conversion_fct_t)dump_float, /* DT_FLOAT */
(conversion_fct_t)dump_long, /* DT_LONG */
(conversion_fct_t)dump_double, /* DT_DOUBLE */
(conversion_fct_t)dump_long_long, /* DT_LONG_LONG */
(conversion_fct_t)dump_long_double, /* DT_LONG_DOUBLE */
(conversion_fct_t)dump_complex_float, /* DT_COMPLEX_FLOAT */
(conversion_fct_t)dump_complex_double, /* DT_COMPLEX_DOUBLE */
};
/*
 * Print the general attributes of a datatype (same header as dt_dump(),
 * plus the "overlap" flag), then run an unpack of one element of the
 * datatype through a convertor whose conversion functions were replaced by
 * the dump_* functions, and enclose that output in braces.
 *
 * The convertor is initialised with a NULL user buffer and the unpack is
 * given an iovec with a NULL base and a length equal to the size of the
 * datatype.
 * NOTE(review): presumably safe only because the dump_* functions never
 * dereference their buffers - confirm against convertor_unpack().
 */
void dt_dump_complete( dt_desc_t* data )
{
dt_desc_t* pData = (dt_desc_t*)data;
struct iovec fake = { (void*)0, 0 };
/* the format string continues over three source lines with
 * backslash-newline, so the text of the continuation lines is part of it */
printf( "Datatype %p size %d align %d id %d length %d used %d\n\
true_lb %ld true_ub %ld (true_extent %ld) lb %ld ub %ld (extent %ld)\n\
nbElems %d loops %d flags %X (",
pData, pData->size, pData->align, pData->id, pData->desc.length, pData->desc.used,
pData->true_lb, pData->true_ub, pData->true_ub - pData->true_lb,
pData->lb, pData->ub, pData->ub - pData->lb,
pData->nbElems, pData->btypes[DT_LOOP], pData->flags );
/* dump the flags */
if( pData->flags == DT_FLAG_BASIC ) printf( "basic datatype " );
else {
if( pData->flags & DT_FLAG_DESTROYED ) printf( "destroyed " );
if( pData->flags & DT_FLAG_COMMITED ) printf( "commited " );
if( pData->flags & DT_FLAG_CONTIGUOUS) printf( "contiguous " );
if( pData->flags & DT_FLAG_OVERLAP ) printf( "overlap " );
}
printf( ")\n contain " ); __dt_contain_basic_datatypes( pData );
printf( "\n{\n" );
/* the dump convertor is created once and reused by later calls */
if( pDumpConv == NULL ) {
pDumpConv = convertor_create( 0, 0 );
}
convertor_init_for_recv( pDumpConv, 0, pData, 1, NULL );
/* print instead of converting */
pDumpConv->pFunctions = dump_functions;
fake.iov_len = pData->size;
convertor_unpack( pDumpConv, &fake, 1 );
printf( "}\n" );
}

15
src/datatype/dt_create_array.c Обычный файл
Просмотреть файл

@ -0,0 +1,15 @@
#include "datatype.h"
/*
 * Placeholder for the creation of a sub-array datatype.
 * Not implemented: no argument is used, *newType is left untouched and the
 * function always returns -1.
 */
int dt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts,
int order, dt_desc_t* oldType, dt_desc_t** newType )
{
return -1;
}
/*
 * Placeholder for the creation of a distributed-array datatype.
 * Not implemented: no argument is used, *newType is left untouched and the
 * function always returns -1.
 */
int dt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib,
int* pDArgs, int* pPSizes, int order, dt_desc_t* oldType,
dt_desc_t** newType )
{
return -1;
}

23
src/datatype/dt_create_dup.c Обычный файл
Просмотреть файл

@ -0,0 +1,23 @@
#include "datatype.h"
/*
 * Create a copy of a datatype.
 *
 * The whole descriptor structure is copied, then the copy gets its own
 * element descriptor array holding exactly the oldType->desc.used entries
 * in use. The id of the copy is reset to 0 and its args to NULL.
 *
 * Always returns 0; the copy is stored in *newType.
 *
 * NOTE(review): the structure copy also duplicates the leading base-object
 * part and the opt_desc pointer of oldType, so the copy presumably starts
 * with oldType's reference count and shares its optimized description -
 * confirm against dt_destroy().
 */
int dt_duplicate( dt_desc_t* oldType, dt_desc_t** newType )
{
    dt_desc_t* pdt = dt_create( oldType->desc.used );
    void* temp = pdt->desc.desc; /* temporary copy of the desc pointer */

    memcpy( pdt, oldType, sizeof(dt_desc_t) );
    pdt->desc.desc = temp;
    /* the new array was sized for the entries in use only: restore that
     * capacity, otherwise the copy advertises the capacity of oldType's
     * array and a later dt_add() could write past the end of this one */
    pdt->desc.length = oldType->desc.used;
    /* nothing to copy for a type without descriptors (both pointers may
     * be NULL in that case) */
    if( oldType->desc.used > 0 )
        memcpy( pdt->desc.desc, oldType->desc.desc, sizeof(dt_elem_desc_t) * oldType->desc.used );
    pdt->id = 0;
    pdt->args = NULL;
    *newType = pdt;
    return 0;
}
/*
 * Create a datatype made of `count` repetitions of oldType, spaced by the
 * extent (ub - lb) of oldType and starting at displacement 0.
 *
 * Always returns 0; the new datatype is stored in *newType.
 */
int dt_create_contiguous( size_t count, dt_desc_t* oldType, dt_desc_t** newType )
{
    /* room for the elements of oldType plus a loop begin/end pair */
    dt_desc_t* packed = dt_create( oldType->desc.used + 2 );
    long stride = oldType->ub - oldType->lb;

    dt_add( packed, oldType, count, 0, stride );
    *newType = packed;
    return 0;
}

90
src/datatype/dt_create_indexed.c Обычный файл
Просмотреть файл

@ -0,0 +1,90 @@
#include "datatype.h"
/* We try to merge together data that are contiguous */
/*
 * Create an indexed datatype: block i is made of pBlockLength[i] copies of
 * oldType placed at displacement pDisp[i], counted in multiples of the
 * extent of oldType.
 *
 * Consecutive blocks where one starts exactly where the previous one ends
 * are merged into a single run before being added.
 *
 * Always returns 0; the new datatype is stored in *newType. The arrays must
 * hold at least one entry.
 */
int dt_create_indexed( size_t count, int* pBlockLength, int* pDisp,
                       dt_desc_t* oldType, dt_desc_t** newType )
{
    long elemExtent = oldType->ub - oldType->lb;
    dt_desc_t* pResult = dt_create( count * (2 + oldType->desc.used) );
    int runStart = pDisp[0];          /* displacement of the current run */
    int runLength = pBlockLength[0];  /* number of elements in the run   */
    int runEnd = runStart + runLength;
    int idx;

    for( idx = 1; idx < count; idx++ ) {
        if( pDisp[idx] != runEnd ) {
            /* gap or overlap: flush the run and start a new, empty one */
            dt_add( pResult, oldType, runLength, runStart * elemExtent, elemExtent );
            runStart = pDisp[idx];
            runLength = 0;
            runEnd = runStart;
        }
        /* extend the (possibly new) run by the current block */
        runLength += pBlockLength[idx];
        runEnd += pBlockLength[idx];
    }
    /* flush the last run */
    dt_add( pResult, oldType, runLength, runStart * elemExtent, elemExtent );

    *newType = pResult;
    return 0;
}
/*
 * Create an indexed datatype whose displacements are used as they are
 * (not scaled by the extent of oldType): block i is made of
 * pBlockLength[i] copies of oldType placed at displacement pDisp[i].
 *
 * Consecutive blocks where one starts exactly where the previous one ends
 * are merged into a single run before being added.
 *
 * Always returns 0; the new datatype is stored in *newType. The arrays must
 * hold at least one entry.
 */
int dt_create_hindexed( size_t count, int* pBlockLength, long* pDisp,
                        dt_desc_t* oldType, dt_desc_t** newType )
{
    long elemExtent = oldType->ub - oldType->lb;
    dt_desc_t* pResult = dt_create( count * (2 + oldType->desc.used) );
    long runStart = pDisp[0];         /* displacement of the current run */
    int runLength = pBlockLength[0];  /* number of elements in the run   */
    long runEnd = runStart + runLength * elemExtent;
    int idx;

    for( idx = 1; idx < count; idx++ ) {
        if( pDisp[idx] != runEnd ) {
            /* gap or overlap: flush the run and start a new, empty one */
            dt_add( pResult, oldType, runLength, runStart, elemExtent );
            runStart = pDisp[idx];
            runLength = 0;
            runEnd = runStart;
        }
        /* extend the (possibly new) run by the current block */
        runLength += pBlockLength[idx];
        runEnd += pBlockLength[idx] * elemExtent;
    }
    /* flush the last run */
    dt_add( pResult, oldType, runLength, runStart, elemExtent );

    *newType = pResult;
    return 0;
}
/* Create an indexed datatype whose blocks all have the same length:
 * `count` blocks of bLength elements of oldType, block i starting at
 * pDisp[i] expressed in multiples of the extent of oldType.
 *
 * Blocks that start exactly where the previous one ends are merged into a
 * single dt_add() call.
 *
 * pDisp[0] is read unconditionally, so `count` is expected to be at
 * least 1.
 *
 * Returns 0; the new datatype is stored in *newType.
 */
int dt_create_indexed_block( size_t count, int bLength, int* pDisp,
                             dt_desc_t* oldType, dt_desc_t** newType )
{
    const long extent = oldType->ub - oldType->lb;
    dt_desc_t* pdt = dt_create( count * (2 + oldType->desc.used) );
    int runStart = pDisp[0];            /* first element of the pending run */
    int runLength = bLength;            /* elements merged so far */
    int runEnd = runStart + runLength;  /* where a contiguous block would start */
    int i;

    for( i = 1; i < count; i++ ) {
        if( pDisp[i] != runEnd ) {
            /* gap: flush the pending run and start a fresh one */
            dt_add( pdt, oldType, runLength, runStart * extent, extent );
            runStart = pDisp[i];
            runLength = 0;
            runEnd = runStart;
        }
        runLength += bLength;
        runEnd += bLength;
    }
    dt_add( pdt, oldType, runLength, runStart * extent, extent );

    *newType = pdt;
    return 0;
}

42
src/datatype/dt_create_struct.c Обычный файл
Просмотреть файл

@ -0,0 +1,42 @@
#include "datatype.h"
/* Create a struct datatype from `count` (block length, displacement, type)
 * triplets.
 *
 * A first pass sums the number of description entries used by every member
 * type (plus 2 for each block whose length is not 1) so that dt_create()
 * can size the description array up front.
 *
 * Consecutive entries that use the same datatype and start exactly at the
 * end of the pending block are merged into one dt_add() call.
 *
 * Entry 0 of each array is read unconditionally, so `count` is expected to
 * be at least 1.
 *
 * Returns 0; the new datatype is stored in *newType.
 */
int dt_create_struct( size_t count, size_t* pBlockLength, long* pDisp,
                      dt_desc_t** pTypes, dt_desc_t** newType )
{
    int i;
    long nbDesc = 0;         /* description entries to reserve */
    long curDisp, curExtent; /* pending block: start and element extent */
    long endto;              /* first byte after the pending block */
    size_t curBlock;         /* pending block: number of elements */
    dt_desc_t *pdt, *curType;

    for( i = 0; i < count; i++ ) {
        nbDesc += pTypes[i]->desc.used;
        if( pBlockLength[i] != 1 ) nbDesc += 2;
    }

    curType = pTypes[0];
    curBlock = pBlockLength[0];
    curExtent = curType->ub - curType->lb;
    curDisp = pDisp[0];
    endto = pDisp[0] + curExtent * curBlock;

    pdt = dt_create( nbDesc );
    for( i = 1; i < count; i++ ) {
        if( (pTypes[i] != curType) || (pDisp[i] != endto) ) {
            /* different type or a gap: flush and restart from entry i */
            dt_add( pdt, curType, curBlock, curDisp, curExtent );
            curType = pTypes[i];
            curExtent = curType->ub - curType->lb;
            curBlock = pBlockLength[i];
            curDisp = pDisp[i];
        } else {
            curBlock += pBlockLength[i];
        }
        endto = curDisp + curExtent * curBlock;
    }
    dt_add( pdt, curType, curBlock, curDisp, curExtent );

    *newType = pdt;
    return 0;
}

64
src/datatype/dt_create_vector.c Обычный файл
Просмотреть файл

@ -0,0 +1,64 @@
#include "datatype.h"
/* Open questions ...
 *  - how to improve the handling of these vectors (creating a temporary
 *    datatype can ONLY be an initial solution).
 */

/* Create a vector datatype: `count` blocks of bLength elements of oldType,
 * with consecutive blocks `stride` elements apart (the stride is multiplied
 * by the extent of oldType).
 *
 *  - bLength == stride: the blocks touch, so a single run of
 *    count * bLength elements is added.
 *  - otherwise the first count - 1 blocks are added as one repeated entry
 *    (through a temporary block datatype when bLength != 1) and the last
 *    block is added on its own at (count - 1) * extent * stride.
 *
 * Returns 0; the new datatype is stored in *newType.
 */
int dt_create_vector( size_t count, int bLength, long stride,
                      dt_desc_t* oldType, dt_desc_t** newType )
{
    const long extent = oldType->ub - oldType->lb;
    dt_desc_t* pData;

    if( bLength == stride ) {
        /* the elements are contiguous */
        pData = dt_create( oldType->desc.used + 2 );
        dt_add( pData, oldType, count * bLength, 0, extent );
        *newType = pData;
        return 0;
    }

    if( count <= 1 ) {
        pData = dt_create( oldType->desc.used + 2 );
    } else if( bLength == 1 ) {
        pData = dt_create( oldType->desc.used + 2 );
        dt_add( pData, oldType, count - 1, 0, stride * extent );
    } else {
        /* describe one block, then repeat it count - 1 times */
        dt_desc_t* pBlock = dt_create( oldType->desc.used + 2 );

        pData = dt_create( oldType->desc.used + 2 + 2 );
        dt_add( pBlock, oldType, bLength, 0, extent );
        dt_add( pData, pBlock, count - 1, 0, stride * extent );
        dt_free( &pBlock );
    }

    /* last block */
    dt_add( pData, oldType, bLength, (count - 1) * extent * stride, extent );
    *newType = pData;
    return 0;
}
/* Create an h-vector datatype: `count` blocks of bLength elements of
 * oldType, with consecutive blocks `stride` apart; unlike
 * dt_create_vector() the stride is used as-is (not multiplied by the
 * extent of oldType).
 *
 *  - extent * bLength == stride: the blocks touch, so a single run of
 *    count * bLength elements is added.
 *  - otherwise the first count - 1 blocks are added as a repeated temporary
 *    block datatype and the last block is added on its own at
 *    (count - 1) * stride.
 *
 * Returns 0; the new datatype is stored in *newType.
 */
int dt_create_hvector( size_t count, int bLength, long stride,
                       dt_desc_t* oldType, dt_desc_t** newType )
{
    const long extent = oldType->ub - oldType->lb;
    dt_desc_t* pData;

    if( (extent * bLength) == stride ) {
        /* contiguous */
        pData = dt_create( oldType->desc.used + 2 );
        dt_add( pData, oldType, count * bLength, 0, extent );
        *newType = pData;
        return 0;
    }

    if( count <= 1 ) {
        pData = dt_create( oldType->desc.used + 2 );
    } else {
        /* describe one block, then repeat it count - 1 times */
        dt_desc_t* pBlock = dt_create( oldType->desc.used + 2 );

        pData = dt_create( oldType->desc.used + 2 + 2 );
        dt_add( pBlock, oldType, bLength, 0, extent );
        dt_add( pData, pBlock, count - 1, 0, stride );
        dt_free( &pBlock );
    }

    /* last block */
    dt_add( pData, oldType, bLength, (count - 1) * stride, extent );
    *newType = pData;
    return 0;
}

21
src/datatype/dt_destroy.c Обычный файл
Просмотреть файл

@ -0,0 +1,21 @@
#include "datatype.h"
/* Release the description arrays owned by a datatype.
 *
 * This function should never be called directly: it is meant to be called
 * by dt_decrease_ref when the number of references on the datatype reaches
 * ZERO.
 *
 * Datatypes flagged DT_FLAG_FOREVER are permanent and are refused with
 * LAM_ERROR; every other datatype has its opt_desc.desc and desc.desc
 * arrays freed and the pointers reset to NULL, so a second call cannot
 * free them twice.
 *
 * A warning is printed on stderr when the datatype still carries an `args`
 * description at this point.
 *
 * NOTE(review): the dt_desc_t object itself is not released here and *dt
 * is left unchanged - confirm who owns the object returned by dt_create().
 *
 * Returns 0 on success, LAM_ERROR for a permanent datatype.
 */
int dt_destroy( dt_desc_t** dt )
{
    dt_desc_t* pData = *dt;

    /* permanent datatypes must never be torn down */
    if( pData->flags & DT_FLAG_FOREVER )
        return LAM_ERROR;

    /* I still have the data description ? */
    if( pData->args != NULL ) {
        fprintf( stderr, "Data description has not been removed prior to data destruction" );
    }

    /* free(NULL) is a no-op, no guard needed */
    free( pData->opt_desc.desc );
    pData->opt_desc.desc = NULL;
    free( pData->desc.desc );
    pData->desc.desc = NULL;
    return 0;
}

71
src/datatype/dt_module.c Обычный файл
Просмотреть файл

@ -0,0 +1,71 @@
#include "datatype.h"
#include "datatype_internal.h"
/* Alignment probes: each structure holds a single char followed by one
 * member of the probed type. dt_load() uses the byte offset of that second
 * member inside the structure as the alignment of the type.
 */
/* probe for long */
struct _c_l {
char c;
long l;
};
/* probe for double */
struct _c_d {
char c;
double d;
};
/* probe for long long */
struct _c_ll {
char c;
long long ll;
};
/* probe for long double */
struct _c_ld {
char c;
long double ld;
};
/* probe for float */
struct _c_f {
char c;
float f;
};
int dt_load( void )
{
/* we have to compute the correct alignement for several types of basic datatypes */
struct _c_f c_f;
struct _c_l c_l;
struct _c_d c_d;
struct _c_ll c_ll;
struct _c_ld c_ld;
int i;
basicDatatypes[DT_FLOAT].align = (char*)&(c_f.f) - (char*)&(c_f);
basicDatatypes[DT_LONG].align = (char*)&(c_l.l) - (char*)&(c_l);
basicDatatypes[DT_DOUBLE].align = (char*)&(c_d.d) - (char*)&(c_d);
basicDatatypes[DT_LONG_DOUBLE].align = (char*)&(c_ld.ld) - (char*)&(c_ld);
basicDatatypes[DT_LONG_LONG].align = (char*)&(c_ll.ll) - (char*)&(c_ll);
for( i = 0; i < DT_MAX_PREDEFINED; i++ ) {
basicDatatypes[i].desc.desc = (dt_elem_desc_t*)malloc(sizeof(dt_elem_desc_t));
basicDatatypes[i].desc.desc->flags = DT_FLAG_BASIC | DT_FLAG_CONTIGUOUS;
basicDatatypes[i].desc.desc->type = i;
basicDatatypes[i].desc.desc->count = 1;
basicDatatypes[i].desc.desc->disp = 0;
basicDatatypes[i].desc.desc->extent = basicDatatypes[i].size;
basicDatatypes[i].desc.length = 1;
basicDatatypes[i].desc.used = 1;
basicDatatypes[i].btypes[i] = 1;
}
return 0;
}
/* Release the one-element descriptions attached to the predefined
 * datatypes: for each of the DT_MAX_PREDEFINED entries of basicDatatypes
 * the description array is freed and desc.desc / desc.length / desc.used
 * are reset to NULL / 0 / 0.
 *
 * Returns 0.
 */
int dt_unload( void )
{
    int idx = 0;

    while( idx < DT_MAX_PREDEFINED ) {
        free( basicDatatypes[idx].desc.desc );
        basicDatatypes[idx].desc.used = 0;
        basicDatatypes[idx].desc.length = 0;
        basicDatatypes[idx].desc.desc = NULL;
        idx++;
    }
    return 0;
}

48
src/datatype/dt_old_limits.c Обычный файл
Просмотреть файл

@ -0,0 +1,48 @@
#include "datatype.h"
/* Store the upper bound (ub) of the datatype in *disp. Returns 0. */
int dt_type_ub( dt_desc_t* pData, long* disp )
{
    const long upper = pData->ub;

    *disp = upper;
    return 0;
}
/* Store the lower bound (lb) of the datatype in *disp. Returns 0. */
int dt_type_lb( dt_desc_t* pData, long* disp )
{
    const long lower = pData->lb;

    *disp = lower;
    return 0;
}
/* Store the extent of the datatype (ub - lb) in *extent. Returns 0. */
int dt_type_extent( dt_desc_t* pData, long* extent )
{
    const long span = pData->ub - pData->lb;

    *extent = span;
    return 0;
}
/* Store the `size` field of the datatype in *size. Returns 0. */
int dt_type_size( dt_desc_t* pData, int* size )
{
    const int bytes = pData->size;

    *size = bytes;
    return 0;
}
/* Create a copy of pOld with a new lower bound and extent.
 *
 * Not implemented: no datatype is created. To keep callers from using an
 * uninitialized result, *pNew is set to NULL (when pNew is not NULL) and
 * the call fails, like the other unimplemented constructors
 * (dt_create_subarray, dt_create_darray).
 *
 * Returns -1.
 */
int dt_type_resize( dt_desc_t* pOld, long lb, long extent, dt_desc_t** pNew )
{
    (void)pOld;
    (void)lb;
    (void)extent;

    if( pNew != NULL )
        *pNew = NULL;
    return -1;
}
/* Report the lower bound and the extent (ub - lb) of a datatype through
 * *lb and *extent. Returns 0.
 */
int dt_get_extent( dt_desc_t* datatype, long* lb, long* extent)
{
    *lb = datatype->lb;
    *extent = datatype->ub - datatype->lb;
    return 0;
}
/* Report the true lower bound and the true extent (true_ub - true_lb) of a
 * datatype through *true_lb and *true_extent. Returns 0.
 */
int dt_get_true_extent( dt_desc_t* datatype, long* true_lb, long* true_extent)
{
    *true_lb = datatype->true_lb;
    *true_extent = datatype->true_ub - datatype->true_lb;
    return 0;
}

151
src/datatype/dt_optimize.c Обычный файл
Просмотреть файл

@ -0,0 +1,151 @@
/* -*- Mode: C; c-basic-offset:3 ; -*- */
#include "datatype.h"
#include "datatype_internal.h"
/* SAVE_DESC( PELEM, DISP, COUNT )
 * Write a basic DT_BYTE element of COUNT bytes at displacement DISP (extent 1)
 * into the element pointed to by PELEM, then advance PELEM to the next slot.
 * Also increments `nbElems`, which must be a variable visible where the
 * macro is expanded. PELEM must be a modifiable pointer (it is incremented).
 * The commented-out printf below is a debugging trace.
 */
/* printf( "save in %s:%d at %p DT_BYTE disp %ld count %d\n", __FILE__, __LINE__, (PELEM), (DISP), (COUNT) ); \ */
#define SAVE_DESC( PELEM, DISP, COUNT ) \
do { \
(PELEM)->flags = DT_FLAG_BASIC; \
(PELEM)->type = DT_BYTE; \
(PELEM)->count = (COUNT); \
(PELEM)->disp = (DISP); \
(PELEM)->extent = 1; \
(PELEM)++; \
nbElems++; \
} while(0)
/* SAVE_ELEM( PELEM, TYPE, FLAGS, COUNT, DISP, EXTENT )
 * Write an element with the given type, flags, count, displacement and
 * extent into the element pointed to by PELEM, then advance PELEM to the
 * next slot. Also increments `nbElems`, which must be a variable visible
 * where the macro is expanded. PELEM must be a modifiable pointer.
 * The commented-out printf below is a debugging trace.
 */
/* printf( "save in %s:%d type %d flags %x count %d disp %ld extent %d\n", \ */
/* __FILE__, __LINE__, (TYPE), (FLAGS), (COUNT), (DISP), (EXTENT) ); \ */
#define SAVE_ELEM( PELEM, TYPE, FLAGS, COUNT, DISP, EXTENT ) \
do { \
(PELEM)->flags = (FLAGS); \
(PELEM)->type = (TYPE); \
(PELEM)->count = (COUNT); \
(PELEM)->disp = (DISP); \
(PELEM)->extent = (EXTENT); \
(PELEM)++; \
nbElems++; \
} while(0)
/* Return the displacement of the first element, starting at _pElem, whose
 * type is not DT_LOOP (i.e. skip any run of nested loop headers).
 */
static inline long GET_LOOP_DISP( dt_elem_desc_t* _pElem )
{
    dt_elem_desc_t* cursor;

    for( cursor = _pElem; cursor->type == DT_LOOP; cursor++ )
        ; /* skip loop headers */
    return cursor->disp;
}
/* Build a compacted description of pData into pTypeDesc.
 *
 * The original description (pData->desc) is walked once. Basic elements
 * that follow each other in memory are accumulated into a single run of
 * bytes (lastDisp / lastLength) which is emitted as one DT_BYTE element via
 * SAVE_DESC. Loops flagged DT_FLAG_CONTIGUOUS are either folded into the
 * current run (when the loop extent equals the extent stored in its end
 * marker) or rewritten as a three-element loop (DT_LOOP, one DT_BYTE
 * element, DT_END_LOOP). Other loops are copied and their body is processed
 * element by element.
 *
 * pTypeDesc->desc is allocated here with 2 * pData->desc.used entries
 * (stored in pTypeDesc->length); pTypeDesc->used receives the value of
 * nbElems at the end.
 *
 * Always returns 0.
 *
 * NOTE(review): the malloc result is not checked, and nothing visible here
 * checks that the number of emitted elements stays within
 * pTypeDesc->length - confirm.
 * NOTE(review): `changes` and `stack_pos` are updated but their values are
 * not used for any decision inside this function.
 */
int dt_optimize_short( dt_desc_t* pData, int count, dt_type_desc_t* pTypeDesc )
{
dt_elem_desc_t* pElemDesc;
long lastDisp = 0;
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int end_loop; /* last element in the actual loop */
int stack_pos = 0;
int type, lastLength = 0, nbElems = 0, changes = 0;
long totalDisp;
pTypeDesc->length = 2 * pData->desc.used;
pTypeDesc->desc = pElemDesc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * pTypeDesc->length );
/* one stack entry per DT_LOOP counted in btypes plus one base entry */
pStack = alloca( sizeof(dt_stack_t) * (pData->btypes[DT_LOOP]+1) );
pStack->count = count;
pStack->index = -1;
pStack->end_loop = pData->desc.used - 1;
pStack->disp = 0;
pos_desc = 0;
next_loop:
/* reload the bounds of the loop on top of the stack */
end_loop = pStack->end_loop;
totalDisp = pStack->disp;
while( pos_desc <= end_loop ) {
if( pData->desc.desc[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
dt_elem_desc_t* pStartLoop;
/* flush the pending byte run before closing the loop */
if( lastLength != 0 ) {
SAVE_DESC( pElemDesc, lastDisp, lastLength );
lastDisp += lastLength;
lastLength = 0;
}
/* nbElems was reset to 1 when the loop header was saved, so this
 * points back at that header in the output array */
pStartLoop = (pElemDesc - nbElems);
SAVE_ELEM( pElemDesc, DT_END_LOOP, pData->desc.desc[pos_desc].flags,
nbElems, pData->desc.desc[pos_desc].disp,
pData->desc.desc[pos_desc].extent );
/* the header's disp field temporarily held the element count that was
 * current before the loop: add it back, then store the count that was
 * just written into the end marker as the header's disp */
nbElems += pStartLoop->disp;
pStartLoop->disp = (pElemDesc - 1)->count;
stack_pos--;
pStack--;
pos_desc++;
goto next_loop;
}
if( pData->desc.desc[pos_desc].type == DT_LOOP ) {
/* the loop header's disp is the offset from the header to its end marker */
dt_elem_desc_t* pEndLoop = &(pData->desc.desc[pos_desc + pData->desc.desc[pos_desc].disp]);
long loop_disp = GET_LOOP_DISP( &(pData->desc.desc[pos_desc]) );
if( pData->desc.desc[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
/* the loop is contiguous or composed by contiguous elements with a gap */
if( pData->desc.desc[pos_desc].extent == pEndLoop->extent ) {
/* the whole loop is contiguous */
/* NOTE(review): unlike the other flush points, this SAVE_DESC is not
 * guarded by lastLength != 0 - confirm whether a zero-length element
 * can be emitted here */
if( (lastDisp + lastLength) != (totalDisp + loop_disp) ) {
SAVE_DESC( pElemDesc, lastDisp, lastLength );
lastLength = 0;
lastDisp = totalDisp + loop_disp;
}
lastLength += pData->desc.desc[pos_desc].count * pEndLoop->extent;
} else {
int counter = pData->desc.desc[pos_desc].count;
/* the first iteration continues the pending run: absorb it */
if( (lastDisp + lastLength) == (totalDisp + loop_disp) ) {
lastLength += pEndLoop->extent;
counter--;
}
if( lastLength != 0 ) {
SAVE_DESC( pElemDesc, lastDisp, lastLength );
lastDisp += lastLength;
lastLength = 0;
}
/* we have a gap at the beginning or the end of the loop but each
 * iteration of the loop can be merged in just one memcpy.
 */
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags,
counter, (long)2, pData->desc.desc[pos_desc].extent );
SAVE_DESC( pElemDesc, loop_disp, pEndLoop->extent );
SAVE_ELEM( pElemDesc, DT_END_LOOP, pEndLoop->flags,
2, pEndLoop->disp, pEndLoop->extent );
}
/* skip the whole loop in the source description */
pos_desc += pData->desc.desc[pos_desc].disp + 1;
changes++;
} else {
if( lastLength != 0 ) {
SAVE_DESC( pElemDesc, lastDisp, lastLength );
lastDisp += lastLength;
lastLength = 0;
}
/* copy the loop header; its disp field temporarily stores the current
 * element count (restored when the matching DT_END_LOOP is reached) */
SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags,
pData->desc.desc[pos_desc].count, (long)nbElems,
pData->desc.desc[pos_desc].extent );
nbElems = 1;
PUSH_STACK( pStack, stack_pos, pos_desc, pData->desc.desc[pos_desc].count,
totalDisp, pos_desc + pData->desc.desc[pos_desc].disp );
pos_desc++;
DUMP_STACK( pStack, stack_pos, pData->desc, "advance loops" );
}
goto next_loop;
}
/* now here we have a basic datatype */
type = pData->desc.desc[pos_desc].type;
if( (lastDisp + lastLength) == (totalDisp + pData->desc.desc[pos_desc].disp) ) {
/* starts where the pending run ends: extend the run */
lastLength += pData->desc.desc[pos_desc].count * basicDatatypes[type].size;
} else {
if( lastLength != 0 )
SAVE_DESC( pElemDesc, lastDisp, lastLength );
lastDisp = totalDisp + pData->desc.desc[pos_desc].disp;
lastLength = pData->desc.desc[pos_desc].count * basicDatatypes[type].size;
}
pos_desc++; /* advance to the next data */
}
/* flush whatever is still pending */
if( lastLength != 0 )
SAVE_DESC( pElemDesc, lastDisp, lastLength );
/* cleanup the stack */
pTypeDesc->used = nbElems;
return 0;
}

478
src/datatype/dt_pack.c Обычный файл
Просмотреть файл

@ -0,0 +1,478 @@
/* -*- Mode: C; c-basic-offset:3 ; -*- */
#include "datatype.h"
#include "datatype_internal.h"
/* Generic pack step driven by the per-type conversion functions.
 *
 * Walks the datatype description (the optimized one when
 * pData->opt_desc.desc is not NULL, the plain one otherwise) starting from
 * the state saved on pConvertor->pStack, and for every basic element calls
 * pConvertor->pFunctions[type] with the user buffer (pConvertor->pBaseBuf
 * plus the current displacement) and the first iovec entry (out[0]).
 *
 * Only out[0] is used; outCount is only printed by the DUMP trace.
 *
 * Returns  1 when the stack is exhausted (everything processed),
 *          0 when the call stops early; the position is pushed on the
 *            stack so that a later call can resume,
 *         -1 when a conversion function returns a value <= 0.
 *
 * NOTE(review): the local names follow the unpack routine (pInput is the
 * iovec buffer, pOutput the user buffer) and the trace label says
 * "convertor_decode" - the argument order given to pFunctions should be
 * confirmed against the copy functions.
 */
static int convertor_pack_general( convertor_t* pConvertor, struct iovec* out, unsigned int outCount )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int count_desc; /* the number of items already done in the actual pos_desc */
int end_loop; /* last element in the actual loop */
int type; /* type at current position */
unsigned int advance; /* number of bytes that we should advance the buffer */
int rc;
long disp_desc = 0; /* compute displacement for truncated data */
long disp; /* displacement at the beginning of the last loop */
dt_desc_t *pData = pConvertor->pDesc;
dt_elem_desc_t* pElem;
char* pOutput = pConvertor->pBaseBuf;
int oCount = (pData->ub - pData->lb) * pConvertor->count;
char* pInput = out[0].iov_base;
int iCount = out[0].iov_len;
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor,
out[0].iov_base, out[0].iov_len, outCount );
/* resume from the entry on top of the convertor stack */
pStack = pConvertor->pStack + pConvertor->stack_pos;
pos_desc = pStack->index;
disp = 0;
if( pData->opt_desc.desc != NULL ) pElem = pData->opt_desc.desc;
else pElem = pData->desc.desc;
if( pos_desc == -1 ) {
/* index -1 marks the base entry: start from the first element */
pos_desc = 0;
count_desc = pElem[0].count;
disp_desc = pElem[0].disp;
} else {
count_desc = pStack->count;
if( pElem[pos_desc].type != DT_LOOP ) {
/* the top entry describes a partially processed basic element:
 * pop it and skip the items already done */
pConvertor->stack_pos--;
pStack--;
disp = pStack->disp;
disp_desc = ( pElem[pos_desc].disp +
(pElem[pos_desc].count - count_desc) * pElem[pos_desc].extent);
}
}
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
next_loop:
end_loop = pStack->end_loop;
while( pConvertor->stack_pos >= 0 ) {
if( pos_desc == end_loop ) { /* end of the current loop */
/* pop every loop whose iteration count reaches zero */
while( --(pStack->count) == 0 ) { /* end of loop */
pConvertor->stack_pos--;
pStack--;
if( pConvertor->stack_pos == -1 )
return 1; /* completed */
}
/* restart the enclosing loop one iteration further: advance its
 * displacement by the datatype extent (base entry) or the loop extent */
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElem[pos_desc].extent;
pos_desc++;
disp = pStack->disp;
count_desc = pElem[pos_desc].count;
disp_desc = pElem[pos_desc].disp;
goto next_loop;
}
if( pElem[pos_desc].type == DT_LOOP ) {
/* push one stack entry per directly nested loop header */
do {
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, pElem[pos_desc].count,
disp, pos_desc + pElem[pos_desc].disp + 1);
pos_desc++;
} while( pElem[pos_desc].type == DT_LOOP ); /* let's start another loop */
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loops" );
/* update the current state */
count_desc = pElem[pos_desc].count;
disp_desc = pElem[pos_desc].disp;
goto next_loop;
}
/* now here we have a basic datatype */
type = pElem[pos_desc].type;
rc = pConvertor->pFunctions[type]( count_desc,
pOutput + disp + disp_desc, oCount, pElem[pos_desc].extent,
pInput, iCount, pElem[pos_desc].extent,
&advance );
if( rc <= 0 ) {
printf( "trash in the input buffer\n" );
return -1;
}
iCount -= advance; /* decrease the available space in the buffer */
pInput += advance; /* increase the pointer to the buffer */
pConvertor->bConverted += advance;
if( rc != count_desc ) {
/* not all data has been converted. Keep the state */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
count_desc - rc,
disp + rc * pElem[pos_desc].extent,
pos_desc );
if( iCount != 0 )
printf( "there is still room in the input buffer %d bytes\n", iCount );
return 0;
}
pConvertor->converted += rc; /* number of elements converted so far */
pos_desc++; /* advance to the next data */
count_desc = pElem[pos_desc].count;
disp_desc = pElem[pos_desc].disp;
if( iCount == 0 ) break; /* break if there is no more data in the buffer */
}
/* out of the loop: we have completed the data conversion or there is no
 * more space in the buffer.
 */
if( pConvertor->pStack[0].count < 0 ) return 1; /* data successfully converted */
/* I completed an element, next step I should go to the next one */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, pElem[pos_desc].count,
disp, pos_desc );
return 0;
}
/* Pack data with plain memory copies (no per-type conversion).
 *
 * Copies from the user buffer (pConv->pBaseBuf) into iov[0].iov_base.
 *
 *  - Datatype flagged DT_FLAG_CONTIGUOUS:
 *      * size == extent: one MEMCPY of iov[0].iov_len bytes starting at
 *        pBaseBuf + true_lb + bConverted;
 *      * otherwise: pConv->count copies of pData->size bytes, the source
 *        advancing by the extent each time.
 *    Returns 1 when bConverted equals size * count, 0 otherwise.
 *  - Otherwise the description (optimized one if present) is walked with
 *    the convertor stack; adjacent basic elements are accumulated in
 *    lastDisp / lastLength and copied with one MEMCPY per run.
 *    Returns 0.
 *
 * out_size is not used.
 *
 * NOTE(review): in the "contiguous with gaps" branch the copy always
 * covers pConv->count elements and never looks at iov[0].iov_len - confirm
 * the caller guarantees a large enough buffer.
 * NOTE(review): in the contiguous-loop branch pDestBuf and bConverted are
 * not advanced the same way as in the basic-element branch - verify the
 * byte accounting before relying on bConverted.
 */
int convertor_pack_homogeneous( convertor_t* pConv, struct iovec* iov, unsigned int out_size )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int i; /* index for basic elements with extent */
int stack_pos = 0; /* position on the stack */
long lastDisp = 0, lastLength = 0;
char* pDestBuf;
dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
pDestBuf = iov[0].iov_base;
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
long extent = pData->ub - pData->lb;
char* pSrc = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
/* `type` is reused here to hold the total number of bytes to pack */
type = pConv->count * pData->size;
if( pData->size == extent /* true extent at this point */ ) {
/* we can do it with just one memcpy */
MEMCPY( pDestBuf, pSrc, iov[0].iov_len );
pConv->bConverted += iov[0].iov_len;
} else {
/* this inner `extent` shadows the one declared above (same value) */
char* pSrcBuf = pConv->pBaseBuf + pData->true_lb;
long extent = pData->ub - pData->lb;
for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
MEMCPY( pDestBuf, pSrcBuf, pData->size );
pSrcBuf += extent;
pDestBuf += pData->size;
}
pConv->bConverted += type;
}
return (pConv->bConverted == (pData->size * pConv->count));
}
/* non contiguous datatype: (re)initialize the base stack entry */
pStack = pConv->pStack;
pStack->count = pConv->count;
pStack->index = -1;
pStack->disp = 0;
pos_desc = 0;
if( pData->opt_desc.desc != NULL ) {
pElems = pData->opt_desc.desc;
pStack->end_loop = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
pStack->end_loop = pData->desc.used;
}
DUMP_STACK( pStack, stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
next_loop:
while( pos_desc <= pStack->end_loop ) {
if( pos_desc == pStack->end_loop ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --stack_pos == -1 ) break;
} else {
/* another iteration: jump back to the loop start and shift the
 * displacement by the datatype extent (base entry) or the loop extent */
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
}
pos_desc++;
goto next_loop;
}
if( pElems[pos_desc].type == DT_LOOP ) {
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
/* pLast is the end marker of this loop (header disp = offset to it) */
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
/* the first iteration continues the pending run */
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength + pLast->extent );
i = 1;
} else {
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
i = 0;
}
pDestBuf += lastLength;
lastLength = pLast->extent;
for( ; i < (pElems[pos_desc].count - 1); i++ ) {
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
pDestBuf += pLast->extent;
lastDisp += pElems[pos_desc].extent;
}
/* skip the loop body and its end marker */
pos_desc += pElems[pos_desc].disp + 1;
goto next_loop;
} else {
do {
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
}
}
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
/* starts where the pending run ends: extend the run */
lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
} else {
/* flush the pending run and start a new one at this element */
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
pDestBuf += lastLength;
pConv->bConverted += lastLength;
lastDisp = pStack->disp + pElems[pos_desc].disp;
lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
}
pos_desc++; /* advance to the next data */
}
/* flush the last pending run */
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
pConv->bConverted += lastLength;
/* cleanup the stack */
return 0;
}
/* PRINT_MEMCPY( DST, SRC, LENGTH )
 * Debugging stand-in for a memory copy: nothing is copied, a line is
 * printed with a running index, the destination and source pointers, the
 * length, the number of bytes "copied" so far and the source line number.
 * Requires two integer variables named `__index` and `__sofar` in the
 * expanding scope; both are updated by the macro.
 */
#define PRINT_MEMCPY( DST, SRC, LENGTH ) \
{ \
printf( "%5d: memcpy dst = %p src %p length %ld bytes (so far %d)[%d]\n", \
__index++, (DST), (SRC), (long)(LENGTH), __sofar, __LINE__ ); \
__sofar += (LENGTH); \
}
/* Debugging helper: print the sequence of memory copies that packing
 * `count` instances of pData would perform.
 *
 * The traversal mirrors convertor_pack_homogeneous(), but every MEMCPY is
 * replaced by PRINT_MEMCPY, so nothing is copied. Source addresses are
 * printed relative to a NULL base (displacements cast to pointers) and the
 * destination starts at NULL.
 *
 * Returns, for a datatype flagged DT_FLAG_CONTIGUOUS, 1 when the number of
 * bytes accounted for equals size * count and 0 otherwise; for any other
 * datatype, 0.
 *
 * The traversal stack holds one base entry plus one entry per pushed loop,
 * so it is allocated with btypes[DT_LOOP] + 1 entries (as in
 * dt_optimize_short()); without the extra entry a datatype with no loop
 * would write its base entry into a zero-sized allocation.
 */
int dt_unroll( dt_desc_t* pData, int count )
{
    dt_stack_t* pStack;   /* pointer to the position on the stack */
    int pos_desc;         /* actual position in the description of the derived datatype */
    int type;             /* type at current position */
    int i;                /* index for basic elements with extent */
    int stack_pos = 0;    /* position on the stack */
    long lastDisp = 0, lastLength = 0;
    char* pDestBuf;
    int bConverted = 0, __index = 0, __sofar = 0;
    dt_elem_desc_t* pElems;

    pDestBuf = NULL;

    if( pData->flags & DT_FLAG_CONTIGUOUS ) {
        long extent = pData->ub - pData->lb;
        char* pSrc = (char*)pData->true_lb;

        /* `type` is reused here to hold the total number of bytes */
        type = count * pData->size;
        if( pData->size == extent /* true extent at this point */ ) {
            /* we can do it with just one memcpy */
            PRINT_MEMCPY( pDestBuf, pSrc, pData->size * count );
            bConverted += (pData->size * count);
        } else {
            char* pSrcBuf = (char*)pData->true_lb;
            long extent = pData->ub - pData->lb;
            for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
                PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size );
                pSrcBuf += extent;
                pDestBuf += pData->size;
            }
            bConverted += type;
        }
        return (bConverted == (pData->size * count));
    }

    /* base entry + one entry per loop */
    pStack = alloca( sizeof(dt_stack_t) * (pData->btypes[DT_LOOP] + 1) );
    pStack->count = count;
    pStack->index = -1;
    pStack->disp = 0;
    pos_desc = 0;

    if( pData->opt_desc.desc != NULL ) {
        pElems = pData->opt_desc.desc;
        pStack->end_loop = pData->opt_desc.used;
    } else {
        pElems = pData->desc.desc;
        pStack->end_loop = pData->desc.used;
    }

    DUMP_STACK( pStack, stack_pos, pElems, "starting" );
    DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
    DUMP( "top stack info {index = %d, count = %d}\n",
          pStack->index, pStack->count );

 next_loop:
    while( pos_desc <= pStack->end_loop ) {
        if( pos_desc == pStack->end_loop ) { /* end of the current loop */
            if( --(pStack->count) == 0 ) { /* end of loop */
                pStack--;
                if( --stack_pos == -1 ) break;
            } else {
                /* another iteration: jump back to the loop start and shift
                 * the displacement by the datatype or loop extent */
                pos_desc = pStack->index;
                if( pos_desc == -1 )
                    pStack->disp += (pData->ub - pData->lb);
                else
                    pStack->disp += pElems[pos_desc].extent;
            }
            pos_desc++;
            goto next_loop;
        }
        if( pElems[pos_desc].type == DT_LOOP ) {
            if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
                /* pLast is the end marker of this loop */
                dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
                if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
                    /* the first iteration continues the pending run */
                    PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent );
                    lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent;
                    i = 1;
                } else {
                    PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
                    lastDisp = pStack->disp + pElems[pos_desc + 1].disp;
                    i = 0;
                }
                lastLength = pLast->extent;
                for( ; i < (pElems[pos_desc].count - 1); i++ ) {
                    PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
                    pDestBuf += pLast->extent;
                    lastDisp += pElems[pos_desc].extent;
                }
                /* skip the loop body and its end marker */
                pos_desc += pElems[pos_desc].disp + 1;
                goto next_loop;
            } else {
                do {
                    PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
                                pStack->disp, pos_desc + pElems[pos_desc].disp );
                    pos_desc++;
                } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
            }
        }
        /* now here we have a basic datatype */
        type = pElems[pos_desc].type;
        if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
            /* starts where the pending run ends: extend the run */
            lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
        } else {
            PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
            pDestBuf += lastLength;
            bConverted += lastLength;
            lastDisp = pStack->disp + pElems[pos_desc].disp;
            lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
        }
        pos_desc++; /* advance to the next data */
    }
    /* report the last pending run */
    PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
    return 0;
}
/* The pack routines should do 2 things:
 *  - first, if the provided iovec contains NULL pointers then they should
 *    provide the buffer space. If the data is contiguous they should provide
 *    pointers directly into the user buffer, depending on the iov_len
 *    argument: if -1 the whole buffer can be supplied at once, otherwise
 *    several steps are needed and the correct pointer must be provided every
 *    time.
 *  - if the user provides a buffer, then part of the data should be packed
 *    inside this buffer, but we should still be able to hand out pointers
 *    to the user buffer on subsequent calls.
 *
 * Return  0 if everything went OK and there is still room before the
 *           complete conversion of the data (additional calls with other
 *           buffers are needed),
 *         1 if everything went fine and the data was completely converted,
 *        -1 if something went wrong.
 *
 * What this implementation does:
 *  - count == 0: returns 1 immediately.
 *  - contiguous datatype whose size equals its extent: out[0] either
 *    receives a pointer into the user buffer (iov_base was NULL) or the
 *    data is copied into the caller's buffer, truncated to its length;
 *    bConverted is increased by out[0].iov_len and 0 is returned.
 *  - otherwise: a buffer of count * size bytes is allocated when
 *    out[0].iov_base is NULL (remembered in pConv->freebuf), then the work
 *    is delegated to convertor_progress(). -1 is returned if the allocation
 *    fails.
 *
 * NOTE(review): the contiguous path always returns 0, even when the whole
 * data has been handed out, and always starts at the beginning of the user
 * buffer - confirm this is what callers expect.
 */
int convertor_pack( convertor_t* pConv, struct iovec* out, unsigned int out_size )
{
    dt_desc_t* pData = pConv->pDesc;

    if( pConv->count == 0 ) return 1;  /* nothing to do */

    /* same contiguity test as convertor_init_for_send(): the extent is
     * compared at full width, without squeezing it into an int first */
    if( (pData->flags & DT_FLAG_CONTIGUOUS) &&
        (pData->size == (pData->ub - pData->lb)) ) {
        if( out[0].iov_base == NULL ) {
            /* no copy at all: expose the user buffer */
            out[0].iov_base = pConv->pBaseBuf + pData->true_lb;
            out[0].iov_len = pData->size * pConv->count;
        } else {
            /* contiguous data just memcpy the smallest data in the user buffer */
            out[0].iov_len = IMIN( out[0].iov_len, pData->size * pConv->count );
            MEMCPY( out[0].iov_base, pConv->pBaseBuf + pData->true_lb, out[0].iov_len);
        }
        pConv->bConverted += out[0].iov_len;
        return 0;
    }

    if( out[0].iov_base == NULL ) {
        size_t length = pConv->count * pData->size;
        void* buffer = malloc( length );

        /* malloc(0) may legitimately return NULL */
        if( (buffer == NULL) && (length != 0) )
            return -1;
        out[0].iov_len = length;
        out[0].iov_base = buffer;
        pConv->freebuf = buffer;
    }
    return convertor_progress( pConv, out, out_size );
}
/* Prepare a convertor for packing `count` elements of datatype `dt` from
 * the user buffer pUserBuf.
 *
 *  - takes a reference on dt and attaches it to the convertor;
 *  - replaces the convertor stack by a fresh one with btypes[DT_LOOP] + 2
 *    entries and initializes the base entry (index -1, count, disp 0,
 *    end_loop = dt->desc.used);
 *  - resets the progress counters (converted, bConverted) and stack_pos;
 *  - sets the convertor flags to CONVERTOR_SEND, adding DT_FLAG_CONTIGUOUS
 *    when dt is flagged contiguous and its size equals its extent;
 *  - installs copy_functions and convertor_pack_homogeneous;
 *  - releases a buffer left in pConv->freebuf, if any.
 *
 * The `flags` argument is not used.
 *
 * Returns 0.
 */
int convertor_init_for_send( convertor_t* pConv, unsigned int flags,
                             dt_desc_t* dt, int count, void* pUserBuf )
{
    dt_stack_t* base;

    dt_increase_ref( dt );
    pConv->pDesc = dt;
    pConv->flags = CONVERTOR_SEND;

    /* drop the previous stack (free(NULL) is a no-op) and build a new one */
    free( pConv->pStack );
    base = (dt_stack_t*)malloc( sizeof(dt_stack_t) * (dt->btypes[DT_LOOP] + 2) );
    pConv->pStack = base;
    pConv->stack_pos = 0;

    /* fake entry for the first step */
    base[0].index = -1;
    base[0].count = count;
    base[0].disp = 0;
    /* first here we should select which data representation will be used
     * for this operation: normal one or the optimized version ? */
    base[0].end_loop = dt->desc.used;

    pConv->pBaseBuf = pUserBuf;
    pConv->available_space = count * (dt->ub - dt->lb);
    pConv->count = count;
    pConv->pFunctions = copy_functions;
    pConv->converted = 0;
    pConv->bConverted = 0;

    if( dt->flags & DT_FLAG_CONTIGUOUS ) {
        if( dt->size == (dt->ub - dt->lb) )
            pConv->flags |= DT_FLAG_CONTIGUOUS;
    }
    pConv->fAdvance = convertor_pack_homogeneous;

    /* release a buffer allocated by a previous pack, if any */
    free( pConv->freebuf );
    pConv->freebuf = NULL;
    return 0;
}
/* Allocate a zero-initialized convertor.
 *
 * The new convertor has no stack (pStack == NULL), remembers remote_arch in
 * remoteArch and uses convertor_pack_homogeneous as its advance function.
 * The `mode` argument is not used.
 *
 * Returns the new convertor, or NULL if the allocation fails.
 */
convertor_t* convertor_create( int remote_arch, int mode )
{
    convertor_t* pConv = (convertor_t*)calloc( 1, sizeof(convertor_t) );

    (void)mode;
    if( pConv == NULL )
        return NULL;

    pConv->pStack = NULL;
    pConv->remoteArch = remote_arch;
    pConv->fAdvance = convertor_pack_homogeneous;
    return pConv;
}
/* Compute the number of bytes of the packed representation: the size of
 * the datatype attached to the convertor multiplied by the convertor count.
 *
 * For now we suppose that only receiver-side conversion is done, so the
 * packed data uses the local sizes.
 *
 * The size is fetched into a local int because dt_type_size() takes an
 * int*, while this function reports through an unsigned int*.
 *
 * Returns 0 on success (result in *pSize), -1 if dt_type_size() fails, in
 * which case *pSize is left untouched.
 */
int convertor_get_packed_size( convertor_t* pConv, unsigned int* pSize )
{
    int typeSize = 0;

    if( dt_type_size( pConv->pDesc, &typeSize ) != 0 )
        return -1;
    *pSize = (unsigned int)typeSize * pConv->count;
    return 0;
}
/* Compute the number of bytes of the unpacked data.
 *
 *  - count == 0: 0 bytes.
 *  - remoteArch == 0 (same architecture): size of the datatype times count.
 *  - otherwise: for every predefined type from DT_CHAR up whose bit is set
 *    in bdt_used, btypes[type] * local size of that type is summed, and the
 *    sum is multiplied by count.
 *
 * Returns 0; the result is stored in *pSize.
 */
int convertor_get_unpacked_size( convertor_t* pConv, unsigned int* pSize )
{
    dt_desc_t* pData = pConv->pDesc;
    unsigned int total = 0;
    int type;

    if( pConv->count == 0 ) {
        *pSize = 0;
        return 0;
    }

    if( pConv->remoteArch == 0 ) { /* same architecture */
        *pSize = pData->size * pConv->count;
        return 0;
    }

    type = DT_CHAR;
    while( type < DT_MAX_PREDEFINED ) {
        if( pData->bdt_used & (1<<type) ) {
            /* TODO replace with the remote size */
            total += (pData->btypes[type] * basicDatatypes[type].size);
        }
        type++;
    }
    total *= pConv->count;
    *pSize = total;
    return 0;
}

617
src/datatype/dt_unpack.c Обычный файл
Просмотреть файл

@ -0,0 +1,617 @@
/* -*- Mode: C; c-basic-offset:3 ; -*- */
#include "datatype.h"
#include "datatype_internal.h"
/* Print the content of a traversal stack on stdout, from entry stack_pos
 * down to entry 0.
 *
 * For every entry the index, count, displacement and end_loop values are
 * printed; when the index is not -1 the count, displacement and extent of
 * the description element pDesc[index] are printed as well. `name` is a
 * label shown in the header line.
 */
void dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDesc, char* name )
{
    int level;

    printf( "\nStack %p stack_pos %d name %s\n", pStack, stack_pos, name );

    level = stack_pos;
    while( level >= 0 ) {
        dt_stack_t* entry = &pStack[level];

        printf( "%d: pos %d count %d disp %ld end_loop %d ", level, entry->index,
                entry->count, entry->disp, entry->end_loop );
        if( entry->index == -1 ) {
            printf( "\n" );
        } else {
            dt_elem_desc_t* elem = &pDesc[entry->index];

            printf( "[desc count %d disp %ld extent %d]\n",
                    elem->count,
                    elem->disp,
                    elem->extent );
        }
        level--;
    }
    printf( "\n" );
}
/*
* Remember that the first item in the stack (ie. position 0) is the number
* of times the datatype is involved in the operation (ie. the count argument
* in the MPI_ call).
*/
/* Convert data from multiple input buffers (as received from the network layer)
 * to a contiguous output buffer with a predefined size.
 * Return 0 if everything went OK and if there is still room before the complete
 * conversion of the data (need additional call with other input buffers )
 * 1 if everything went fine and the data was completely converted
 * -1 if something went wrong.
 *
 * Implementation notes:
 *  - only pInputv[0] is read; inputCount is only printed by the DUMP trace;
 *  - the description used is the optimized one when
 *    pData->opt_desc.desc is not NULL, the plain one otherwise;
 *  - the traversal resumes from the entry on top of pConvertor->pStack and,
 *    when it stops early, pushes its position back on that stack;
 *  - each basic element is handed to pConvertor->pFunctions[type], reading
 *    from the input buffer and writing to pConvertor->pBaseBuf plus the
 *    current displacement; -1 is returned when that function returns a
 *    value <= 0.
 */
static int convertor_unpack_general( convertor_t* pConvertor,
struct iovec* pInputv,
unsigned int inputCount )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int count_desc; /* the number of items already done in the actual pos_desc */
int end_loop; /* last element in the actual loop */
int type; /* type at current position */
unsigned int advance; /* number of bytes that we should advance the buffer */
int rc;
long disp_desc = 0; /* compute displacement for truncated data */
long disp; /* displacement at the beginning of the last loop */
dt_desc_t *pData = pConvertor->pDesc;
dt_elem_desc_t* pElems;
char* pOutput = pConvertor->pBaseBuf;
int oCount = (pData->ub - pData->lb) * pConvertor->count;
char* pInput = pInputv[0].iov_base;
int iCount = pInputv[0].iov_len;
if( pData->opt_desc.desc != NULL ) pElems = pData->opt_desc.desc;
else pElems = pData->desc.desc;
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor,
pInputv[0].iov_base, pInputv[0].iov_len, inputCount );
/* resume from the entry on top of the convertor stack */
pStack = pConvertor->pStack + pConvertor->stack_pos;
pos_desc = pStack->index;
disp = 0;
if( pos_desc == -1 ) {
/* index -1 marks the base entry: start from the first element */
pos_desc = 0;
count_desc = pElems[0].count;
disp_desc = pElems[0].disp;
} else {
count_desc = pStack->count;
if( pElems[pos_desc].type != DT_LOOP ) {
/* the top entry describes a partially processed basic element:
 * pop it and skip the items already done */
pConvertor->stack_pos--;
pStack--;
disp = pStack->disp;
disp_desc = ( pElems[pos_desc].disp +
(pElems[pos_desc].count - count_desc) * pElems[pos_desc].extent);
}
}
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
next_loop:
end_loop = pStack->end_loop;
while( pConvertor->stack_pos >= 0 ) {
if( pos_desc == end_loop ) { /* end of the current loop */
/* pop every loop whose iteration count reaches zero */
while( --(pStack->count) == 0 ) { /* end of loop */
pConvertor->stack_pos--;
pStack--;
if( pConvertor->stack_pos == -1 )
return 1; /* completed */
}
/* restart the enclosing loop one iteration further: advance its
 * displacement by the datatype extent (base entry) or the loop extent */
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
pos_desc++;
disp = pStack->disp;
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
goto next_loop;
}
if( pElems[pos_desc].type == DT_LOOP ) {
/* push one stack entry per directly nested loop header */
do {
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, pElems[pos_desc].count,
disp, pos_desc + pElems[pos_desc].disp + 1 );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "advance loops" );
/* update the current state */
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
goto next_loop;
}
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
rc = pConvertor->pFunctions[type]( count_desc,
pInput, iCount, pElems[pos_desc].extent,
pOutput + disp + disp_desc, oCount, pElems[pos_desc].extent,
&advance );
if( rc <= 0 ) {
printf( "trash in the input buffer\n" );
return -1;
}
iCount -= advance; /* decrease the available space in the buffer */
pInput += advance; /* increase the pointer to the buffer */
pConvertor->bConverted += advance;
if( rc != count_desc ) {
/* not all data has been converted. Keep the state */
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, count_desc - rc,
disp + rc * pElems[pos_desc].extent, pos_desc );
if( iCount != 0 )
printf( "there is still room in the input buffer %d bytes\n", iCount );
return 0;
}
pConvertor->converted += rc; /* number of elements converted so far */
pos_desc++; /* advance to the next data */
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
if( iCount == 0 ) break; /* break if there is no more data in the buffer */
}
/* out of the loop: we have completed the data conversion or there is no
 * more space in the buffer.
 */
if( pConvertor->pStack[0].count < 0 ) return 1; /* data successfully converted */
/* I completed an element, next step I should go to the next one */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pElems[pos_desc].count, disp, pos_desc );
return 0;
}
/**
 * Unpack a packed byte stream into the user buffer attached to the convertor,
 * copying bytes as they are (no per-type conversion routine is called).
 *
 * @param pConv     convertor; pDesc, pBaseBuf, count and pStack are read,
 *                  bConverted is advanced by the number of bytes copied.
 * @param iov       only iov[0] is used: iov_base is the packed source and
 *                  iov_len its length in bytes.
 * @param out_size  not used by this implementation.
 * @return for DT_FLAG_CONTIGUOUS datatypes 1 once bConverted equals
 *         size * count and 0 otherwise; for any other datatype always 0.
 */
int convertor_unpack_homogeneous( convertor_t* pConv, struct iovec* iov, unsigned int out_size )
{
    dt_stack_t* pStack;   /* pointer to the position on the stack */
    int pos_desc;         /* actual position in the description of the derived datatype */
    int type;             /* type at current position */
    int i;                /* counter for basic datatype with extent */
    int stack_pos = 0;    /* position on the stack */
    long lastDisp = 0, lastLength = 0;  /* pending contiguous run: offset and byte length */
    char* pSrcBuf;
    dt_desc_t* pData = pConv->pDesc;
    dt_elem_desc_t* pElems;

    pSrcBuf = iov[0].iov_base;

    if( pData->flags & DT_FLAG_CONTIGUOUS ) {
        long extent = pData->ub - pData->lb;
        char* pDstBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;

        if( pData->size == extent ) {
            /* contiguous data or basic datatype with count */
            MEMCPY( pDstBuf, pSrcBuf, iov[0].iov_len );
            pConv->bConverted += iov[0].iov_len;
        } else {
            /* Elements are contiguous inside but separated by a gap: copy them
             * one by one.  bConverted is advanced by the bytes actually copied
             * (it used to be advanced by the bytes left over in the buffer, so
             * the completion test below could never succeed).
             * NOTE(review): the loop always copies pConv->count elements and
             * does not look at iov_len - confirm callers provide a full buffer.
             */
            for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
                MEMCPY( pDstBuf, pSrcBuf, pData->size );
                pSrcBuf += pData->size;
                pDstBuf += extent;
                pConv->bConverted += pData->size;
            }
        }
        return (pConv->bConverted == (pData->size * pConv->count));
    }

    /* Generic case: walk the datatype description using the convertor stack. */
    pStack = pConv->pStack;
    pStack->count = pConv->count;
    pStack->index = -1;
    pStack->disp = 0;
    pos_desc = 0;
    /* prefer the optimized description when one is available */
    if( pData->opt_desc.desc != NULL ) {
        pElems = pData->opt_desc.desc;
        pStack->end_loop = pData->opt_desc.used;
    } else {
        pElems = pData->desc.desc;
        pStack->end_loop = pData->desc.used;
    }

    DUMP_STACK( pStack, stack_pos, pElems, "starting" );
    DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
    DUMP( "top stack info {index = %d, count = %d}\n",
          pStack->index, pStack->count );

 next_loop:
    while( pos_desc <= pStack->end_loop ) {
        if( pos_desc == pStack->end_loop ) { /* end of the current loop */
            if( --(pStack->count) == 0 ) { /* end of loop */
                pStack--;
                if( --stack_pos == -1 ) break;
            } else {
                /* one more iteration: rewind to the loop start and shift the base */
                pos_desc = pStack->index;
                if( pos_desc == -1 )
                    pStack->disp += (pData->ub - pData->lb);
                else
                    pStack->disp += pElems[pos_desc].extent;
            }
            pos_desc++;
            goto next_loop;
        }
        if( pElems[pos_desc].type == DT_LOOP ) {
            if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
                dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);

                /* Flush the pending run before it is overwritten below.  The
                 * test used to be "lastLength == 0", which only ever copied
                 * zero bytes and silently dropped the pending data.
                 */
                if( lastLength != 0 ) {
                    MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
                    pSrcBuf += lastLength;
                    pConv->bConverted += lastLength;
                }
                lastLength = pLast->extent;
                /* NOTE(review): lastDisp is not repositioned to the loop's own
                 * displacement and these copies do not advance bConverted;
                 * both are kept as in the original - confirm the intent.
                 * The last iteration is left pending for the next flush.
                 */
                for( i = 0; i < (pElems[pos_desc].count - 1); i++ ) {
                    MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
                    pSrcBuf += pLast->extent;
                    lastDisp += pElems[pos_desc].extent;
                }
                pos_desc += pElems[pos_desc].disp + 1;
                goto next_loop;
            } else {
                do {
                    PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
                                pStack->disp, pos_desc + pElems[pos_desc].disp );
                    pos_desc++;
                } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
            }
        }
        /* now here we have a basic datatype */
        type = pElems[pos_desc].type;
        if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
            /* adjacent to the pending run: just extend it */
            lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
        } else {
            /* not adjacent: flush the pending run and start a new one */
            MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
            pSrcBuf += lastLength;
            pConv->bConverted += lastLength;
            lastDisp = pStack->disp + pElems[pos_desc].disp;
            lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
        }
        pos_desc++;  /* advance to the next data */
    }

    /* flush whatever is still pending */
    MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
    pConv->bConverted += lastLength;
    /* cleanup the stack */
    return 0;
}
/*
 * Unpack entry point.
 *
 * - Returns 1 immediately when the convertor count is 0.
 * - When the convertor is flagged DT_FLAG_CONTIGUOUS and the caller passes
 *   pInputv[0].iov_base == NULL, nothing is copied: pInputv[0] is pointed
 *   into the user buffer instead (the whole buffer if iov_len is 0, the
 *   next chunk otherwise) and the return value tells whether everything
 *   has been covered.
 * - Returns 1 when the input pointer already lies inside the user buffer.
 * - Otherwise hands the work to convertor_progress() and returns its result.
 */
int convertor_unpack( convertor_t* pConvertor,
                      struct iovec* pInputv,
                      unsigned int inputCount )
{
    dt_desc_t* pType = pConvertor->pDesc;
    char* userBuf = pConvertor->pBaseBuf;
    char* packedBuf = pInputv[0].iov_base;
    int total;

    if( pConvertor->count == 0 ) return 1;  /* nothing to do */

    if( (pConvertor->flags & DT_FLAG_CONTIGUOUS) && (pInputv[0].iov_base == NULL) ) {
        total = pConvertor->count * pType->size;

        if( pInputv[0].iov_len != 0 ) {  /* what about the next chunk ? */
            pInputv[0].iov_base = pConvertor->pBaseBuf + pType->true_lb + pConvertor->bConverted;
            if( pInputv[0].iov_len > (total - pConvertor->bConverted) )
                pInputv[0].iov_len = total - pConvertor->bConverted;
            pConvertor->bConverted += pInputv[0].iov_len;
            return (pConvertor->bConverted == total);
        }

        /* give me the whole buffer */
        pInputv[0].iov_base = pConvertor->pBaseBuf + pType->true_lb;
        pInputv[0].iov_len = total;
        return 1;
    }

    /* input outside of the user buffer: a real unpack is required */
    if( (packedBuf < userBuf) ||
        (packedBuf >= (userBuf + pConvertor->count * (pType->ub - pType->lb))) )
        return convertor_progress( pConvertor, pInputv, inputCount );

    return 1;
}
/* Return value:
 * 0 : nothing has been done
 * positive value: number of items converted.
 * negative value: -1 * number of items converted; less data was provided than
 * expected and the remaining data is smaller than the size of the
 * basic datatype on the remote host.
 */
/*
 * COPY_TYPE( TYPENAME, TYPE ) generates the function copy_TYPENAME, which
 * copies up to `count` elements of TYPE from `from` to `to`.
 *
 * - If `from_len` cannot hold `count` elements, count is reduced to the
 *   number of whole elements available; if bytes are left over after that,
 *   the returned count is negated.
 * - When both extents equal sizeof(TYPE) a single copy is done, otherwise
 *   the elements are copied one by one, stepping by the extents.
 * - *used receives count * sizeof(TYPE); `to_len` is only traced.
 */
#define COPY_TYPE( TYPENAME, TYPE )                                                        \
int copy_##TYPENAME( unsigned int count,                                                   \
                     char* from, unsigned int from_len, long from_extent,                  \
                     char* to, unsigned int to_len, long to_extent,                        \
                     int* used )                                                           \
{                                                                                          \
    int idx, sign = 1;                                                                     \
    unsigned int remote_elem_size = sizeof(TYPE); /* TODO */                               \
                                                                                           \
    if( (remote_elem_size * count) <= from_len ) {                                         \
        DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n",          \
              #TYPE, count, from, from_len, to, to_len );                                  \
    } else {                                                                               \
        /* not enough input: keep only the whole elements */                               \
        count = from_len / remote_elem_size;                                               \
        if( (count * remote_elem_size) != from_len ) {                                     \
            DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n",         \
                  from_len - (count * remote_elem_size) );                                 \
            sign = -1;                                                                     \
        }                                                                                  \
        DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n",  \
              #TYPE, count, from, from_len, to, to_len );                                  \
    }                                                                                      \
                                                                                           \
    if( (from_extent != sizeof(TYPE)) || (to_extent != sizeof(TYPE)) ) {                   \
        for( idx = 0; idx < count; idx++ ) {                                               \
            MEMCPY( to, from, sizeof(TYPE) );                                              \
            to += to_extent;                                                               \
            from += from_extent;                                                           \
        }                                                                                  \
    } else {                                                                               \
        MEMCPY( to, from, count * sizeof(TYPE) );                                          \
    }                                                                                      \
    *used = count * sizeof(TYPE) ;                                                         \
    return sign * count;                                                                   \
}
/* Generate copy_char, copy_short, copy_int, copy_float and copy_long from the
 * COPY_TYPE template.
 */
COPY_TYPE( char, char );
COPY_TYPE( short, short );
COPY_TYPE( int, int );
COPY_TYPE( float, float );
COPY_TYPE( long, long );
/* The double instantiation is disabled: copy_double is written out by hand
 * further down in this file.
 */
/*COPY_TYPE( double, double );*/
/* Generate copy_long_long, copy_long_double, copy_complex_float and
 * copy_complex_double.
 */
COPY_TYPE( long_long, long long );
COPY_TYPE( long_double, long double );
COPY_TYPE( complex_float, complex_float_t );
COPY_TYPE( complex_double, complex_double_t );
/*
 * Copy up to `count` doubles from `from` to `to`.
 *
 * If `from_len` cannot hold `count` doubles, count is reduced to the number
 * of whole doubles available, and the returned count is negated when bytes
 * are left over.  *used receives count * sizeof(double).  `to_len` is only
 * traced.
 *
 * Returns the number of elements copied, negated in the left-over case.
 */
int copy_double( unsigned int count,
                 char* from, unsigned int from_len, long from_extent,
                 char* to, unsigned int to_len, long to_extent,
                 int* used )
{
    int idx, sign = 1;
    unsigned int remote_elem_size = sizeof(double); /* TODO */

    if( (remote_elem_size * count) <= from_len ) {
        DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n",
              "double", count, from, from_len, to, to_len );
    } else {
        /* not enough input: keep only the whole elements */
        count = from_len / remote_elem_size;
        if( (count * remote_elem_size) != from_len ) {
            DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n",
                  from_len - (count * remote_elem_size) );
            sign = -1;
        }
        DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n",
              "double", count, from, from_len, to, to_len );
    }

    if( (from_extent != sizeof(double)) || (to_extent != sizeof(double)) ) {
        /* strided on at least one side: element by element */
        for( idx = 0; idx < count; idx++ ) {
            MEMCPY( to, from, sizeof(double) );
            to += to_extent;
            from += from_extent;
        }
    } else {
        MEMCPY( to, from, count * sizeof(double) );
    }

    *used = count * sizeof(double) ;
    return sign * count;
}
/* Table of copy routines, one slot per predefined datatype; the datatype each
 * slot stands for is named in the comment on its line (presumably the order of
 * the DT_* constants - confirm against their definition).
 * The first four slots (DT_LOOP, DT_LB, DT_UB, DT_SPACE) have no routine.
 * DT_CHAR and DT_BYTE share copy_char.
 * convertor_init_for_recv installs this table as the convertor's pFunctions.
 */
conversion_fct_t copy_functions[DT_MAX_PREDEFINED] = {
(conversion_fct_t)NULL, /* DT_LOOP */
(conversion_fct_t)NULL, /* DT_LB */
(conversion_fct_t)NULL, /* DT_UB */
(conversion_fct_t)NULL, /* DT_SPACE */
(conversion_fct_t)copy_char, /* DT_CHAR */
(conversion_fct_t)copy_char, /* DT_BYTE */
(conversion_fct_t)copy_short, /* DT_SHORT */
(conversion_fct_t)copy_int, /* DT_INT */
(conversion_fct_t)copy_float, /* DT_FLOAT */
(conversion_fct_t)copy_long, /* DT_LONG */
(conversion_fct_t)copy_double, /* DT_DOUBLE */
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG */
(conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */
(conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */
(conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */
};
/* Should we supply buffers to the convertor, or can we use the user buffer
 * directly?
 */
/*
 * Returns 0 when the convertor is flagged DT_FLAG_CONTIGUOUS, 1 otherwise.
 */
int convertor_need_buffers( convertor_t* pConvertor )
{
    return (pConvertor->flags & DT_FLAG_CONTIGUOUS) ? 0 : 1;
}
/**
 * Prepare a convertor for receiving (unpacking) `count` elements of datatype
 * `pData` into `pUserBuf`.
 *
 * Takes a reference on pData, replaces the convertor stack with a fresh one
 * sized from the number of loops in the datatype, resets the progress
 * counters and installs copy_functions / convertor_unpack_homogeneous.
 *
 * @param pConv     convertor to initialize; its previous stack is freed.
 * @param flags     not used by this implementation.
 * @param pData     datatype description.
 * @param count     number of elements of pData.
 * @param pUserBuf  destination user buffer.
 * @return 0 on success, -1 if the stack cannot be allocated (in that case
 *         the convertor and the datatype reference count are left untouched).
 */
int convertor_init_for_recv( convertor_t* pConv, unsigned int flags,
                             dt_desc_t* pData, int count, void* pUserBuf )
{
    dt_stack_t* pNewStack;

    /* Allocate before touching anything so that a failure leaves the
     * convertor in its previous state instead of dereferencing NULL.
     */
    pNewStack = malloc( sizeof(*pNewStack) * (pData->btypes[DT_LOOP] + 2) );
    if( pNewStack == NULL ) return -1;

    dt_increase_ref( pData );
    pConv->pDesc = pData;
    pConv->flags = CONVERTOR_RECV;

    free( pConv->pStack );  /* free(NULL) is a no-op */
    pConv->pStack = pNewStack;
    pConv->stack_pos = 0;
    pConv->pStack[0].index = -1;     /* fake entry for the first step */
    pConv->pStack[0].count = count;  /* fake entry for the first step */
    pConv->pStack[0].disp = 0;
    /* first we should decide which data representation will be used TODO */
    pConv->pStack[0].end_loop = pData->desc.used;

    pConv->pBaseBuf = pUserBuf;
    pConv->available_space = count * (pData->ub - pData->lb);
    pConv->count = count;
    pConv->pFunctions = copy_functions;
    pConv->converted = 0;
    pConv->bConverted = 0;

    /* only a gap-free contiguous datatype lets the convertor be flagged contiguous */
    if( (pData->flags & DT_FLAG_CONTIGUOUS) && (pData->size == (pData->ub - pData->lb)) )
        pConv->flags |= DT_FLAG_CONTIGUOUS;

    pConv->fAdvance = convertor_unpack_homogeneous;
    return 0;
}
/**
 * Create a new convertor from an existing one.
 *
 * The new convertor starts as a byte copy of pConvertor, then its stack,
 * datatype, counters and freebuf are reset so it shares no state with the
 * original.
 *
 * @param pConvertor  convertor to copy from.
 * @return the newly allocated convertor (release it with convertor_destroy),
 *         or NULL if the allocation fails.
 */
convertor_t* convertor_get_copy( convertor_t* pConvertor )
{
    convertor_t* pConv = (convertor_t*)calloc( 1, sizeof(convertor_t) );

    /* the result used to be handed to MEMCPY unchecked */
    if( pConv == NULL ) return NULL;

    MEMCPY( pConv, pConvertor, sizeof(convertor_t) );
    pConv->pStack = NULL;
    pConv->pDesc = NULL;
    pConv->count = 0;
    pConv->converted = 0;
    pConv->bConverted = 0;
    pConv->freebuf = NULL;
    return pConv;
}
/*
 * Release a convertor: frees its stack and freebuf, drops the reference held
 * on its datatype (if any), frees the convertor itself and sets *ppConv to
 * NULL.  Does nothing when *ppConv is already NULL.  Always returns 0.
 */
int convertor_destroy( convertor_t** ppConv )
{
    convertor_t* pDoomed = *ppConv;

    if( pDoomed != NULL ) {
        free( pDoomed->pStack );    /* free(NULL) is a no-op */
        if( pDoomed->pDesc != NULL )
            dt_decrease_ref( pDoomed->pDesc );
        free( pDoomed->freebuf );
        free( pDoomed );
        *ppConv = NULL;
    }
    return 0;
}
/* Get the number of elements from the data associated with this convertor that can be
 * retrieved from a received buffer with the size iSize.
 * To speed up this function you should call it with iSize equal to the
 * original size modulo the size of the data.
 * This function should be called with an initialized clean convertor.
 * Return value:
 * positive = number of basic elements inside
 * negative = an error occurred
 */
/**
 * Count how many basic elements of datatype `pData` fit in `iSize` bytes.
 *
 * Walks the (non-optimized) description of one element of pData, adding up
 * the basic elements until iSize bytes are consumed or the description ends.
 *
 * @param pData  datatype description.
 * @param iSize  number of bytes available.
 * @return the number of basic elements counted.
 */
int dt_get_element_count( dt_desc_t* pData, size_t iSize )
{
    dt_stack_t* pStack;   /* pointer to the position on the stack */
    int pos_desc;         /* actual position in the description of the derived datatype */
    int end_loop;         /* last element in the actual loop */
    int type;             /* type at current position */
    int rc, nbElems = 0;
    int stack_pos = 0;

    DUMP( "dt_get_element_count( %p, %lu )\n", (void*)pData, (unsigned long)iSize );

    /* One entry per nested loop plus spare entries.  The element size must be
     * sizeof(*pStack): sizeof(pStack) is only the size of a pointer and
     * under-allocates the stack.
     */
    pStack = alloca( sizeof(*pStack) * (pData->btypes[DT_LOOP] + 2) );
    pStack->count = 1;
    pStack->index = -1;
    pStack->end_loop = pData->desc.used;
    pStack->disp = 0;
    pos_desc = 0;

    DUMP_STACK( pStack, stack_pos, pData->desc.desc, "starting" );
    DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
    DUMP( "top stack info {index = %d, count = %d}\n",
          pStack->index, pStack->count );

 next_loop:
    end_loop = pStack->end_loop;
    while( stack_pos >= 0 ) {
        if( pos_desc == end_loop ) { /* end of the current loop */
            while( --(pStack->count) == 0 ) { /* end of loop */
                stack_pos--;
                pStack--;
                if( stack_pos == -1 )
                    return nbElems;  /* completed */
            }
            /* one more iteration of the enclosing loop */
            pos_desc = pStack->index;
            if( pos_desc == -1 )
                pStack->disp += (pData->ub - pData->lb);
            else
                pStack->disp += pData->desc.desc[pos_desc].extent;
            pos_desc++;
            goto next_loop;
        }
        if( pData->desc.desc[pos_desc].type == DT_LOOP ) {
            do {
                PUSH_STACK( pStack, stack_pos, pos_desc, pData->desc.desc[pos_desc].count,
                            0, pos_desc + pData->desc.desc[pos_desc].disp );
                pos_desc++;
            } while( pData->desc.desc[pos_desc].type == DT_LOOP ); /* let's start another loop */
            DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" );
            goto next_loop;
        }
        /* now here we have a basic datatype */
        type = pData->desc.desc[pos_desc].type;
        rc = pData->desc.desc[pos_desc].count * basicDatatypes[type].size;
        if( rc >= iSize ) {
            /* the remaining bytes end inside this entry: count the whole elements */
            nbElems += iSize / basicDatatypes[type].size;
            break;
        }
        nbElems += pData->desc.desc[pos_desc].count;
        iSize -= rc;
        pos_desc++;  /* advance to the next data */
    }

    /* cleanup the stack */
    return nbElems;
}
/**
 * Copy `count` elements of datatype `pData` from pSrcBuf to pDestBuf, both
 * buffers using the same layout (data is copied at identical displacements).
 *
 * Adjacent pieces of the description are merged into a single run so that
 * each run is copied with one MEMCPY.
 *
 * @param pData     datatype description.
 * @param count     number of elements; nothing is copied when count <= 0.
 * @param pDestBuf  destination buffer.
 * @param pSrcBuf   source buffer.
 * @return always 0.
 */
int dt_copy_content_same_dt( dt_desc_t* pData, int count,
                             char* pDestBuf, char* pSrcBuf )
{
    dt_stack_t* pStack;   /* pointer to the position on the stack */
    int pos_desc;         /* actual position in the description of the derived datatype */
    int type;             /* type at current position */
    int stack_pos = 0;
    long lastDisp = 0, lastLength = 0;  /* pending contiguous run: offset and byte length */
    dt_elem_desc_t* pElems;

    /* With count <= 0 the loop counter below would start at or below zero and
     * never hit the "== 0" termination test.
     */
    if( count <= 0 ) return 0;

    if( (pData->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) {
        /* basic datatype with count */
        MEMCPY( pDestBuf, pSrcBuf, pData->size * count );
        return 0;
    }

    /* The element size must be sizeof(*pStack): sizeof(pStack) is only the
     * size of a pointer and under-allocates the stack.
     */
    pStack = alloca( sizeof(*pStack) * (pData->btypes[DT_LOOP] + 1) );
    pStack->count = count;
    pStack->index = -1;
    pStack->disp = 0;
    pos_desc = 0;
    /* prefer the optimized description when one is available */
    if( pData->opt_desc.desc != NULL ) {
        pElems = pData->opt_desc.desc;
        pStack->end_loop = pData->opt_desc.used;
    } else {
        pElems = pData->desc.desc;
        pStack->end_loop = pData->desc.used;
    }

    DUMP_STACK( pStack, stack_pos, pElems, "starting" );
    DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
    DUMP( "top stack info {index = %d, count = %d}\n",
          pStack->index, pStack->count );

 next_loop:
    while( pos_desc <= pStack->end_loop ) {
        if( pos_desc == pStack->end_loop ) { /* end of the current loop */
            if( --(pStack->count) == 0 ) { /* end of loop */
                pStack--;
                if( --stack_pos == -1 ) break;
            } else {
                /* One more iteration: rewind to the loop start and shift the
                 * base.  The displacement update belongs to this branch only
                 * (as in convertor_unpack_homogeneous); without the braces it
                 * also ran after a loop had been popped.
                 */
                pos_desc = pStack->index;
                if( pos_desc == -1 )
                    pStack->disp += (pData->ub - pData->lb);
                else
                    pStack->disp += pElems[pos_desc].extent;
            }
            pos_desc++;
            goto next_loop;
        }
        if( pElems[pos_desc].type == DT_LOOP ) {
            do {
                PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
                            pStack->disp, pos_desc + pElems[pos_desc].disp );
                pos_desc++;
            } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
            DUMP_STACK( pStack, stack_pos, pElems, "advance loops" );
            goto next_loop;
        }
        /* now here we have a basic datatype */
        type = pElems[pos_desc].type;
        if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
            /* adjacent to the pending run: just extend it */
            lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
        } else {
            /* not adjacent: copy the pending run and start a new one */
            MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
            lastDisp = pStack->disp + pElems[pos_desc].disp;
            lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
        }
        pos_desc++;  /* advance to the next data */
    }

    /* copy whatever is still pending */
    MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
    /* cleanup the stack */
    return 0;
}