Move some functions into the datatype.h file and make them static inline.
Remove the dt_old_limits.c file. Correctly define lam_datatype_t. This commit was SVN r920.
Этот коммит содержится в:
родитель
868ff4a4cf
Коммит
4f3bca907f
@ -14,7 +14,7 @@ headers = datatype.h datatype_internal.h
|
||||
libdatatype_la_SOURCES = \
|
||||
$(headers) \
|
||||
dt_add.c dt_create.c dt_create_array.c dt_create_dup.c dt_create_indexed.c \
|
||||
dt_create_struct.c dt_create_vector.c dt_destroy.c dt_module.c dt_old_limits.c \
|
||||
dt_create_struct.c dt_create_vector.c dt_destroy.c dt_module.c \
|
||||
dt_optimize.c dt_pack.c dt_unpack.c
|
||||
|
||||
# Conditionally install the header files
|
||||
|
@ -1,43 +0,0 @@
|
||||
/*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/*
|
||||
* lam_datatype_t implementation
|
||||
*/
|
||||
|
||||
#include "lam_config.h"
|
||||
#include "datatype/datatype.h"
|
||||
|
||||
/*
|
||||
* Global variables
|
||||
* Sizes and alignments from configure
|
||||
*/
|
||||
|
||||
/* Sizes of the Fortran 77 basic types, initialised from the
 * LAM_SIZEOF_FORTRAN_* macros (units are presumably bytes -- TODO confirm
 * against the configure output). */
int lam_sizeof_f77_integer = LAM_SIZEOF_FORTRAN_INT;
|
||||
int lam_sizeof_f77_real = LAM_SIZEOF_FORTRAN_REAL;
|
||||
int lam_sizeof_f77_dblprec = LAM_SIZEOF_FORTRAN_DBLPREC;
|
||||
int lam_sizeof_f77_complex = LAM_SIZEOF_FORTRAN_COMPLEX;
|
||||
int lam_sizeof_f77_dblcomplex = LAM_SIZEOF_FORTRAN_DBLCOMPLEX;
|
||||
|
||||
/* Alignments of the same Fortran 77 basic types, initialised from the
 * LAM_ALIGNMENT_FORTRAN_* macros. */
int lam_alignment_f77_integer = LAM_ALIGNMENT_FORTRAN_INT;
|
||||
int lam_alignment_f77_real = LAM_ALIGNMENT_FORTRAN_REAL;
|
||||
int lam_alignment_f77_dblprec = LAM_ALIGNMENT_FORTRAN_DBLPREC;
|
||||
int lam_alignment_f77_complex = LAM_ALIGNMENT_FORTRAN_COMPLEX;
|
||||
int lam_alignment_f77_dblcomplex = LAM_ALIGNMENT_FORTRAN_DBLCOMPLEX;
|
||||
|
||||
|
||||
static void lam_datatype_t_construct(lam_datatype_t *datatype);
|
||||
static void lam_datatype_t_destruct(lam_datatype_t *datatype);
|
||||
|
||||
OBJ_CLASS_INSTANCE(lam_datatype_t,
|
||||
lam_object_t,
|
||||
lam_datatype_t_construct,
|
||||
lam_datatype_t_destruct);
|
||||
|
||||
|
||||
/* Constructor hook passed to OBJ_CLASS_INSTANCE for lam_datatype_t; the body is empty, so no field is initialised here. */
static void lam_datatype_t_construct(lam_datatype_t *datatype) {}
|
||||
|
||||
|
||||
/* Destructor hook passed to OBJ_CLASS_INSTANCE for lam_datatype_t; the body is empty, so nothing is released here. */
static void lam_datatype_t_destruct(lam_datatype_t *datatype) {}
|
||||
|
@ -1,4 +1,12 @@
|
||||
/* -*- Mode: C; c-basic-offset:3 ; -*- */
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
/**
|
||||
* lam_datatype_t interface for LAM internal data type representation
|
||||
*
|
||||
* lam_datatype_t is a class which represents contiguous or
|
||||
* non-contiguous data together with constituent type-related
|
||||
* information.
|
||||
*/
|
||||
|
||||
#ifndef DATATYPE_H_HAS_BEEN_INCLUDED
|
||||
#define DATATYPE_H_HAS_BEEN_INCLUDED
|
||||
@ -32,16 +40,16 @@
|
||||
#define DT_MAX_PREDEFINED 0x10
|
||||
|
||||
/* flags for the datatypes. */
|
||||
#define DT_FLAG_DESTROYED 0x0001 /* user destroyed but some other layers still have a reference */
|
||||
#define DT_FLAG_COMMITED 0x0002 /* ready to be used for a send/recv operation */
|
||||
#define DT_FLAG_CONTIGUOUS 0x0004 /* contiguous datatype */
|
||||
#define DT_FLAG_OVERLAP 0x0008 /* datatype is unpropper for a recv operation */
|
||||
#define DT_FLAG_USER_LB 0x0010 /* has a user defined LB */
|
||||
#define DT_FLAG_USER_UB 0x0020 /* has a user defined UB */
|
||||
#define DT_FLAG_FOREVER 0x0040 /* cannot be removed: initial and predefined datatypes */
|
||||
#define DT_FLAG_IN_LOOP 0x0080 /* we are inside a loop */
|
||||
#define DT_FLAG_INITIAL 0x0100 /* one of the initial datatype */
|
||||
#define DT_FLAG_DATA 0x0200 /* data or control structure */
|
||||
#define DT_FLAG_DESTROYED 0x0001 /**< user destroyed but some other layers still have a reference */
|
||||
#define DT_FLAG_COMMITED 0x0002 /**< ready to be used for a send/recv operation */
|
||||
#define DT_FLAG_CONTIGUOUS 0x0004 /**< contiguous datatype */
|
||||
#define DT_FLAG_OVERLAP 0x0008 /**< datatype is improper for a recv operation */
|
||||
#define DT_FLAG_USER_LB 0x0010 /**< has a user defined LB */
|
||||
#define DT_FLAG_USER_UB 0x0020 /**< has a user defined UB */
|
||||
#define DT_FLAG_FOREVER 0x0040 /**< cannot be removed: initial and predefined datatypes */
|
||||
#define DT_FLAG_IN_LOOP 0x0080 /**< we are inside a loop */
|
||||
#define DT_FLAG_INITIAL 0x0100 /**< one of the initial datatype */
|
||||
#define DT_FLAG_DATA 0x0200 /**< data or control structure */
|
||||
#define DT_FLAG_BASIC (DT_FLAG_INITIAL | DT_FLAG_COMMITED | DT_FLAG_FOREVER | DT_FLAG_CONTIGUOUS)
|
||||
|
||||
#define DT_INCREASE_STACK 32
|
||||
@ -50,11 +58,11 @@
|
||||
* by a set of basic elements.
|
||||
*/
|
||||
typedef struct __dt_elem_desc {
|
||||
unsigned short flags; /* flags for the record */
|
||||
unsigned short type; /* the basic data type id */
|
||||
unsigned int count; /* number of elements */
|
||||
long disp; /* displacement of the first element */
|
||||
unsigned int extent; /* extent of each element */
|
||||
unsigned short flags; /**< flags for the record */
|
||||
unsigned short type; /**< the basic data type id */
|
||||
unsigned int count; /**< number of elements */
|
||||
long disp; /**< displacement of the first element */
|
||||
unsigned int extent; /**< extent of each element */
|
||||
} dt_elem_desc_t;
|
||||
|
||||
typedef struct {
|
||||
@ -83,28 +91,28 @@ typedef struct __dt_struct_desc {
|
||||
|
||||
/* the data description.
|
||||
*/
|
||||
typedef struct __dt_desc {
|
||||
lam_object_t super;
|
||||
unsigned int size; /* total size in bytes of the memory used by the data if
|
||||
typedef struct lam_datatype_t {
|
||||
lam_object_t super; /**< basic superclass */
|
||||
unsigned int size; /**< total size in bytes of the memory used by the data if
|
||||
* the data is put on a contiguous buffer */
|
||||
long true_lb;
|
||||
long true_ub; /* the true ub of the data without user defined lb and ub */
|
||||
unsigned int align; /* data should be aligned to */
|
||||
long lb; /* lower bound in memory */
|
||||
long ub; /* upper bound in memory */
|
||||
unsigned short flags; /* the flags */
|
||||
unsigned short id; /* data id, normally the index in the data array. */
|
||||
unsigned int nbElems; /* total number of elements inside the datatype */
|
||||
unsigned int bdt_used; /* which basic datatypes are used in the data description */
|
||||
long true_ub; /**< the true ub of the data without user defined lb and ub */
|
||||
unsigned int align; /**< data should be aligned to */
|
||||
long lb; /**< lower bound in memory */
|
||||
long ub; /**< upper bound in memory */
|
||||
unsigned short flags; /**< the flags */
|
||||
unsigned short id; /**< data id, normally the index in the data array. */
|
||||
unsigned int nbElems; /**< total number of elements inside the datatype */
|
||||
unsigned int bdt_used; /**< which basic datatypes are used in the data description */
|
||||
|
||||
/* Attribute fields */
|
||||
lam_hash_table_t *keyhash;
|
||||
char name[MPI_MAX_OBJECT_NAME];
|
||||
|
||||
dt_type_desc_t desc; /* the data description */
|
||||
dt_type_desc_t opt_desc; /* short description of the data used when conversion is useless
|
||||
dt_type_desc_t desc; /**< the data description */
|
||||
dt_type_desc_t opt_desc; /**< short description of the data used when conversion is useless
|
||||
* or in the send case (without conversion) */
|
||||
void* args; /* data description for the user */
|
||||
void* args; /**< data description for the user */
|
||||
|
||||
/* basic elements count used to compute the size of the datatype for
|
||||
* remote nodes */
|
||||
@ -174,15 +182,23 @@ int dt_create_darray( int size, int rank, int ndims, int* pGSizes, int *pDistrib
|
||||
|
||||
int dt_add( dt_desc_t* pdtBase, dt_desc_t* pdtNew, unsigned int count, long disp, long extent );
|
||||
|
||||
int dt_type_lb( dt_desc_t* pData, long* disp );
|
||||
int dt_type_ub( dt_desc_t* pData, long* disp );
|
||||
int dt_type_size ( dt_desc_t* pData, int *size );
|
||||
int dt_type_extent( dt_desc_t* pData, long* extent );
|
||||
/**
 * Report the lower bound of a datatype.
 *
 * @param pData  datatype descriptor to query; dereferenced unconditionally
 * @param disp   receives the value of pData->lb
 * @return always 0
 */
static inline int dt_type_lb( dt_desc_t* pData, long* disp )
{
    *disp = pData->lb;
    return 0;
}
|
||||
/**
 * Report the upper bound of a datatype.
 *
 * @param pData  datatype descriptor to query; dereferenced unconditionally
 * @param disp   receives the value of pData->ub
 * @return always 0
 */
static inline int dt_type_ub( dt_desc_t* pData, long* disp )
{
    *disp = pData->ub;
    return 0;
}
|
||||
/**
 * Report the size recorded in a datatype.
 *
 * @param pData  datatype descriptor to query; dereferenced unconditionally
 * @param size   receives pData->size converted to int
 * @return always 0
 */
static inline int dt_type_size ( dt_desc_t* pData, int *size )
{
    /* The field is unsigned while the out-parameter is a plain int; the
     * narrowing is made explicit so it is visible to readers and tools. */
    *size = (int) pData->size;
    return 0;
}
|
||||
/**
 * Report the extent of a datatype, computed as upper bound minus lower bound.
 *
 * @param pData   datatype descriptor to query; dereferenced unconditionally
 * @param extent  receives pData->ub - pData->lb
 * @return always 0
 */
static inline int dt_type_extent( dt_desc_t* pData, long* extent )
{
    *extent = pData->ub - pData->lb;
    return 0;
}
|
||||
|
||||
int dt_type_resize( dt_desc_t* pOld, long lb, long extent, dt_desc_t** pNew );
|
||||
int dt_get_extent( dt_desc_t* datatype, long* lb, long* extent);
|
||||
int dt_get_true_extent( dt_desc_t* datatype, long* true_lb, long* true_extent);
|
||||
int dt_get_element_count( dt_desc_t* datatype, size_t iSize );
|
||||
/**
 * Placeholder for resizing a datatype; not implemented.
 *
 * @param pOld    datatype to resize (ignored)
 * @param lb      requested lower bound (ignored)
 * @param extent  requested extent (ignored)
 * @param pNew    would receive the new datatype; never written
 * @return always -1
 */
static inline int dt_type_resize( dt_desc_t* pOld, long lb, long extent, dt_desc_t** pNew )
{
    /* Nothing is done with the arguments yet; the casts only silence
     * unused-parameter warnings in every file that includes this header. */
    (void) pOld;
    (void) lb;
    (void) extent;
    (void) pNew;
    return -1;
}
|
||||
/**
 * Report both the lower bound and the extent of a datatype.
 *
 * @param pData   datatype descriptor to query; dereferenced unconditionally
 * @param lb      receives pData->lb
 * @param extent  receives pData->ub - pData->lb
 * @return always 0
 */
static inline int dt_get_extent( dt_desc_t* pData, long* lb, long* extent)
{
    *lb = pData->lb;
    *extent = pData->ub - pData->lb;
    return 0;
}
|
||||
/**
 * Report the true lower bound and true extent of a datatype, i.e. the
 * values stored in true_lb / true_ub rather than lb / ub.
 *
 * @param pData        datatype descriptor to query; dereferenced unconditionally
 * @param true_lb      receives pData->true_lb
 * @param true_extent  receives pData->true_ub - pData->true_lb
 * @return always 0
 */
static inline int dt_get_true_extent( dt_desc_t* pData, long* true_lb, long* true_extent)
{
    *true_lb = pData->true_lb;
    *true_extent = pData->true_ub - pData->true_lb;
    return 0;
}
|
||||
|
||||
int dt_get_element_count( dt_desc_t* pData, size_t iSize );
|
||||
int dt_copy_content_same_dt( dt_desc_t* pData, int count, char* pDestBuf, char* pSrcBuf );
|
||||
|
||||
#define dt_increase_ref(PDT) OBJ_RETAIN( PDT )
|
||||
|
@ -1,58 +0,0 @@
|
||||
/*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/* lam_datatype_t copy function */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
/*
|
||||
* Copy (the contents of) an array of data types
|
||||
*/
|
||||
int lam_datatype_copy(void *dst,
|
||||
const void *src,
|
||||
size_t count,
|
||||
lam_datatype_t *d,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *memcpy_state)
|
||||
{
|
||||
int status;
|
||||
|
||||
status = LAM_SUCCESS;
|
||||
|
||||
if (NULL == src || NULL == dst) {
|
||||
status = LAM_ERROR;
|
||||
}
|
||||
|
||||
if (LAM_SUCCESS == status) {
|
||||
if (NULL == d) {
|
||||
(*memcpy_fn)(dst, src, count, memcpy_state);
|
||||
} else if (LAM_DATATYPE_STATE_CONTIGUOUS & d->flags) {
|
||||
(*memcpy_fn)(dst, src, count * d->extent, memcpy_state);
|
||||
} else {
|
||||
lam_datavec_t *dv = d->datavec;
|
||||
unsigned char *p = (unsigned char *) dst;
|
||||
unsigned char *q = (unsigned char *) src;
|
||||
size_t i, j;
|
||||
|
||||
while (count--) {
|
||||
for (i = 0; i < d->datavec_size; i++) {
|
||||
for (j = 0; j < dv->nrepeat; j++) {
|
||||
(*memcpy_fn)(p + dv->element[i].offset,
|
||||
q + dv->element[i].offset,
|
||||
dv->element[i].size,
|
||||
memcpy_state);
|
||||
}
|
||||
p += dv->repeat_offset;
|
||||
q += dv->repeat_offset;
|
||||
}
|
||||
p += d->extent;
|
||||
q += d->extent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
@ -1,165 +0,0 @@
|
||||
/*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/** @file
|
||||
*
|
||||
* 32-bit cyclic redundancy check implementation
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "lam_config.h"
|
||||
#include "datatype.h"
|
||||
|
||||
#define CRC_POLYNOMIAL ((uint32_t) 0x04c11db7)
|
||||
#define CRC_INITIAL_REGISTER ((uint32_t) 0xffffffff)
|
||||
|
||||
|
||||
/*
|
||||
* Look-up table for CRC32 generation
|
||||
*/
|
||||
static bool crc_table_initialized = false;
|
||||
static uint32_t crc_table[256];
|
||||
|
||||
|
||||
/**
|
||||
* CRC32 table generation
|
||||
*
|
||||
* One-time initialization of CRC32 look-up table. Thanks to Charles
|
||||
* Michael Heard for his optimized CRC32 code.
|
||||
*/
|
||||
static void initialize_crc_table(void)
|
||||
{
|
||||
register int i, j;
|
||||
register uint32_t crc_accum;
|
||||
|
||||
for (i = 0; i < 256; i++) {
|
||||
crc_accum = (i << 24);
|
||||
for (j = 0; j < 8; j++) {
|
||||
if (crc_accum & 0x80000000) {
|
||||
crc_accum = (crc_accum << 1) ^ CRC_POLYNOMIAL;
|
||||
} else {
|
||||
crc_accum = (crc_accum << 1);
|
||||
}
|
||||
}
|
||||
crc_table[i] = crc_accum;
|
||||
}
|
||||
|
||||
crc_table_initialized = 1;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Generate a 32-bit CRC for a buffer
|
||||
*/
|
||||
uint32_t lam_crc32(const void *buffer, size_t size, uint32_t initial_crc)
|
||||
{
|
||||
register int i, j;
|
||||
register unsigned char *t;
|
||||
uint32_t tmp;
|
||||
uint32_t crc = initial_crc;
|
||||
|
||||
if (!crc_table_initialized) {
|
||||
initialize_crc_table();
|
||||
}
|
||||
|
||||
if (lam_aligned32((void *) buffer)) {
|
||||
register uint32_t *restrict src = (uint32_t *) buffer;
|
||||
while (size >= sizeof(uint32_t)) {
|
||||
tmp = *src++;
|
||||
t = (unsigned char *) &tmp;
|
||||
for (j = 0; j < (int) sizeof(uint32_t); j++) {
|
||||
i = ((crc >> 24) ^ *t++) & 0xff;
|
||||
crc = (crc << 8) ^ crc_table[i];
|
||||
}
|
||||
size -= sizeof(uint32_t);
|
||||
}
|
||||
t = (unsigned char *) src;
|
||||
while (size--) {
|
||||
i = ((crc >> 24) ^ *t++) & 0xff;
|
||||
crc = (crc << 8) ^ crc_table[i];
|
||||
}
|
||||
} else {
|
||||
register unsigned char *restrict src = (unsigned char *) buffer;
|
||||
while (size--) {
|
||||
i = ((crc >> 24) ^ *src++) & 0xff;
|
||||
crc = (crc << 8) ^ crc_table[i];
|
||||
}
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Copy data from one buffer to another and calculate a 32-bit CRC
|
||||
*/
|
||||
void *lam_memcpy_crc32(void *dst,
|
||||
const void *src,
|
||||
size_t size,
|
||||
lam_memcpy_state_t *state)
|
||||
{
|
||||
size_t crclenresidue = (state->size > size) ? (state->size - size) : 0;
|
||||
register int i, j;
|
||||
uint32_t tmp;
|
||||
register unsigned char t;
|
||||
uint32_t crc = state->sum;
|
||||
|
||||
if (!crc_table_initialized) {
|
||||
initialize_crc_table();
|
||||
}
|
||||
|
||||
if (state->first_call) {
|
||||
state->first_call = false;
|
||||
state->sum = CRC_INITIAL_REGISTER;
|
||||
}
|
||||
|
||||
if (lam_aligned32((void *) src) && lam_aligned32(dst)) {
|
||||
register uint32_t *restrict p = (uint32_t *) dst;
|
||||
register uint32_t *restrict q = (uint32_t *) src;
|
||||
register unsigned char *ts, *td;
|
||||
/* copy whole integers */
|
||||
while (size >= sizeof(uint32_t)) {
|
||||
tmp = *q++;
|
||||
*p++ = tmp;
|
||||
ts = (unsigned char *) &tmp;
|
||||
for (j = 0; j < (int) sizeof(uint32_t); j++) {
|
||||
i = ((crc >> 24) ^ *ts++) & 0xff;
|
||||
crc = (crc << 8) ^ crc_table[i];
|
||||
}
|
||||
size -= sizeof(uint32_t);
|
||||
}
|
||||
ts = (unsigned char *) q;
|
||||
td = (unsigned char *) p;
|
||||
/* copy partial integer */
|
||||
while (size--) {
|
||||
t = *ts++;
|
||||
*td++ = t;
|
||||
i = ((crc >> 24) ^ t) & 0xff;
|
||||
crc = (crc << 8) ^ crc_table[i];
|
||||
}
|
||||
/* calculate CRC over remaining bytes... */
|
||||
while (crclenresidue--) {
|
||||
i = ((crc >> 24) ^ *ts++) & 0xff;
|
||||
crc = (crc << 8) ^ crc_table[i];
|
||||
}
|
||||
} else {
|
||||
register unsigned char *restrict q = (unsigned char *) src;
|
||||
register unsigned char *restrict p = (unsigned char *) dst;
|
||||
while (size--) {
|
||||
t = *q++;
|
||||
*p++ = t;
|
||||
i = ((crc >> 24) ^ t) & 0xff;
|
||||
crc = (crc << 8) ^ crc_table[i];
|
||||
}
|
||||
while (crclenresidue--) {
|
||||
i = ((crc >> 24) ^ *q++) & 0xff;
|
||||
crc = (crc << 8) ^ crc_table[i];
|
||||
}
|
||||
}
|
||||
|
||||
state->sum = crc;
|
||||
|
||||
return dst;
|
||||
}
|
@ -1,343 +0,0 @@
|
||||
/*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/** @file datatype creation function */
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
/**
|
||||
* Create a LAM/MPI datatype
|
||||
*
|
||||
* @param combiner integer identifying the kind of MPI create function
|
||||
* @param ninteger number of integers passed to the create function
|
||||
* @param integer array of integers passed to the create function
|
||||
* @param naddress number of addresses passed to the create function
|
||||
* @param address array of addresses passed to the create function
|
||||
* @param ntype number of data types passed to the create function
|
||||
* @param type array of data types passed to the create function
|
||||
* @param newtype pointer to address of new type
|
||||
* @return LAM_SUCCESS on successful creation, LAM_ERROR otherwise
|
||||
*
|
||||
* This is the central location for creation of data types in LAM/MPI.
|
||||
* All MPI_Type_create functions rely upon this to do the actual type
|
||||
* creation.
|
||||
*/
|
||||
int lam_datatype_create(int combiner,
|
||||
int nintegers,
|
||||
int integers[],
|
||||
int naddresses,
|
||||
ssize_t addresses[],
|
||||
int ntypes,
|
||||
lam_datatype_t *types[],
|
||||
lam_datatype_t **newtype)
|
||||
{
|
||||
#if 0
|
||||
lam_datatype_t *newtype, *t;
|
||||
lam_datatype_t **types = (lam_datatype_t **) array_of_types;
|
||||
int i, j, k;
|
||||
int mpi_lb_i, mpi_ub_i;
|
||||
int min_lb, max_ub;
|
||||
int min_disp_i, max_disp_i;
|
||||
int typemap_i;
|
||||
ssize_t lb, ub, min_disp, max_disp, typemap_off, current_offset,
|
||||
new_offset;
|
||||
size_t current_size, new_size;
|
||||
int rc;
|
||||
|
||||
if (newdatatype == NULL) {
|
||||
ulm_err(("Error: MPI_Type_struct: Invalid newtype pointer\n"));
|
||||
rc = MPI_ERR_TYPE;
|
||||
_mpi_errhandler(MPI_COMM_WORLD, rc, __FILE__, __LINE__);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (count < 0) {
|
||||
ulm_err(("Error: MPI_Type_struct: count %d is invalid\n", count));
|
||||
rc = MPI_ERR_INTERN;
|
||||
_mpi_errhandler(MPI_COMM_WORLD, rc, __FILE__, __LINE__);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (count == 0) {
|
||||
newtype = (lam_datatype_t *) malloc(sizeof(lam_datatype_t));
|
||||
if (newtype == NULL) {
|
||||
ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
|
||||
rc = MPI_ERR_TYPE;
|
||||
_mpi_errhandler(MPI_COMM_WORLD, rc, __FILE__, __LINE__);
|
||||
return rc;
|
||||
}
|
||||
|
||||
newtype->isbasic = 0;
|
||||
newtype->layout = CONTIGUOUS;
|
||||
newtype->num_pairs = 0;
|
||||
newtype->extent = 0;
|
||||
newtype->lower_bound = 0;
|
||||
newtype->type_map = NULL;
|
||||
newtype->committed = 0;
|
||||
newtype->ref_count = 1;
|
||||
|
||||
*newdatatype = newtype;
|
||||
|
||||
/* save "envelope" information */
|
||||
newtype->envelope.combiner = MPI_COMBINER_STRUCT;
|
||||
newtype->envelope.nints = 1;
|
||||
newtype->envelope.naddrs = 0;
|
||||
newtype->envelope.ndatatypes = 0;
|
||||
newtype->envelope.iarray = (int *) malloc(sizeof(int));
|
||||
newtype->envelope.aarray = NULL;
|
||||
newtype->envelope.darray = NULL;
|
||||
newtype->envelope.iarray[0] = count;
|
||||
|
||||
if (_mpi.fortran_layer_enabled) {
|
||||
newtype->fhandle = _mpi_ptr_table_add(_mpif.type_table, newtype);
|
||||
}
|
||||
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Allocate new type */
|
||||
newtype = malloc(sizeof(lam_datatype_t));
|
||||
if (newtype == NULL) {
|
||||
ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
|
||||
rc = MPI_ERR_TYPE;
|
||||
_mpi_errhandler(MPI_COMM_WORLD, rc, __FILE__, __LINE__);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Initialize newtype */
|
||||
newtype->isbasic = 0;
|
||||
newtype->op_index = 0;
|
||||
newtype->layout = NON_CONTIGUOUS;
|
||||
newtype->committed = 0;
|
||||
newtype->ref_count = 1;
|
||||
|
||||
/* save "envelope" information */
|
||||
newtype->envelope.combiner = MPI_COMBINER_STRUCT;
|
||||
newtype->envelope.nints = count + 1;
|
||||
newtype->envelope.naddrs = count;
|
||||
newtype->envelope.ndatatypes = count;
|
||||
newtype->envelope.iarray =
|
||||
(int *) malloc(newtype->envelope.nints * sizeof(int));
|
||||
newtype->envelope.aarray =
|
||||
(MPI_Aint *) malloc(newtype->envelope.naddrs *
|
||||
sizeof(MPI_Aint));
|
||||
newtype->envelope.darray =
|
||||
(MPI_Datatype *) malloc(newtype->envelope.ndatatypes *
|
||||
sizeof(MPI_Datatype));
|
||||
newtype->envelope.iarray[0] = count;
|
||||
for (i = 0; i < count; i++) {
|
||||
newtype->envelope.iarray[i + 1] = array_of_blocklengths[i];
|
||||
newtype->envelope.aarray[i] = array_of_displacements[i];
|
||||
newtype->envelope.darray[i] = array_of_types[i];
|
||||
t = array_of_types[i];
|
||||
fetchNadd(&(t->ref_count), 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Look for MPI_LB, MPI_UB markers, smallest/largest
|
||||
* displacements, and save off indices
|
||||
*/
|
||||
mpi_lb_i = -1;
|
||||
mpi_ub_i = -1;
|
||||
/* initialize min_lb and max_lb to quiet not-so-bright compilers */
|
||||
min_lb = 0;
|
||||
max_ub = 0;
|
||||
min_disp = array_of_displacements[0];
|
||||
max_disp = array_of_displacements[0];
|
||||
min_disp_i = 0;
|
||||
max_disp_i = 0;
|
||||
for (i = 0; i < count; i++) {
|
||||
if (types[i]->extent == 0) {
|
||||
if (types[i]->op_index == -1) {
|
||||
if ((mpi_lb_i == -1)
|
||||
|| (array_of_displacements[i] < min_lb)) {
|
||||
min_lb = array_of_displacements[i];
|
||||
mpi_lb_i = i;
|
||||
}
|
||||
} else if (types[i]->op_index == -2) {
|
||||
if ((mpi_lb_i == -1)
|
||||
|| (array_of_displacements[i] > max_ub)) {
|
||||
max_ub = array_of_displacements[i];
|
||||
mpi_ub_i = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (types[i]->lower_bound > 0) {
|
||||
if ((mpi_lb_i == -1) || (types[i]->lower_bound < min_lb)) {
|
||||
min_lb = types[i]->lower_bound;
|
||||
mpi_lb_i = i;
|
||||
}
|
||||
}
|
||||
if (array_of_displacements[i] < min_disp) {
|
||||
min_disp = array_of_displacements[i];
|
||||
min_disp_i = i;
|
||||
}
|
||||
if (array_of_displacements[i] > max_disp) {
|
||||
max_disp = array_of_displacements[i];
|
||||
max_disp_i = i;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* calculate the new datatype's extent, and set the
|
||||
* lower bound
|
||||
*/
|
||||
lb = 0, ub = 0;
|
||||
if (mpi_lb_i != -1) {
|
||||
lb = min_lb;
|
||||
} else {
|
||||
lb = array_of_displacements[min_disp_i];
|
||||
}
|
||||
if (mpi_ub_i != -1) {
|
||||
ub = max_ub;
|
||||
} else {
|
||||
ub = array_of_displacements[max_disp_i]
|
||||
+ array_of_blocklengths[max_disp_i]
|
||||
* types[max_disp_i]->extent;
|
||||
}
|
||||
/* extent should never be less than zero */
|
||||
if (ub < lb) {
|
||||
ub = lb;
|
||||
}
|
||||
newtype->extent = ub - lb;
|
||||
newtype->lower_bound = lb;
|
||||
|
||||
/* calculate the number of entries needed for the new type_map */
|
||||
typemap_i = 0;
|
||||
current_size = current_offset = 0;
|
||||
for (i = 0; i < count; i++) {
|
||||
if (types[i]->extent > 0) {
|
||||
typemap_off = array_of_displacements[i];
|
||||
for (j = 0; j < array_of_blocklengths[i]; j++) {
|
||||
for (k = 0; k < types[i]->num_pairs; k++) {
|
||||
new_size = types[i]->type_map[k].size;
|
||||
new_offset = types[i]->type_map[k].offset +
|
||||
typemap_off;
|
||||
if ((typemap_i != 0)
|
||||
&& (current_size + current_offset == new_offset)) {
|
||||
/* consolidate entries */
|
||||
current_size += new_size;
|
||||
if (_MPI_DTYPE_TRIM
|
||||
&& current_size + current_offset > ub) {
|
||||
current_size = ub - current_offset;
|
||||
}
|
||||
} else {
|
||||
if (!_MPI_DTYPE_TRIM
|
||||
|| ((new_offset + new_size > lb)
|
||||
&& (new_offset < ub))) {
|
||||
/* create new type_map entry if there is still something
|
||||
* left after possible trimming */
|
||||
if (_MPI_DTYPE_TRIM && new_offset < lb) {
|
||||
new_size -= (lb - new_offset);
|
||||
new_offset = lb;
|
||||
}
|
||||
if (_MPI_DTYPE_TRIM
|
||||
&& new_offset + new_size > ub) {
|
||||
new_size = (ub - new_offset);
|
||||
}
|
||||
if (new_size > 0) {
|
||||
current_size = new_size;
|
||||
current_offset = new_offset;
|
||||
typemap_i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
typemap_off += types[i]->extent;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* more newtype initialization */
|
||||
newtype->num_pairs = typemap_i;
|
||||
if (newtype->num_pairs > 0) {
|
||||
/* allocate the type_map */
|
||||
newtype->type_map = (ULMTypeMapElt_t *)
|
||||
malloc(newtype->num_pairs * sizeof(ULMTypeMapElt_t));
|
||||
if (newtype->type_map == NULL) {
|
||||
ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
|
||||
rc = MPI_ERR_TYPE;
|
||||
_mpi_errhandler(MPI_COMM_WORLD, rc, __FILE__, __LINE__);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill in new datatype's type_map....
|
||||
*/
|
||||
typemap_i = 0;
|
||||
for (i = 0; i < count; i++) {
|
||||
if (types[i]->extent > 0) {
|
||||
typemap_off = array_of_displacements[i];
|
||||
for (j = 0; j < array_of_blocklengths[i]; j++) {
|
||||
for (k = 0; k < types[i]->num_pairs; k++) {
|
||||
new_size = types[i]->type_map[k].size;
|
||||
new_offset = types[i]->type_map[k].offset +
|
||||
typemap_off;
|
||||
if ((typemap_i != 0)
|
||||
&&
|
||||
((newtype->type_map[typemap_i - 1].size +
|
||||
newtype->type_map[typemap_i - 1].offset) ==
|
||||
new_offset)) {
|
||||
/* consolidate entries - trim at ub */
|
||||
newtype->type_map[typemap_i - 1].size +=
|
||||
new_size;
|
||||
if (_MPI_DTYPE_TRIM
|
||||
&& (newtype->type_map[typemap_i - 1].size +
|
||||
newtype->type_map[typemap_i -
|
||||
1].offset > ub)) {
|
||||
newtype->type_map[typemap_i - 1].size =
|
||||
ub - newtype->type_map[typemap_i -
|
||||
1].offset;
|
||||
}
|
||||
} else {
|
||||
if (!_MPI_DTYPE_TRIM
|
||||
|| ((new_offset + new_size > lb)
|
||||
&& (new_offset < ub))) {
|
||||
/* create new type_map entry if there is still something
|
||||
* left after possible trimming */
|
||||
if (_MPI_DTYPE_TRIM && new_offset < lb) {
|
||||
new_size -= (lb - new_offset);
|
||||
new_offset = lb;
|
||||
}
|
||||
if (_MPI_DTYPE_TRIM
|
||||
&& new_offset + new_size > ub) {
|
||||
new_size = (ub - new_offset);
|
||||
}
|
||||
if (new_size > 0) {
|
||||
newtype->type_map[typemap_i].size =
|
||||
new_size;
|
||||
newtype->type_map[typemap_i].offset =
|
||||
new_offset;
|
||||
typemap_i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
typemap_off += types[i]->extent;
|
||||
}
|
||||
}
|
||||
}
|
||||
} /* end if newtype->numpairs > 0 */
|
||||
else {
|
||||
newtype->type_map = NULL;
|
||||
}
|
||||
|
||||
/* mark the datatype as contiguous if it clearly is ... */
|
||||
if (_MPI_MARK_AS_CONTIGUOUS) {
|
||||
if (((newtype->num_pairs == 0) && (newtype->extent == 0))
|
||||
|| ((newtype->num_pairs == 1)
|
||||
&& (newtype->extent == newtype->type_map[0].size))) {
|
||||
newtype->layout = CONTIGUOUS;
|
||||
}
|
||||
}
|
||||
|
||||
*newdatatype = newtype;
|
||||
|
||||
if (_mpi.fortran_layer_enabled) {
|
||||
newtype->fhandle = _mpi_ptr_table_add(_mpif.type_table, newtype);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
return LAM_SUCCESS;
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
/*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/* lam_datatype_t deletion function */
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
/*
|
||||
* Delete a LAM/MPI datatype (actually, just mark it for deletion)
|
||||
*/
|
||||
int lam_datatype_delete(lam_datatype_t *type)
{
    /* Stub: the handle is not inspected or modified; success is always
     * reported. */
    (void) type;

    return LAM_SUCCESS;
}
|
@ -1,3 +1,5 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#ifndef DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED
|
||||
#define DATATYPE_INTERNAL_H_HAS_BEEN_INCLUDED
|
||||
|
||||
|
@ -1,121 +0,0 @@
|
||||
/*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/* lam_datatype_t pack function(s) */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
/**
|
||||
* Incrementally generate an iovec referencing a datatype (or fragment
|
||||
* of a datatype).
|
||||
*
|
||||
* @param state current state of the incremental pack/unpack
|
||||
* @param vec iovec buffer
|
||||
* @param vec_count maximum length of iovec buffer
|
||||
* @param max_bytes maximum bytes addressed by iovec
|
||||
* @param typebuf array of types
|
||||
* @param ntype size of type array
|
||||
* @param datatype type descriptor
|
||||
* @return 0 if complete, non-zero otherwise
|
||||
*
|
||||
* Incrementally traverse an array of datatypes and generate an iovec
|
||||
* of at most length vec_count and addressing at most max_bytes. This
|
||||
* can be used to do a (partial) RDMA gather of the datatype array.
|
||||
*
|
||||
* The state (all members) should be initialized to 0 before the first
|
||||
* call.
|
||||
*/
|
||||
int lam_datatype_get_iovec(lam_pack_state_t *state,
|
||||
struct iovec *vec,
|
||||
size_t vec_count,
|
||||
size_t max_bytes,
|
||||
const void *typebuf,
|
||||
size_t ntype,
|
||||
lam_datatype_t *datatype)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**** OLD STUFF BELOW ****/
|
||||
|
||||
|
||||
/**
|
||||
* Incrementally generate an iovec for gathering from an array of
|
||||
* datatypes
|
||||
*
|
||||
* @param state current state of the incremental pack/unpack
|
||||
* @param base_addr base address for iovec offsets
|
||||
* @param vec iovec buffer
|
||||
* @param vec_count maximum length of iovec buffer
|
||||
* @param max_bytes maximum bytes addressed by iovec
|
||||
* @param buf buffer to pack into/unpack from
|
||||
* @param bufsize size of buffer
|
||||
* @param typebuf array of types
|
||||
* @param ntype size of type array
|
||||
* @param datatype type descriptor
|
||||
* @return 0 if complete, non-zero otherwise
|
||||
*
|
||||
* Incrementally traverse an array of datatypes and generate an iovec
|
||||
* of at most length vec_count and addressing at most max_bytes. This
|
||||
* can be used to do a (partial) RDMA gather of the datatype array.
|
||||
*
|
||||
* The state (all members) should be initialized to 0 before the first
|
||||
* call.
|
||||
*/
|
||||
int lam_datatype_gather_iovec(lam_pack_state_t *state,
|
||||
void *base_addr,
|
||||
struct iovec *vec,
|
||||
size_t vec_count,
|
||||
size_t max_bytes,
|
||||
const void *typebuf,
|
||||
size_t ntype,
|
||||
lam_datatype_t *datatype,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *memcpy_state);
|
||||
|
||||
/**
|
||||
* Incrementally generate an iovec for scattering from a packed array
|
||||
* of datatypes
|
||||
*
|
||||
* @param state current state of the incremental pack/unpack
|
||||
* @param base_addr base address for iovec offsets
|
||||
* @param vec iovec buffer
|
||||
* @param vec_count maximum length of iovec buffer
|
||||
* @param max_bytes maximum bytes addressed by iovec
|
||||
* @param buf packed buffer
|
||||
* @param bufsize size of buffer
|
||||
* @param typebuf array of types
|
||||
* @param ntype size of type array
|
||||
* @param datatype type descriptor
|
||||
* @return 0 if complete, non-zero otherwise
|
||||
*
|
||||
* Incrementally copy data type arrays to/from a packed buffer. by
|
||||
* iterating over the type and type_map until we finish or run out of
|
||||
* room.
|
||||
*
|
||||
* Incrementally traverse a packed array of datatypes and generate an
|
||||
* iovec of at most length vec_count and addressing at most max_bytes.
|
||||
* This can be used to do a (partial) RDMA scatter of the datatype
|
||||
* array.
|
||||
*
|
||||
* The state (all members) should be initialized to 0 before the first
|
||||
* call.
|
||||
*/
|
||||
int lam_datatype_scatter_iovec(lam_pack_state_t *state,
|
||||
void *base_addr,
|
||||
struct iovec *vec,
|
||||
size_t vec_count,
|
||||
size_t max_bytes,
|
||||
const void *buf,
|
||||
size_t bufsize,
|
||||
lam_datatype_t *datatype,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *memcpy_state);
|
||||
|
||||
|
@ -1,43 +0,0 @@
|
||||
/*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/* alternative memcpy function */
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "lam_config.h"
|
||||
#include "datatype.h"
|
||||
|
||||
/*
|
||||
* Alternative memcpy function: On some systems, this performs better
|
||||
* than the system memcpy.
|
||||
*/
|
||||
void *lam_memcpy_alt(void *dst, const void *src, size_t size,
|
||||
lam_memcpy_state_t *dummy)
|
||||
{
|
||||
assert(dst);
|
||||
assert(src);
|
||||
|
||||
if (lam_aligned32((void *) src) && lam_aligned32(dst)) {
|
||||
uint32_t *restrict p = (uint32_t *) dst;
|
||||
uint32_t *restrict q = (uint32_t *) src;
|
||||
uint32_t i;
|
||||
uint32_t n = size >> 2;
|
||||
for (i = 0; i < n; i++) {
|
||||
*p++ = *q++;
|
||||
}
|
||||
size -= n * sizeof(size_t);
|
||||
if (size != 0) {
|
||||
while (size--) {
|
||||
*p++ = *q++;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
memcpy(dst, src, size);
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
@ -1,122 +0,0 @@
|
||||
/*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/* lam_datatype_t pack function(s) */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
/*
|
||||
* Incrementally pack or unpack an array of datatypes to/from a buffer
|
||||
*/
|
||||
int lam_datatype_packer(lam_pack_state_t *state,
|
||||
void *buf,
|
||||
size_t bufsize,
|
||||
void *typebuf,
|
||||
size_t ntype,
|
||||
lam_datatype_t *d,
|
||||
lam_memcpy_fn_t *memcpy_fn,
|
||||
lam_memcpy_state_t *check,
|
||||
int pack_direction)
|
||||
{
|
||||
if (LAM_DATATYPE_STATE_CONTIGUOUS & d->flags) {
|
||||
|
||||
unsigned char *b;
|
||||
unsigned char *t;
|
||||
size_t copied_so_far;
|
||||
size_t left_to_copy;
|
||||
size_t size;
|
||||
|
||||
b = (unsigned char *) buf + state->packed_offset;
|
||||
t = (unsigned char *) typebuf
|
||||
+ state->type_index * d->extent
|
||||
+ state->repeat_index * d->datavec->repeat_offset
|
||||
+ state->datavec_offset;
|
||||
bufsize -= state->packed_offset;
|
||||
|
||||
copied_so_far =
|
||||
state->type_index * d->extent + state->datavec_offset;
|
||||
size = bufsize;
|
||||
left_to_copy = ntype * d->extent - copied_so_far;
|
||||
if (size > left_to_copy) {
|
||||
size = left_to_copy;
|
||||
}
|
||||
if (LAM_DATATYPE_PACK == pack_direction) {
|
||||
memcpy(b, t, size);
|
||||
} else {
|
||||
memcpy(t, b, size);
|
||||
}
|
||||
copied_so_far += size;
|
||||
state->packed_offset += size;
|
||||
state->type_index = copied_so_far / d->extent;
|
||||
state->datavec_offset =
|
||||
copied_so_far - state->type_index * d->extent;
|
||||
if (copied_so_far != (ntype * d->extent)) {
|
||||
return LAM_DATATYPE_PACK_INCOMPLETE;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
||||
unsigned char *ptr;
|
||||
unsigned char *b;
|
||||
unsigned char *t;
|
||||
size_t size;
|
||||
lam_datavec_t *dv;
|
||||
|
||||
ptr = (unsigned char *) typebuf
|
||||
+ state->type_index * d->extent
|
||||
+ state->repeat_index * dv->repeat_offset;
|
||||
b = (unsigned char *) buf + state->packed_offset;
|
||||
bufsize -= state->packed_offset;
|
||||
dv = d->datavec;
|
||||
|
||||
while (state->type_index < ntype) {
|
||||
while (state->repeat_index < dv->nrepeat) {
|
||||
while (state->element_index < dv->nelement) {
|
||||
t = ptr + dv->element[state->element_index].offset;
|
||||
size = dv->element[state->element_index].size;
|
||||
if (state->datavec_offset > 0) {
|
||||
t += state->datavec_offset;
|
||||
size -= state->datavec_offset;
|
||||
if (size <= bufsize) {
|
||||
state->datavec_offset = 0;
|
||||
}
|
||||
}
|
||||
if (size > bufsize) {
|
||||
size = bufsize;
|
||||
state->datavec_offset += size;
|
||||
}
|
||||
if (LAM_DATATYPE_PACK == pack_direction) {
|
||||
memcpy(b, t, size);
|
||||
} else {
|
||||
memcpy(t, b, size);
|
||||
}
|
||||
state->packed_offset += size;
|
||||
if (state->datavec_offset > 0) {
|
||||
return LAM_DATATYPE_PACK_INCOMPLETE;
|
||||
}
|
||||
bufsize -= size;
|
||||
b += size;
|
||||
state->element_index += 1;
|
||||
if (bufsize == 0
|
||||
&& state->element_index < (size_t) dv->nelement) {
|
||||
return LAM_DATATYPE_PACK_INCOMPLETE;
|
||||
}
|
||||
}
|
||||
ptr += dv->repeat_offset;
|
||||
state->repeat_index += 1;
|
||||
}
|
||||
ptr += d->extent;
|
||||
state->type_index += 1;
|
||||
state->element_index = 0;
|
||||
if (bufsize == 0 && state->type_index < ntype) {
|
||||
return LAM_DATATYPE_PACK_INCOMPLETE;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return LAM_DATATYPE_PACK_COMPLETE;
|
||||
}
|
@ -1,413 +0,0 @@
|
||||
/*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
/** @file
|
||||
*
|
||||
* 32-bit checksum implementation
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "lam_config.h"
|
||||
#include "datatype.h"
|
||||
|
||||
|
||||
/*
|
||||
* Generate a 32-bit checksum for a buffer
|
||||
*/
|
||||
/*
 * Generate a 32-bit checksum for a buffer.
 *
 * The checksum is the wrapping (mod 2^32) sum of the buffer read as
 * consecutive native-endian 32-bit words; a trailing group of fewer
 * than four bytes is zero-padded to a full word before being added.
 * Words are loaded with memcpy, so the buffer may have any alignment.
 *
 * buffer  data to checksum (not dereferenced when size is 0)
 * size    number of bytes in buffer
 *
 * Returns the checksum; 0 for an empty buffer.
 */
uint32_t lam_sum32(const void *buffer, size_t size)
{
    const unsigned char *bytes = (const unsigned char *) buffer;
    uint32_t sum = 0;
    uint32_t word;

    for (; size >= sizeof(word); size -= sizeof(word)) {
        memcpy(&word, bytes, sizeof(word));
        sum += word;
        bytes += sizeof(word);
    }

    if (size != 0) {
        /* zero-pad the final partial word */
        word = 0;
        memcpy(&word, bytes, size);
        sum += word;
    }

    return sum;
}
|
||||
|
||||
|
||||
/*
|
||||
* Copy data from one buffer to another and calculate a 32-bit checksum
|
||||
*/
|
||||
/*
 * Copy size bytes from src to dst and compute a 32-bit checksum.
 *
 * The checksum is the wrapping sum of the data taken as native-endian
 * 32-bit words.  Because a message may be delivered in pieces that do
 * not end on a word boundary, the bytes of an unfinished word are kept
 * in *state between calls.
 *
 * State members used:
 *   size          number of bytes to checksum; when it exceeds the size
 *                 argument, the bytes of src following the copied region
 *                 are read and checksummed as well (not copied)
 *   first_call    when true, it is cleared and the partial-word members
 *                 are reset
 *   partial_int   bytes of the unfinished word seen so far
 *   partial_size  how many bytes of partial_int are valid
 *   sum           set on return to the checksum contribution computed
 *                 by THIS call (it is assigned, not accumulated)
 *
 * All word accesses go through memcpy, so dst and src may have any
 * alignment.
 *
 * Returns dst.
 */
void *lam_memcpy_sum32(void *dst,
                       const void *src,
                       size_t size, lam_memcpy_state_t *state)
{
    unsigned char *p = (unsigned char *) dst;
    const unsigned char *q = (const unsigned char *) src;
    size_t csumlen = state->size;
    size_t i;
    ssize_t csumlenresidue;
    uint32_t csum = 0;
    uint32_t temp;

    if (state->first_call) {
        state->first_call = false;
        state->partial_int = 0;
        state->partial_size = 0;
    }

    csumlenresidue = (csumlen > size) ? (csumlen - size) : 0;
    temp = state->partial_int;

    if (state->partial_size
        && size < (sizeof(uint32_t) - state->partial_size)) {
        /*
         * Not enough data to complete the pending partial word: append
         * what we have to it and consume the whole input.
         */
        memcpy(((char *) &temp + state->partial_size), q, size);
        memcpy(p, ((char *) &temp + state->partial_size), size);
        q += size;
        p += size;
        /*
         * avoid unsigned arithmetic overflow by subtracting the old
         * partial word from the new one before adding to the checksum
         */
        csum += (temp - state->partial_int);
        state->partial_int = temp;
        state->partial_size += size;
        size = 0;
    } else {
        if (state->partial_size) {
            /* complete the pending partial word first */
            size_t fill = sizeof(uint32_t) - state->partial_size;

            memcpy(((char *) &temp + state->partial_size), q, fill);
            memcpy(p, ((char *) &temp + state->partial_size), fill);
            q += fill;
            p += fill;
            csum += (temp - state->partial_int);
            size -= fill;
        }

        /* copy and sum every remaining whole word */
        for (; size >= sizeof(temp); size -= sizeof(temp)) {
            memcpy(&temp, q, sizeof(temp));
            q += sizeof(temp);
            csum += temp;
            memcpy(p, &temp, sizeof(temp));
            p += sizeof(temp);
        }
        state->partial_int = 0;
        state->partial_size = 0;
    }

    /*
     * if size is non-zero there was a bit left, less than an
     * uint32_t's worth
     *
     * At this point either size == 0 (short-input path above) or
     * state->partial_size == 0 (every other path), so a leftover never
     * has to be merged with an older partial word here.
     */
    if ((size != 0) && (csumlenresidue == 0)) {
        /* start a new partial word from the leftover bytes */
        temp = state->partial_int;
        memcpy(&temp, q, size);
        csum += temp;
        memcpy(p, &temp, size);
        state->partial_int = temp;
        state->partial_size = size;
    } else if (csumlenresidue != 0) {
        /* the checksum covers more bytes than were asked to be copied */
        if (size != 0) {
            temp = 0;
            memcpy(&temp, q, size);
            memcpy(p, &temp, size);
        }
        if (csumlenresidue <
            (ssize_t) (sizeof(uint32_t) - size - state->partial_size)) {
            /* still not enough checksum data to finish the word */
            temp = state->partial_int;
            memcpy(((char *) &temp + state->partial_size), q,
                   (size + csumlenresidue));
            /*
             * avoid unsigned arithmetic overflow by subtracting the
             * old partial word from the new one before adding to the
             * checksum...
             */
            csum += temp - state->partial_int;
            q += sizeof(uint32_t);
            state->partial_int = temp;
            state->partial_size += size + csumlenresidue;
            csumlenresidue = 0;
        } else {
            /*
             * we have enough chksum data to fill out our last partial
             * word
             */
            temp = state->partial_int;
            memcpy(((char *) &temp + state->partial_size), q,
                   (sizeof(uint32_t) - state->partial_size));
            csum += temp - state->partial_int;
            q += sizeof(uint32_t) - state->partial_size;
            csumlenresidue -=
                sizeof(uint32_t) - state->partial_size - size;
            state->partial_size = 0;
            state->partial_int = 0;
        }

        /* sum (without copying) the whole words that remain */
        for (i = 0; i < csumlenresidue / sizeof(uint32_t); i++) {
            memcpy(&temp, q, sizeof(temp));
            csum += temp;
            q += sizeof(temp);
        }
        csumlenresidue -= i * sizeof(uint32_t);
        if (csumlenresidue) {
            /* trailing bytes become the new partial word */
            temp = 0;
            memcpy(&temp, q, csumlenresidue);
            csum += temp;
            state->partial_int = temp;
            state->partial_size = csumlenresidue;
        }
    }
    /* end else if (csumlenresidue != 0) */

    state->sum = csum;

    return dst;
}
|
@ -1,3 +1,5 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
/* When we add a datatype we should update its definition depending on
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:3 ; -*- */
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
#include "limits.h"
|
||||
|
@ -1,3 +1,5 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
int dt_create_subarray( int ndims, int* pSizes, int* pSubSizes, int* pStarts,
|
||||
|
@ -1,3 +1,5 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
int dt_duplicate( dt_desc_t* oldType, dt_desc_t** newType )
|
||||
|
@ -1,3 +1,5 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
/* We try to merge together data that are contiguous */
|
||||
|
@ -1,3 +1,5 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
int dt_create_struct( size_t count, size_t* pBlockLength, long* pDisp,
|
||||
|
@ -1,3 +1,5 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
/* Open questions ...
|
||||
|
@ -1,3 +1,5 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
|
||||
/* This function should never be called directly. It's called by the dt_decrease_ref
|
||||
|
@ -1,3 +1,5 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
#include "datatype_internal.h"
|
||||
|
||||
|
@ -1,48 +0,0 @@
|
||||
#include "datatype.h"
|
||||
|
||||
/* Report the upper bound of a datatype: writes pData->ub to *disp.
 * Always returns 0.
 */
int dt_type_ub( dt_desc_t* pData, long* disp )
{
    const int rc = 0;

    disp[0] = pData->ub;
    return rc;
}
|
||||
|
||||
/* Report the lower bound of a datatype: writes pData->lb to *disp.
 * Always returns 0.
 */
int dt_type_lb( dt_desc_t* pData, long* disp )
{
    const int rc = 0;

    disp[0] = pData->lb;
    return rc;
}
|
||||
|
||||
/* Report the extent of a datatype, i.e. the distance between its upper
 * and lower bounds (ub - lb), through *extent.  Always returns 0.
 */
int dt_type_extent( dt_desc_t* pData, long* extent )
{
    const int rc = 0;

    extent[0] = pData->ub - pData->lb;
    return rc;
}
|
||||
|
||||
/* Report the size member of a datatype through *size.
 * Always returns 0.
 */
int dt_type_size ( dt_desc_t* pData, int *size )
{
    const int rc = 0;

    size[0] = pData->size;
    return rc;
}
|
||||
|
||||
/* Placeholder: performs no work.  None of the arguments is read and
 * *pNew is left untouched.  Always returns 0.
 */
int dt_type_resize( dt_desc_t* pOld, long lb, long extent, dt_desc_t** pNew )
{
    const int rc = 0;

    return rc;
}
|
||||
|
||||
/* Report the lower bound and the extent (ub - lb) of a datatype through
 * *lb and *extent.  Always returns 0.
 */
int dt_get_extent( dt_desc_t* datatype, long* lb, long* extent)
{
    lb[0] = datatype->lb;
    extent[0] = datatype->ub - datatype->lb;

    return 0;
}
|
||||
|
||||
/* Report the true lower bound and the true extent (true_ub - true_lb)
 * of a datatype through *true_lb and *true_extent.  Always returns 0.
 */
int dt_get_true_extent( dt_desc_t* datatype, long* true_lb, long* true_extent)
{
    true_lb[0] = datatype->true_lb;
    true_extent[0] = datatype->true_ub - datatype->true_lb;

    return 0;
}
|
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:3 ; -*- */
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
#include "datatype_internal.h"
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:3 ; -*- */
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
#include "datatype_internal.h"
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* -*- Mode: C; c-basic-offset:3 ; -*- */
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
|
||||
#include "datatype.h"
|
||||
#include "datatype_internal.h"
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user