More datatype stuff, as well as reorganization to avoid filename
clashes. I think this compiles OK -- but my automake may be broken. This commit was SVN r392.
Этот коммит содержится в:
родитель
b04786ab84
Коммит
92b97053c7
@ -13,11 +13,13 @@ headers = \
|
|||||||
datatype.h
|
datatype.h
|
||||||
|
|
||||||
libmpi_datatype_la_SOURCES = \
|
libmpi_datatype_la_SOURCES = \
|
||||||
$(headers)
|
$(headers) \
|
||||||
copy.c \
|
datatype_copy.c \
|
||||||
create.c \
|
datatype_crc32.c \
|
||||||
delete.c \
|
datatype_create.c \
|
||||||
datatype.c
|
datatype_delete.c \
|
||||||
|
datatype_memcpy.c \
|
||||||
|
datatype_sum32.c
|
||||||
|
|
||||||
# Conditionally install the header files
|
# Conditionally install the header files
|
||||||
|
|
||||||
|
@ -17,10 +17,15 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/uio.h>
|
#include <sys/uio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "lam_config.h"
|
||||||
|
#include "lam/constants.h"
|
||||||
#include "lam/lfc/object.h"
|
#include "lam/lfc/object.h"
|
||||||
#include "lam/types.h"
|
#include "lam/types.h"
|
||||||
|
|
||||||
|
#include "mpi.h"
|
||||||
|
|
||||||
/* typedefs ***********************************************************/
|
/* typedefs ***********************************************************/
|
||||||
|
|
||||||
typedef enum lam_checksum_kind_t lam_checksum_kind_t;
|
typedef enum lam_checksum_kind_t lam_checksum_kind_t;
|
||||||
@ -92,16 +97,20 @@ enum lam_checksum_kind_t {
|
|||||||
/* structs ************************************************************/
|
/* structs ************************************************************/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstraction of checksum for data
|
* State of incremental memcpy with checksum or CRC
|
||||||
*/
|
*/
|
||||||
struct lam_checksum_t {
|
typedef struct lam_memcpy_state_t {
|
||||||
lam_checksum_kind_t kind;
|
size_t size; /**< total size in bytes of the object
|
||||||
union {
|
* being checksummed / CRCed */
|
||||||
uint64_t sum64;
|
size_t partial_size; /**< size of non- uint32_t to be carried
|
||||||
uint32_t sum32;
|
* over to next call */
|
||||||
uint32_t crc32;
|
uint32_t partial_int; /**< value of non- uint32_t to be carried
|
||||||
} sum;
|
* over to next call */
|
||||||
};
|
uint32_t sum; /**< current value of the CRC or
|
||||||
|
* checksum */
|
||||||
|
bool first_call; /**< is this the first call for this
|
||||||
|
* checksum/CRC? */
|
||||||
|
} lam_memcpy_state_t;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -208,6 +217,7 @@ int lam_datatype_checksum(const void *addr,
|
|||||||
int lam_datatype_copy(void *dst,
|
int lam_datatype_copy(void *dst,
|
||||||
const void *src,
|
const void *src,
|
||||||
size_t count,
|
size_t count,
|
||||||
|
lam_datatype_t *datatype,
|
||||||
lam_memcpy_fn_t *memcpy_fn,
|
lam_memcpy_fn_t *memcpy_fn,
|
||||||
void *csum);
|
void *csum);
|
||||||
|
|
||||||
@ -380,21 +390,20 @@ int lam_datatype_scatter_iovec(lam_pack_state_t *state,
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* checksum functions
|
* incremental memcpy with checksum / CRC functions
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy data from one buffer to another and calculate a 32-bit checksum
|
* initialize the state for an incremental memcpy with checksum / CRC
|
||||||
*
|
*
|
||||||
* @param dst pointer to the dstination buffer
|
* @param state pointer to state object for the current sequence of copies
|
||||||
* @param src pointer to the source buffer
|
* @param sum_size the length of the entire buffer to be checksummed
|
||||||
* @param size size of the buffer
|
|
||||||
* @param csum32 pointer to a 32-bit unsigned integer to hold the checksum
|
|
||||||
* @return the original value of dst
|
|
||||||
*/
|
*/
|
||||||
static inline void *lam_memcpy(void *dst, const void *src, size_t size, void *dummy)
|
static inline void lam_memcpy_init(lam_memcpy_state_t *state, size_t sum_size)
|
||||||
{
|
{
|
||||||
return memcpy(dst, src, size);
|
state->size = sum_size;
|
||||||
|
state->first_call = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -404,21 +413,21 @@ static inline void *lam_memcpy(void *dst, const void *src, size_t size, void *du
|
|||||||
* @param dst pointer to the destination buffer
|
* @param dst pointer to the destination buffer
|
||||||
* @param src pointer to the source buffer
|
* @param src pointer to the source buffer
|
||||||
* @param size size of the buffer
|
* @param size size of the buffer
|
||||||
* @param csum32 pointer to a 32-bit unsigned integer to hold the checksum
|
* @param state pointer to a memcpy with checksum/CRC state structure (ignored)
|
||||||
* @return the original value of dst
|
* @return the original value of dst
|
||||||
*/
|
*/
|
||||||
void *lam_memcpy_csum32(void *dst, const void *src, size_t size, void *csum32);
|
static inline void *lam_memcpy(void *dst, const void *src, size_t size,
|
||||||
|
lam_memcpy_state_t *state)
|
||||||
|
{
|
||||||
|
return memcpy(dst, src, size);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_crc);
|
||||||
* Copy data from one buffer to another and calculate a 64-bit checksum
|
uint32_t lam_sum32(const void *restrict buffer, size_t size, uint32_t initial_crc);
|
||||||
*
|
void *lam_memcpy_sum32(void *dst, const void *src, size_t size,
|
||||||
* @param dst pointer to the destination buffer
|
lam_memcpy_state_t *state);
|
||||||
* @param src pointer to the source buffer
|
void *lam_memcpy_crc32(void *dst, const void *src, size_t size,
|
||||||
* @param size size of the buffer
|
lam_memcpy_state_t *state);
|
||||||
* @param csum64 pointer to a 64-bit unsigned integer to hold the checksum
|
|
||||||
* @return the original value of dst
|
|
||||||
*/
|
|
||||||
void *lam_memcpy_csum64(void *dst, const void *src, size_t size, void *csum64);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy data from one buffer to another and calculate a 32-bit checksum
|
* Copy data from one buffer to another and calculate a 32-bit checksum
|
||||||
@ -426,27 +435,20 @@ void *lam_memcpy_csum64(void *dst, const void *src, size_t size, void *csum64);
|
|||||||
* @param dst pointer to the destination buffer
|
* @param dst pointer to the destination buffer
|
||||||
* @param src pointer to the source buffer
|
* @param src pointer to the source buffer
|
||||||
* @param size size of the buffer
|
* @param size size of the buffer
|
||||||
* @param crc32 pointer to a 32-bit unsigned integer to hold the CRC
|
* @param state pointer to a memcpy with checksum/CRC state structure
|
||||||
* @return the original value of dst
|
* @return the original value of dst
|
||||||
*/
|
*/
|
||||||
void *lam_memcpy_crc32(void *dst, const void *src, size_t size, void *crc32);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy data from one buffer to another and calculate a 64-bit checksum
|
* Copy data from one buffer to another and calculate a 32-bit checksum
|
||||||
*
|
*
|
||||||
* @param dst pointer to the destination buffer
|
* @param dst pointer to the destination buffer
|
||||||
* @param src pointer to the source buffer
|
* @param src pointer to the source buffer
|
||||||
* @param size size of the buffer
|
* @param size size of the buffer
|
||||||
* @param crc64 pointer to a 64-bit unsigned integer to hold the CRC
|
* @param state pointer to a memcpy with checksum/CRC state structure
|
||||||
* @return the original value of dst
|
* @return the original value of dst
|
||||||
*/
|
*/
|
||||||
void *lam_memcpy_crc64(void *dst, const void *src, size_t size, void *crc64);
|
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
typedef void (ulm_scatterv_t) (void *, int *, int *, ULMType_t *, void *,
|
|
||||||
int, ULMType_t *, int, int);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* LAM_DATATYPE_H_INCLUDED */
|
#endif /* LAM_DATATYPE_H_INCLUDED */
|
||||||
|
@ -4,7 +4,8 @@
|
|||||||
|
|
||||||
/** @file datatype copy function */
|
/** @file datatype copy function */
|
||||||
|
|
||||||
#include "lam_config.h"
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "datatype.h"
|
#include "datatype.h"
|
||||||
|
|
||||||
int lam_datatype_copy(void *dst,
|
int lam_datatype_copy(void *dst,
|
||||||
@ -24,20 +25,20 @@ int lam_datatype_copy(void *dst,
|
|||||||
|
|
||||||
if (LAM_SUCCESS == status) {
|
if (LAM_SUCCESS == status) {
|
||||||
if (NULL == d) {
|
if (NULL == d) {
|
||||||
memcpy_fn(dst, src, count, csum);
|
(*memcpy_fn)(dst, src, count, csum);
|
||||||
} else if (LAM_DATATYPE_STATE_CONTIGUOUS & d->d_flags) {
|
} else if (LAM_DATATYPE_STATE_CONTIGUOUS & d->d_flags) {
|
||||||
memcpy_fn(dst, src, count * d->d_extent, csum);
|
(*memcpy_fn)(dst, src, count * d->d_extent, csum);
|
||||||
} else {
|
} else {
|
||||||
lam_datavec_t dv = d->d_datavec;
|
lam_datavec_t *dv = d->d_datavec;
|
||||||
size_t datavec_size = d->d_datavec;
|
size_t datavec_size = d->d_datavec_size;
|
||||||
unsigned char *p = ((unsigned char *) dst);
|
unsigned char *p = ((unsigned char *) dst);
|
||||||
unsigned char *q = ((unsigned char *) src);
|
unsigned char *q = ((unsigned char *) src);
|
||||||
size_t i;
|
size_t i, j;
|
||||||
|
|
||||||
while (count--) {
|
while (count--) {
|
||||||
for (i = 0; i < d->d_datavec_size; i++) {
|
for (i = 0; i < d->d_datavec_size; i++) {
|
||||||
for (j = 0; j < dv->dv_nrepeat; i++) {
|
for (j = 0; j < dv->dv_nrepeat; i++) {
|
||||||
memcpy_fn(p + dv->dv_element[i].dve_offset,
|
(*memcpy_fn)(p + dv->dv_element[i].dve_offset,
|
||||||
q + dv->dv_element[i].dve_offset,
|
q + dv->dv_element[i].dve_offset,
|
||||||
dv->dv_element[i].dve_size,
|
dv->dv_element[i].dve_size,
|
||||||
csum);
|
csum);
|
182
src/mpi/datatype/datatype_crc32.c
Обычный файл
182
src/mpi/datatype/datatype_crc32.c
Обычный файл
@ -0,0 +1,182 @@
|
|||||||
|
/*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** @file 32-bit cyclic redundancy check support */
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "lam_config.h"
|
||||||
|
#include "lam/stdint.h"
|
||||||
|
#include "datatype.h"
|
||||||
|
|
||||||
|
#define CRC_POLYNOMIAL ((uint32_t) 0x04c11db7)
#define CRC_INITIAL_REGISTER ((uint32_t) 0xffffffff)

/*
 * Non-zero when the address X lies on a 4-byte boundary.
 *
 * The comparison must be fully parenthesized: `==` binds tighter than
 * `&`, so `X & 3 == 0` is evaluated as `X & (3 == 0)`, i.e. always 0.
 * The address is converted through size_t so that it is not truncated
 * on platforms whose pointers are wider than 32 bits.
 */
#define IS_32BIT_ALIGNED(X) \
    ((((size_t) (X)) & (size_t) 3) == (size_t) 0)

/*
 * Look-up table for CRC32 generation, filled in on first use.
 */
static bool crc_table_initialized = false;
static uint32_t crc_table[256];

/**
 * CRC32 table generation
 *
 * One time initialization of the CRC32 look-up table: entry i holds the
 * result of shifting the byte value i through the CRC register eight
 * times (most significant bit first).  Thanks to Charles Michael Heard
 * for his optimized CRC32 code.
 */
static void initialize_crc_table(void)
{
    uint32_t i;
    int bit;

    for (i = 0; i < 256; i++) {
        /* unsigned shift: (int) i << 24 would overflow for i >= 128 */
        uint32_t crc_accum = i << 24;

        for (bit = 0; bit < 8; bit++) {
            if (crc_accum & (uint32_t) 0x80000000) {
                crc_accum = (crc_accum << 1) ^ CRC_POLYNOMIAL;
            } else {
                crc_accum = (crc_accum << 1);
            }
        }
        crc_table[i] = crc_accum;
    }

    crc_table_initialized = true;
}


/**
 * Generate a 32-bit CRC for a buffer
 *
 * @param buffer      Data buffer
 * @param size        Size of buffer in bytes
 * @param initial_crc Initial value of the CRC register
 * @return            The CRC
 *
 * Generate a 32-bit CRC for a data buffer starting from a given CRC
 * value.  Bytes are always consumed in memory order, so the result
 * does not depend on the alignment of buffer; a size of 0 returns
 * initial_crc unchanged.
 */
uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_crc)
{
    const unsigned char *bytes = (const unsigned char *) buffer;
    uint32_t crc = initial_crc;

    if (!crc_table_initialized) {
        initialize_crc_table();
    }

    if (IS_32BIT_ALIGNED(buffer)) {
        /* aligned source: fetch a word at a time, then feed its bytes
         * to the CRC in the order they appear in memory */
        const uint32_t *words = (const uint32_t *) buffer;

        while (size >= sizeof(uint32_t)) {
            uint32_t word = *words++;
            const unsigned char *t = (const unsigned char *) &word;
            size_t j;

            for (j = 0; j < sizeof(uint32_t); j++) {
                crc = (crc << 8) ^ crc_table[((crc >> 24) ^ t[j]) & 0xff];
            }
            size -= sizeof(uint32_t);
        }
        bytes = (const unsigned char *) words;
    }

    /* unaligned source, or the trailing bytes of an aligned one */
    while (size--) {
        crc = (crc << 8) ^ crc_table[((crc >> 24) ^ *bytes++) & 0xff];
    }

    return crc;
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy data from one buffer to another and calculate a 32-bit CRC
|
||||||
|
*
|
||||||
|
* @param dst pointer to the destination buffer
|
||||||
|
* @param src pointer to the source buffer
|
||||||
|
* @param size size of the buffer
|
||||||
|
* @param state pointer to a memcpy with checksum/CRC state structure
|
||||||
|
* @return the original value of dst
|
||||||
|
*
|
||||||
|
* This handles cumulative CRCs for for arbitrary lengths and address
|
||||||
|
* alignments as best as it can. The initial contents of state->sum is
|
||||||
|
* used as the starting value of the CRC. The final CRC is placed
|
||||||
|
* back in state->sum.
|
||||||
|
*/
|
||||||
|
void *lam_memcpy_crc32(void *restrict dst,
|
||||||
|
const void *restrict src,
|
||||||
|
size_t size,
|
||||||
|
lam_memcpy_state_t *state)
|
||||||
|
{
|
||||||
|
size_t crclenresidue = (state->size > size) ? (state->size - size) : 0;
|
||||||
|
register int i, j;
|
||||||
|
uint32_t tmp;
|
||||||
|
register unsigned char t;
|
||||||
|
uint32_t crc = state->sum;
|
||||||
|
|
||||||
|
if (!crc_table_initialized) {
|
||||||
|
initialize_crc_table();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state->first_call) {
|
||||||
|
state->first_call = false;
|
||||||
|
state->sum = CRC_INITIAL_REGISTER;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IS_32BIT_ALIGNED(src) && IS_32BIT_ALIGNED(dst)) {
|
||||||
|
register uint32_t *restrict p = (uint32_t *) dst;
|
||||||
|
register uint32_t *restrict q = (uint32_t *) src;
|
||||||
|
register unsigned char *ts, *td;
|
||||||
|
/* copy whole integers */
|
||||||
|
while (size >= sizeof(uint32_t)) {
|
||||||
|
tmp = *q++;
|
||||||
|
*p++ = tmp;
|
||||||
|
ts = (unsigned char *) &tmp;
|
||||||
|
for (j = 0; j < (int) sizeof(uint32_t); j++) {
|
||||||
|
i = ((crc >> 24) ^ *ts++) & 0xff;
|
||||||
|
crc = (crc << 8) ^ crc_table[i];
|
||||||
|
}
|
||||||
|
size -= sizeof(uint32_t);
|
||||||
|
}
|
||||||
|
ts = (unsigned char *) q;
|
||||||
|
td = (unsigned char *) p;
|
||||||
|
/* copy partial integer */
|
||||||
|
while (size--) {
|
||||||
|
t = *ts++;
|
||||||
|
*td++ = t;
|
||||||
|
i = ((crc >> 24) ^ t) & 0xff;
|
||||||
|
crc = (crc << 8) ^ crc_table[i];
|
||||||
|
}
|
||||||
|
/* calculate CRC over remaining bytes... */
|
||||||
|
while (crclenresidue--) {
|
||||||
|
i = ((crc >> 24) ^ *ts++) & 0xff;
|
||||||
|
crc = (crc << 8) ^ crc_table[i];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
register unsigned char *restrict q = (unsigned char *) src;
|
||||||
|
register unsigned char *restrict p = (unsigned char *) dst;
|
||||||
|
while (size--) {
|
||||||
|
t = *q++;
|
||||||
|
*p++ = t;
|
||||||
|
i = ((crc >> 24) ^ t) & 0xff;
|
||||||
|
crc = (crc << 8) ^ crc_table[i];
|
||||||
|
}
|
||||||
|
while (crclenresidue--) {
|
||||||
|
i = ((crc >> 24) ^ *q++) & 0xff;
|
||||||
|
crc = (crc << 8) ^ crc_table[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
state->sum = crc;
|
||||||
|
|
||||||
|
return dst;
|
||||||
|
}
|
@ -4,9 +4,6 @@
|
|||||||
|
|
||||||
/** @file datatype creation function */
|
/** @file datatype creation function */
|
||||||
|
|
||||||
#include "lam_config.h"
|
|
||||||
#include "lam/constants.h"
|
|
||||||
#include "mpi.h"
|
|
||||||
#include "datatype.h"
|
#include "datatype.h"
|
||||||
|
|
||||||
/**
|
/**
|
@ -4,9 +4,6 @@
|
|||||||
|
|
||||||
/** @file datatype deletion function */
|
/** @file datatype deletion function */
|
||||||
|
|
||||||
#include "lam_config.h"
|
|
||||||
#include "lam/constants.h"
|
|
||||||
#include "mpi.h"
|
|
||||||
#include "datatype.h"
|
#include "datatype.h"
|
||||||
|
|
||||||
/**
|
/**
|
46
src/mpi/datatype/datatype_memcpy.c
Обычный файл
46
src/mpi/datatype/datatype_memcpy.c
Обычный файл
@ -0,0 +1,46 @@
|
|||||||
|
/*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** @file alternative memcpy function */
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "lam_config.h"
|
||||||
|
#include "lam/stdint.h"
|
||||||
|
|
||||||
|
/*
 * Non-zero when the address X lies on a 4-byte boundary.
 *
 * Fully parenthesized on purpose: `==` binds tighter than `&`, so
 * `X & 3 == 0` is evaluated as `X & (3 == 0)` and is always 0.  The
 * address is converted through size_t so it is not truncated where
 * pointers are wider than 32 bits.
 */
#define ALIGNED32(X) ((((size_t) (X)) & (size_t) 3) == (size_t) 0)

/**
 * Alternative memcpy function
 *
 * @param dst   destination buffer
 * @param src   source buffer
 * @param size  number of bytes to copy
 * @param dummy unused variable
 * @return      the original value of dst
 *
 * When both buffers are 4-byte aligned the bulk of the data is moved
 * one 32-bit word at a time and the 0-3 trailing bytes are moved one
 * byte at a time; otherwise the call is forwarded to memcpy().  As
 * with memcpy(), the buffers must not overlap.
 *
 * On some systems, this performs better than the system memcpy.
 *
 * NOTE(review): memcpy() is declared in <string.h>, which is not among
 * the includes visible at the top of this file -- confirm it is
 * brought in indirectly.
 */
void *lam_memcpy_alt(void *dst, const void *src, size_t size, void *dummy)
{
    (void) dummy;

    if (ALIGNED32(src) && ALIGNED32(dst)) {
        uint32_t *restrict p = (uint32_t *) dst;
        const uint32_t *restrict q = (const uint32_t *) src;
        size_t n = size / sizeof(uint32_t);   /* whole words to copy */
        size_t i;

        for (i = 0; i < n; i++) {
            *p++ = *q++;
        }

        /* bytes left over after the whole words (0..3) */
        size -= n * sizeof(uint32_t);
        if (size != 0) {
            unsigned char *pb = (unsigned char *) p;
            const unsigned char *qb = (const unsigned char *) q;

            /* byte-wise: copying words here would run past both buffers */
            while (size--) {
                *pb++ = *qb++;
            }
        }
    } else {
        memcpy(dst, src, size);
    }

    return dst;
}
|
367
src/mpi/datatype/datatype_sum32.c
Обычный файл
367
src/mpi/datatype/datatype_sum32.c
Обычный файл
@ -0,0 +1,367 @@
|
|||||||
|
/*
|
||||||
|
* $HEADER$
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** @file 32-bit checksum support */
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "lam_config.h"
|
||||||
|
#include "lam/stdint.h"
|
||||||
|
#include "datatype.h"
|
||||||
|
|
||||||
|
/*
 * Non-zero when the address X lies on a 4-byte boundary.
 *
 * Fully parenthesized on purpose: `==` binds tighter than `&`, so
 * `X & 3 == 0` is evaluated as `X & (3 == 0)` and is always 0.  The
 * address is converted through size_t so it is not truncated where
 * pointers are wider than 32 bits.
 */
#define IS_32BIT_ALIGNED(X) \
    ((((size_t) (X)) & (size_t) 3) == (size_t) 0)
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy data from one buffer to another and calculate a 32-bit checksum
|
||||||
|
*
|
||||||
|
* @param dst pointer to the destination buffer
|
||||||
|
* @param src pointer to the source buffer
|
||||||
|
* @param size size of the buffer
|
||||||
|
* @param state pointer to a memcpy with checksum/CRC state structure
|
||||||
|
* @return the original value of dst
|
||||||
|
*
|
||||||
|
* This handles cumulative checksumming for arbitrary lengths and
|
||||||
|
* address alignments as best as it can; the contents of
|
||||||
|
* state->partial_int and state->partial_size are updated to reflect the
|
||||||
|
* last partial word's value and length (in bytes) -- this should
|
||||||
|
* allow proper handling of checksumming contiguous or noncontiguous
|
||||||
|
* buffers via multiple calls of lam_memcpy_sum32() - Mitch
|
||||||
|
*/
|
||||||
|
void *lam_memcpy_sum32(void *restrict dst,
|
||||||
|
const void *restrict src,
|
||||||
|
size_t size,
|
||||||
|
lam_memcpy_state_t *state)
|
||||||
|
{
|
||||||
|
uint32_t *restrict p = (uint32_t *) dst;
|
||||||
|
uint32_t *restrict q = (uint32_t *) src;
|
||||||
|
size_t csumlen = state->size;
|
||||||
|
size_t i;
|
||||||
|
ssize_t csumlenresidue;
|
||||||
|
uint32_t csum = 0;
|
||||||
|
uint32_t temp;
|
||||||
|
|
||||||
|
if (state->first_call) {
|
||||||
|
state->first_call = false;
|
||||||
|
state->partial_int = 0;
|
||||||
|
state->partial_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
csumlenresidue = (csumlen > size) ? (csumlen - size) : 0;
|
||||||
|
temp = state->partial_int;
|
||||||
|
|
||||||
|
if (intaligned(p) && intaligned(q)) {
|
||||||
|
if (state->partial_size) {
|
||||||
|
// do we have enough data to fill out the partial word?
|
||||||
|
if (size >= (sizeof(uint32_t) - state->partial_size)) { // YES, we do...
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q,
|
||||||
|
(sizeof(uint32_t) - state->partial_size));
|
||||||
|
memcpy(p, ((char *) &temp + state->partial_size),
|
||||||
|
(sizeof(uint32_t) - state->partial_size));
|
||||||
|
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
|
||||||
|
p = (uint32_t *) ((char *) p + sizeof(uint32_t) - state->partial_size);
|
||||||
|
csum += (temp - state->partial_int);
|
||||||
|
size -= sizeof(uint32_t) - state->partial_size;
|
||||||
|
// now we have an unaligned source and an unaligned destination
|
||||||
|
for (; size >= sizeof(*q); size -= sizeof(*q)) {
|
||||||
|
memcpy(&temp, q, sizeof(temp));
|
||||||
|
q++;
|
||||||
|
csum += temp;
|
||||||
|
memcpy(p, &temp, sizeof(temp));
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
state->partial_size = 0;
|
||||||
|
state->partial_int = 0;
|
||||||
|
} else { // NO, we don't...
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q, size);
|
||||||
|
memcpy(p, ((char *) &temp + state->partial_size), size);
|
||||||
|
q = (uint32_t *) ((char *) q + size);
|
||||||
|
p = (uint32_t *) ((char *) p + size);
|
||||||
|
csum += (temp - state->partial_int);
|
||||||
|
state->partial_int = temp;
|
||||||
|
state->partial_size += size;
|
||||||
|
size = 0;
|
||||||
|
}
|
||||||
|
} else { // fast path...
|
||||||
|
size_t numLongs = size / sizeof(uint32_t);
|
||||||
|
for (i = 0; i < numLongs; i++) {
|
||||||
|
csum += *q;
|
||||||
|
*p++ = *q++;
|
||||||
|
}
|
||||||
|
state->partial_int = 0;
|
||||||
|
state->partial_size = 0;
|
||||||
|
if (intaligned(size) && (csumlenresidue == 0)) {
|
||||||
|
state->sum = csum;
|
||||||
|
return dst;
|
||||||
|
} else {
|
||||||
|
size -= i * sizeof(uint32_t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (intaligned(q)) {
|
||||||
|
if (state->partial_size) {
|
||||||
|
// do we have enough data to fill out the partial word?
|
||||||
|
if (size >= (sizeof(uint32_t) - state->partial_size)) { // YES, we do...
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q,
|
||||||
|
(sizeof(uint32_t) - state->partial_size));
|
||||||
|
memcpy(p, ((char *) &temp + state->partial_size),
|
||||||
|
(sizeof(uint32_t) - state->partial_size));
|
||||||
|
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
|
||||||
|
p = (uint32_t *) ((char *) p + sizeof(uint32_t) - state->partial_size);
|
||||||
|
csum += (temp - state->partial_int);
|
||||||
|
size -= sizeof(uint32_t) - state->partial_size;
|
||||||
|
// now we have an unaligned source and an unknown alignment for our destination
|
||||||
|
if (intaligned(p)) {
|
||||||
|
size_t numLongs = size / sizeof(uint32_t);
|
||||||
|
for (i = 0; i < numLongs; i++) {
|
||||||
|
memcpy(&temp, q, sizeof(temp));
|
||||||
|
q++;
|
||||||
|
csum += temp;
|
||||||
|
*p++ = temp;
|
||||||
|
}
|
||||||
|
size -= i * sizeof(uint32_t);
|
||||||
|
} else {
|
||||||
|
for (; size >= sizeof(*q); size -= sizeof(*q)) {
|
||||||
|
memcpy(&temp, q, sizeof(temp));
|
||||||
|
q++;
|
||||||
|
csum += temp;
|
||||||
|
memcpy(p, &temp, sizeof(temp));
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
state->partial_int = 0;
|
||||||
|
state->partial_size = 0;
|
||||||
|
} else { // NO, we don't...
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q, size);
|
||||||
|
memcpy(p, ((char *) &temp + state->partial_size), size);
|
||||||
|
q = (uint32_t *) ((char *) q + size);
|
||||||
|
p = (uint32_t *) ((char *) p + size);
|
||||||
|
csum += (temp - state->partial_int);
|
||||||
|
state->partial_int = temp;
|
||||||
|
state->partial_size += size;
|
||||||
|
size = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (; size >= sizeof(*q); size -= sizeof(*q)) {
|
||||||
|
temp = *q++;
|
||||||
|
csum += temp;
|
||||||
|
memcpy(p, &temp, sizeof(temp));
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
state->partial_int = 0;
|
||||||
|
state->partial_size = 0;
|
||||||
|
}
|
||||||
|
} else if (intaligned(p)) {
|
||||||
|
if (state->partial_size) {
|
||||||
|
// do we have enough data to fill out the partial word?
|
||||||
|
if (size >= (sizeof(uint32_t) - state->partial_size)) { // YES, we do...
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q,
|
||||||
|
(sizeof(uint32_t) - state->partial_size));
|
||||||
|
memcpy(p, ((char *) &temp + state->partial_size),
|
||||||
|
(sizeof(uint32_t) - state->partial_size));
|
||||||
|
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
|
||||||
|
p = (uint32_t *) ((char *) p + sizeof(uint32_t) - state->partial_size);
|
||||||
|
csum += (temp - state->partial_int);
|
||||||
|
size -= sizeof(uint32_t) - state->partial_size;
|
||||||
|
// now we have a source of unknown alignment and a unaligned destination
|
||||||
|
if (intaligned(q)) {
|
||||||
|
for (; size >= sizeof(*q); size -= sizeof(*q)) {
|
||||||
|
temp = *q++;
|
||||||
|
csum += temp;
|
||||||
|
memcpy(p, &temp, sizeof(temp));
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
state->partial_int = 0;
|
||||||
|
state->partial_size = 0;
|
||||||
|
} else {
|
||||||
|
for (; size >= sizeof(*q); size -= sizeof(*q)) {
|
||||||
|
memcpy(&temp, q, sizeof(temp));
|
||||||
|
q++;
|
||||||
|
csum += temp;
|
||||||
|
memcpy(p, &temp, sizeof(temp));
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
state->partial_size = 0;
|
||||||
|
state->partial_int = 0;
|
||||||
|
}
|
||||||
|
} else { // NO, we don't...
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q, size);
|
||||||
|
memcpy(p, ((char *) &temp + state->partial_size), size);
|
||||||
|
q = (uint32_t *) ((char *) q + size);
|
||||||
|
p = (uint32_t *) ((char *) p + size);
|
||||||
|
csum += (temp - state->partial_int);
|
||||||
|
state->partial_int = temp;
|
||||||
|
state->partial_size += size;
|
||||||
|
size = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (; size >= sizeof(*q); size -= sizeof(*q)) {
|
||||||
|
memcpy(&temp, q, sizeof(temp));
|
||||||
|
q++;
|
||||||
|
csum += temp;
|
||||||
|
*p++ = temp;
|
||||||
|
}
|
||||||
|
state->partial_size = 0;
|
||||||
|
state->partial_int = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (state->partial_size) {
|
||||||
|
// do we have enough data to fill out the partial word?
|
||||||
|
if (size >= (sizeof(uint32_t) - state->partial_size)) { // YES, we do...
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q,
|
||||||
|
(sizeof(uint32_t) - state->partial_size));
|
||||||
|
memcpy(p, ((char *) &temp + state->partial_size),
|
||||||
|
(sizeof(uint32_t) - state->partial_size));
|
||||||
|
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
|
||||||
|
p = (uint32_t *) ((char *) p + sizeof(uint32_t) - state->partial_size);
|
||||||
|
csum += (temp - state->partial_int);
|
||||||
|
size -= sizeof(uint32_t) - state->partial_size;
|
||||||
|
// now we have an unknown alignment for our source and destination
|
||||||
|
if (intaligned(q) && intaligned(p)) {
|
||||||
|
size_t numLongs = size / sizeof(uint32_t);
|
||||||
|
for (i = 0; i < numLongs; i++) {
|
||||||
|
csum += *q;
|
||||||
|
*p++ = *q++;
|
||||||
|
}
|
||||||
|
size -= i * sizeof(uint32_t);
|
||||||
|
} else { // safe but slower for all other alignments
|
||||||
|
for (; size >= sizeof(*q); size -= sizeof(*q)) {
|
||||||
|
memcpy(&temp, q, sizeof(temp));
|
||||||
|
q++;
|
||||||
|
csum += temp;
|
||||||
|
memcpy(p, &temp, sizeof(temp));
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
state->partial_int = 0;
|
||||||
|
state->partial_size = 0;
|
||||||
|
} else { // NO, we don't...
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q, size);
|
||||||
|
memcpy(p, ((char *) &temp + state->partial_size), size);
|
||||||
|
q = (uint32_t *) ((char *) q + size);
|
||||||
|
p = (uint32_t *) ((char *) p + size);
|
||||||
|
csum += (temp - state->partial_int);
|
||||||
|
state->partial_int = temp;
|
||||||
|
state->partial_size += size;
|
||||||
|
size = 0;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (; size >= sizeof(*q); size -= sizeof(*q)) {
|
||||||
|
memcpy(&temp, q, sizeof(temp));
|
||||||
|
q++;
|
||||||
|
csum += temp;
|
||||||
|
memcpy(p, &temp, sizeof(temp));
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
state->partial_size = 0;
|
||||||
|
state->partial_int = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if size is non-zero there was a bit left, less than an uint32_t's worth */
|
||||||
|
|
||||||
|
if ((size != 0) && (csumlenresidue == 0)) {
|
||||||
|
temp = state->partial_int;
|
||||||
|
if (state->partial_size) {
|
||||||
|
if (size >= (sizeof(uint32_t) - state->partial_size)) {
|
||||||
|
// copy all remaining bytes from q to p
|
||||||
|
uint32_t copytemp = 0;
|
||||||
|
memcpy(&copytemp, q, size);
|
||||||
|
memcpy(p, &copytemp, size);
|
||||||
|
// fill out rest of partial word and add to checksum
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q,
|
||||||
|
(sizeof(uint32_t) - state->partial_size));
|
||||||
|
// avoid unsigned arithmetic overflow by subtracting the old partial
|
||||||
|
// word from the new one before adding to the checksum...
|
||||||
|
csum += (temp - state->partial_int);
|
||||||
|
size -= sizeof(uint32_t) - state->partial_size;
|
||||||
|
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
|
||||||
|
state->partial_size = size;
|
||||||
|
// reset temp, and calculate next partial word
|
||||||
|
temp = 0;
|
||||||
|
if (size) {
|
||||||
|
memcpy(&temp, q, size);
|
||||||
|
}
|
||||||
|
// add it to the the checksum
|
||||||
|
csum += temp;
|
||||||
|
state->partial_int = temp;
|
||||||
|
} else {
|
||||||
|
// copy all remaining bytes from q to p
|
||||||
|
uint32_t copytemp = 0;
|
||||||
|
memcpy(&copytemp, q, size);
|
||||||
|
memcpy(p, &copytemp, size);
|
||||||
|
// fill out rest of partial word and add to checksum
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q, size);
|
||||||
|
// avoid unsigned arithmetic overflow by subtracting the old partial
|
||||||
|
// word from the new one before adding to the checksum...
|
||||||
|
csum += temp - state->partial_int;
|
||||||
|
state->partial_int = temp;
|
||||||
|
state->partial_size += size;
|
||||||
|
}
|
||||||
|
} else { // fast path...
|
||||||
|
// temp and state->partial_int are 0 if state->partial_size is 0...
|
||||||
|
memcpy(&temp, q, size);
|
||||||
|
csum += temp;
|
||||||
|
memcpy(p, &temp, size);
|
||||||
|
state->partial_int = temp;
|
||||||
|
state->partial_size = size;
|
||||||
|
// done...return the checksum
|
||||||
|
}
|
||||||
|
} else if (csumlenresidue != 0) {
|
||||||
|
if (size != 0) {
|
||||||
|
temp = 0;
|
||||||
|
memcpy(&temp, q, size);
|
||||||
|
memcpy(p, &temp, size);
|
||||||
|
}
|
||||||
|
if (csumlenresidue < (ssize_t) (sizeof(uint32_t) - size - state->partial_size)) {
|
||||||
|
temp = state->partial_int;
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q, (size + csumlenresidue));
|
||||||
|
// avoid unsigned arithmetic overflow by subtracting the old partial
|
||||||
|
// word from the new one before adding to the checksum...
|
||||||
|
csum += temp - state->partial_int;
|
||||||
|
q++;
|
||||||
|
state->partial_int = temp;
|
||||||
|
state->partial_size += size + csumlenresidue;
|
||||||
|
csumlenresidue = 0;
|
||||||
|
} else {
|
||||||
|
// we have enough chksum data to fill out our last partial
|
||||||
|
// word
|
||||||
|
temp = state->partial_int;
|
||||||
|
memcpy(((char *) &temp + state->partial_size), q,
|
||||||
|
(sizeof(uint32_t) - state->partial_size));
|
||||||
|
// avoid unsigned arithmetic overflow by subtracting the old partial
|
||||||
|
// word from the new one before adding to the checksum...
|
||||||
|
csum += temp - state->partial_int;
|
||||||
|
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
|
||||||
|
csumlenresidue -= sizeof(uint32_t) - state->partial_size - size;
|
||||||
|
state->partial_size = 0;
|
||||||
|
state->partial_int = 0;
|
||||||
|
}
|
||||||
|
if (intaligned(q)) {
|
||||||
|
for (i = 0; i < csumlenresidue / sizeof(uint32_t); i++) {
|
||||||
|
csum += *q++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < csumlenresidue / sizeof(uint32_t); i++) {
|
||||||
|
memcpy(&temp, q, sizeof(temp));
|
||||||
|
csum += temp;
|
||||||
|
q++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
csumlenresidue -= i * sizeof(uint32_t);
|
||||||
|
if (csumlenresidue) {
|
||||||
|
temp = 0;
|
||||||
|
memcpy(&temp, q, csumlenresidue);
|
||||||
|
csum += temp;
|
||||||
|
state->partial_int = temp;
|
||||||
|
state->partial_size = csumlenresidue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* end else if (csumlenresidue != 0) */
|
||||||
|
|
||||||
|
state->sum = csum;
|
||||||
|
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
|
Загрузка…
Ссылка в новой задаче
Block a user