1
1

More datatype stuff, as well as reorganization to avoid filename

clashes.  I think this compiles OK -- but my automake may be broken.

This commit was SVN r392.
Этот коммит содержится в:
David Daniel 2004-01-15 01:02:23 +00:00
родитель b04786ab84
Коммит 92b97053c7
8 изменённых файлов: 656 добавлений и 62 удалений

Просмотреть файл

@ -13,11 +13,13 @@ headers = \
datatype.h datatype.h
libmpi_datatype_la_SOURCES = \ libmpi_datatype_la_SOURCES = \
$(headers) $(headers) \
copy.c \ datatype_copy.c \
create.c \ datatype_crc32.c \
delete.c \ datatype_create.c \
datatype.c datatype_delete.c \
datatype_memcpy.c \
datatype_sum32.c
# Conditionally install the header files # Conditionally install the header files

Просмотреть файл

@ -17,10 +17,15 @@
#include <sys/types.h> #include <sys/types.h>
#include <sys/uio.h> #include <sys/uio.h>
#include <string.h> #include <string.h>
#include <stdlib.h>
#include "lam_config.h"
#include "lam/constants.h"
#include "lam/lfc/object.h" #include "lam/lfc/object.h"
#include "lam/types.h" #include "lam/types.h"
#include "mpi.h"
/* typedefs ***********************************************************/ /* typedefs ***********************************************************/
typedef enum lam_checksum_kind_t lam_checksum_kind_t; typedef enum lam_checksum_kind_t lam_checksum_kind_t;
@ -92,16 +97,20 @@ enum lam_checksum_kind_t {
/* structs ************************************************************/ /* structs ************************************************************/
/** /**
* Abstraction of checksum for data * State of incremental memcpy with checksum or CRC
*/ */
struct lam_checksum_t { typedef struct lam_memcpy_state_t {
lam_checksum_kind_t kind; size_t size; /**< total size in bytes of the object
union { * being checksummed / CRCed */
uint64_t sum64; size_t partial_size; /**< size of non- uint32_t to be carried
uint32_t sum32; * over to next call */
uint32_t crc32; uint32_t partial_int; /**< value of non- uint32_t to be carried
} sum; * over to next call */
}; uint32_t sum; /**< current value of the CRC or
* checksum */
bool first_call; /**< is this the first call for this
* checksum/CRC? */
} lam_memcpy_state_t;
/** /**
@ -208,6 +217,7 @@ int lam_datatype_checksum(const void *addr,
int lam_datatype_copy(void *dst, int lam_datatype_copy(void *dst,
const void *src, const void *src,
size_t count, size_t count,
lam_datatype_t *datatype,
lam_memcpy_fn_t *memcpy_fn, lam_memcpy_fn_t *memcpy_fn,
void *csum); void *csum);
@ -380,21 +390,20 @@ int lam_datatype_scatter_iovec(lam_pack_state_t *state,
/* /*
* checksum functions * incremental memcpy with checksum / CRC functions
*/ */
/** /**
* Copy data from one buffer to another and calculate a 32-bit checksum * initialize the state for an incremental memcpy with checksum / CRC
* *
* @param dst pointer to the dstination buffer * @param state pointer to state object for the current sequence of copies
* @param src pointer to the source buffer * @param sum_size the length of the entire buffer to be checksummed
* @param size size of the buffer
* @param csum32 pointer to a 32-bit unsigned integer to hold the checksum
* @return the original value of dst
*/ */
static inline void *lam_memcpy(void *dst, const void *src, size_t size, void *dummy) static inline void lam_memcpy_init(lam_memcpy_state_t *state, size_t sum_size)
{ {
return memcpy(dst, src, size); state->size = sum_size;
state->first_call = true;
} }
@ -404,21 +413,21 @@ static inline void *lam_memcpy(void *dst, const void *src, size_t size, void *du
* @param dst pointer to the destination buffer * @param dst pointer to the destination buffer
* @param src pointer to the source buffer * @param src pointer to the source buffer
* @param size size of the buffer * @param size size of the buffer
* @param csum32 pointer to a 32-bit unsigned integer to hold the checksum * @param state pointer to a memcpy with checksum/CRC state structure (ignored)
* @return the original value of dst * @return the original value of dst
*/ */
void *lam_memcpy_csum32(void *dst, const void *src, size_t size, void *csum32); static inline void *lam_memcpy(void *dst, const void *src, size_t size,
lam_memcpy_state_t *state)
{
return memcpy(dst, src, size);
}
/** uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_crc);
* Copy data from one buffer to another and calculate a 64-bit checksum uint32_t lam_sum32(const void *restrict buffer, size_t size, uint32_t initial_crc);
* void *lam_memcpy_sum32(void *dst, const void *src, size_t size,
* @param dst pointer to the destination buffer lam_memcpy_state_t *state);
* @param src pointer to the source buffer void *lam_memcpy_crc32(void *dst, const void *src, size_t size,
* @param size size of the buffer lam_memcpy_state_t *state);
* @param csum64 pointer to a 64-bit unsigned integer to hold the checksum
* @return the original value of dst
*/
void *lam_memcpy_csum64(void *dst, const void *src, size_t size, void *csum64);
/** /**
* Copy data from one buffer to another and calculate a 32-bit checksum * Copy data from one buffer to another and calculate a 32-bit checksum
@ -426,27 +435,20 @@ void *lam_memcpy_csum64(void *dst, const void *src, size_t size, void *csum64);
* @param dst pointer to the destination buffer * @param dst pointer to the destination buffer
* @param src pointer to the source buffer * @param src pointer to the source buffer
* @param size size of the buffer * @param size size of the buffer
* @param crc32 pointer to a 32-bit unsigned integer to hold the CRC * @param state pointer to a memcpy with checksum/CRC state structure
* @return the original value of dst * @return the original value of dst
*/ */
void *lam_memcpy_crc32(void *dst, const void *src, size_t size, void *crc32);
/** /**
* Copy data from one buffer to another and calculate a 64-bit checksum * Copy data from one buffer to another and calculate a 32-bit checksum
* *
* @param dst pointer to the destination buffer * @param dst pointer to the destination buffer
* @param src pointer to the source buffer * @param src pointer to the source buffer
* @param size size of the buffer * @param size size of the buffer
* @param crc64 pointer to a 64-bit unsigned integer to hold the CRC * @param state pointer to a memcpy with checksum/CRC state structure
* @return the original value of dst * @return the original value of dst
*/ */
void *lam_memcpy_crc64(void *dst, const void *src, size_t size, void *crc64);
#if 0
typedef void (ulm_scatterv_t) (void *, int *, int *, ULMType_t *, void *,
int, ULMType_t *, int, int);
#endif
#endif /* LAM_DATATYPE_H_INCLUDED */ #endif /* LAM_DATATYPE_H_INCLUDED */

Просмотреть файл

@ -4,7 +4,8 @@
/** @file datatype copy function */ /** @file datatype copy function */
#include "lam_config.h" #include <stdlib.h>
#include "datatype.h" #include "datatype.h"
int lam_datatype_copy(void *dst, int lam_datatype_copy(void *dst,
@ -24,23 +25,23 @@ int lam_datatype_copy(void *dst,
if (LAM_SUCCESS == status) { if (LAM_SUCCESS == status) {
if (NULL == d) { if (NULL == d) {
memcpy_fn(dst, src, count, csum); (*memcpy_fn)(dst, src, count, csum);
} else if (LAM_DATATYPE_STATE_CONTIGUOUS & d->d_flags) { } else if (LAM_DATATYPE_STATE_CONTIGUOUS & d->d_flags) {
memcpy_fn(dst, src, count * d->d_extent, csum); (*memcpy_fn)(dst, src, count * d->d_extent, csum);
} else { } else {
lam_datavec_t dv = d->d_datavec; lam_datavec_t *dv = d->d_datavec;
size_t datavec_size = d->d_datavec; size_t datavec_size = d->d_datavec_size;
unsigned char *p = ((unsigned char *) dst); unsigned char *p = ((unsigned char *) dst);
unsigned char *q = ((unsigned char *) src); unsigned char *q = ((unsigned char *) src);
size_t i; size_t i, j;
while (count--) { while (count--) {
for (i = 0; i < d->d_datavec_size; i++) { for (i = 0; i < d->d_datavec_size; i++) {
for (j = 0; j < dv->dv_nrepeat; i++) { for (j = 0; j < dv->dv_nrepeat; i++) {
memcpy_fn(p + dv->dv_element[i].dve_offset, (*memcpy_fn)(p + dv->dv_element[i].dve_offset,
q + dv->dv_element[i].dve_offset, q + dv->dv_element[i].dve_offset,
dv->dv_element[i].dve_size, dv->dv_element[i].dve_size,
csum); csum);
} }
p += dv->dv_repeat_offset; p += dv->dv_repeat_offset;
q += dv->dv_repeat_offset; q += dv->dv_repeat_offset;

182
src/mpi/datatype/datatype_crc32.c Обычный файл
Просмотреть файл

@ -0,0 +1,182 @@
/*
* $HEADER$
*/
/** @file 32-bit cyclic redundancy check support */
#include <stdlib.h>
#include "lam_config.h"
#include "lam/stdint.h"
#include "datatype.h"
/* Generator polynomial used to build the MSB-first CRC32 look-up table. */
#define CRC_POLYNOMIAL ((uint32_t) 0x04c11db7)
/* Value the CRC register is seeded with on the first call for a state. */
#define CRC_INITIAL_REGISTER ((uint32_t) 0xffffffff)
/*
 * Evaluates to 1 when the address X is a multiple of 4, and to 0 otherwise.
 *
 * The mask is parenthesized explicitly: `==` binds tighter than `&`, so
 * the unparenthesized form `X & 3 == 0` is parsed as `X & (3 == 0)` and
 * is always 0.  The address is converted to size_t rather than uint32_t
 * so that it is not truncated where pointers are wider than 32 bits.
 */
#define IS_32BIT_ALIGNED(X) \
    ((((size_t) (X)) & (size_t) 3) == (size_t) 0 ? 1 : 0)
/*
 * Look-up table for CRC32 generation.  It is filled on demand: the CRC
 * routines in this file call initialize_crc_table() whenever
 * crc_table_initialized is still false.
 */
static bool crc_table_initialized = false;
static uint32_t crc_table[256];
/**
 * CRC32 table generation
 *
 * One-time initialization of the CRC32 look-up table.  Thanks to
 * Charles Michael Heard for his optimized CRC32 code.
 *
 * Entry i is obtained by placing i in the top byte of a 32-bit
 * register and shifting it left eight times, XOR-ing in
 * CRC_POLYNOMIAL each time a set bit is shifted out of the top.
 */
static void initialize_crc_table(void)
{
    uint32_t i;
    int j;
    uint32_t crc_accum;

    for (i = 0; i < 256; i++) {
        /* i is unsigned on purpose: shifting a signed int left by 24
         * is undefined for values >= 128 (the result does not fit). */
        crc_accum = i << 24;
        for (j = 0; j < 8; j++) {
            if (crc_accum & 0x80000000u) {
                crc_accum = (crc_accum << 1) ^ CRC_POLYNOMIAL;
            } else {
                crc_accum = (crc_accum << 1);
            }
        }
        crc_table[i] = crc_accum;
    }
    crc_table_initialized = true;
}
/**
 * Generate a 32-bit CRC for a buffer
 *
 * @param buffer       Data buffer
 * @param size         Size of buffer in bytes
 * @param initial_crc  Initial value of the CRC register
 * @return             The CRC
 *
 * Folds every byte of the buffer, in memory order, into a CRC register
 * that starts at initial_crc.  The look-up table is built first if it
 * has not been built yet.
 */
uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_crc)
{
    uint32_t reg = initial_crc;
    size_t remaining = size;

    if (!crc_table_initialized) {
        initialize_crc_table();
    }

    if (IS_32BIT_ALIGNED(buffer)) {
        const uint32_t *word_ptr = (const uint32_t *) buffer;
        const unsigned char *byte_ptr;

        /* whole words: load one word, then consume its four bytes */
        for (; remaining >= sizeof(uint32_t); remaining -= sizeof(uint32_t)) {
            uint32_t word = *word_ptr++;
            const unsigned char *wb = (const unsigned char *) &word;
            size_t k;

            for (k = 0; k < sizeof(uint32_t); k++) {
                int slot = ((reg >> 24) ^ wb[k]) & 0xff;
                reg = (reg << 8) ^ crc_table[slot];
            }
        }

        /* trailing bytes that do not make up a whole word */
        byte_ptr = (const unsigned char *) word_ptr;
        for (; remaining > 0; remaining--) {
            int slot = ((reg >> 24) ^ *byte_ptr++) & 0xff;
            reg = (reg << 8) ^ crc_table[slot];
        }
    } else {
        /* byte-at-a-time path */
        const unsigned char *byte_ptr = (const unsigned char *) buffer;

        for (; remaining > 0; remaining--) {
            int slot = ((reg >> 24) ^ *byte_ptr++) & 0xff;
            reg = (reg << 8) ^ crc_table[slot];
        }
    }

    return reg;
}
/**
 * Copy data from one buffer to another and calculate a 32-bit CRC
 *
 * @param dst    pointer to the destination buffer
 * @param src    pointer to the source buffer
 * @param size   number of bytes to copy
 * @param state  pointer to a memcpy with checksum/CRC state structure
 * @return       the original value of dst
 *
 * This handles cumulative CRCs for arbitrary lengths and address
 * alignments as best as it can.  On the first call for a given state
 * (state->first_call set) the CRC register starts from
 * CRC_INITIAL_REGISTER; on later calls it starts from state->sum.
 * The final CRC is placed back in state->sum.
 *
 * If state->size is larger than size, the (state->size - size) bytes
 * that follow the copied region in src are folded into the CRC as
 * well, without being copied.
 */
void *lam_memcpy_crc32(void *restrict dst,
                       const void *restrict src,
                       size_t size,
                       lam_memcpy_state_t *state)
{
    size_t crclenresidue = (state->size > size) ? (state->size - size) : 0;
    const unsigned char *ts;
    unsigned char *td;
    unsigned char t;
    uint32_t crc;
    int i, j;

    if (!crc_table_initialized) {
        initialize_crc_table();
    }

    if (state->first_call) {
        state->first_call = false;
        state->sum = CRC_INITIAL_REGISTER;
    }

    /* Read the running CRC only after the first-call seeding above;
     * reading it earlier would start from whatever state->sum held
     * and discard the seed. */
    crc = state->sum;

    if (IS_32BIT_ALIGNED(src) && IS_32BIT_ALIGNED(dst)) {
        uint32_t *p = (uint32_t *) dst;
        const uint32_t *q = (const uint32_t *) src;
        uint32_t tmp;

        /* copy whole integers */
        while (size >= sizeof(uint32_t)) {
            tmp = *q++;
            *p++ = tmp;
            ts = (const unsigned char *) &tmp;
            for (j = 0; j < (int) sizeof(uint32_t); j++) {
                i = ((crc >> 24) ^ *ts++) & 0xff;
                crc = (crc << 8) ^ crc_table[i];
            }
            size -= sizeof(uint32_t);
        }
        ts = (const unsigned char *) q;
        td = (unsigned char *) p;
    } else {
        ts = (const unsigned char *) src;
        td = (unsigned char *) dst;
    }

    /* copy the remaining bytes (everything, on the unaligned path) */
    while (size--) {
        t = *ts++;
        *td++ = t;
        i = ((crc >> 24) ^ t) & 0xff;
        crc = (crc << 8) ^ crc_table[i];
    }

    /* calculate CRC over remaining bytes... */
    while (crclenresidue--) {
        i = ((crc >> 24) ^ *ts++) & 0xff;
        crc = (crc << 8) ^ crc_table[i];
    }

    state->sum = crc;
    return dst;
}

Просмотреть файл

@ -4,9 +4,6 @@
/** @file datatype creation function */ /** @file datatype creation function */
#include "lam_config.h"
#include "lam/constants.h"
#include "mpi.h"
#include "datatype.h" #include "datatype.h"
/** /**

Просмотреть файл

@ -4,9 +4,6 @@
/** @file datatype deletion function */ /** @file datatype deletion function */
#include "lam_config.h"
#include "lam/constants.h"
#include "mpi.h"
#include "datatype.h" #include "datatype.h"
/** /**

46
src/mpi/datatype/datatype_memcpy.c Обычный файл
Просмотреть файл

@ -0,0 +1,46 @@
/*
* $HEADER$
*/
/** @file alternative memcpy function */
#include <stdlib.h>
#include <string.h>
#include "lam_config.h"
#include "lam/stdint.h"
/*
 * Evaluates to 1 when the address X is a multiple of 4, and to 0 otherwise.
 * The mask is parenthesized because `==` binds tighter than `&`; without
 * the parentheses the test is parsed as `X & (3 == 0)` and is always 0.
 */
#define ALIGNED32(X) ((((size_t) (X)) & (size_t) 3) == (size_t) 0 ? 1 : 0)
/**
 * Alternative memcpy function
 *
 * @param dst    destination buffer
 * @param src    source buffer
 * @param size   size of buffer in bytes
 * @param dummy  unused variable
 * @return       the original value of dst
 *
 * When both buffers are 4-byte aligned the data is copied one 32-bit
 * word at a time, followed by a byte-wise copy of the 0-3 trailing
 * bytes; otherwise the call is forwarded to memcpy().
 *
 * On some systems, this performs better than the system memcpy.
 */
void *lam_memcpy_alt(void *dst, const void *src, size_t size, void *dummy)
{
    (void) dummy;

    if (ALIGNED32(src) && ALIGNED32(dst)) {
        uint32_t *restrict p = (uint32_t *) dst;
        const uint32_t *restrict q = (const uint32_t *) src;
        /* size_t counters so that large sizes are not truncated */
        size_t n = size / sizeof(uint32_t);
        size_t tail = size % sizeof(uint32_t);
        unsigned char *pb;
        const unsigned char *qb;
        size_t i;

        for (i = 0; i < n; i++) {
            *p++ = *q++;
        }

        /* The tail is shorter than a word, so it must be copied byte
         * by byte; copying words here would run past both buffers. */
        pb = (unsigned char *) p;
        qb = (const unsigned char *) q;
        while (tail--) {
            *pb++ = *qb++;
        }
    } else {
        memcpy(dst, src, size);
    }

    return dst;
}

367
src/mpi/datatype/datatype_sum32.c Обычный файл
Просмотреть файл

@ -0,0 +1,367 @@
/*
* $HEADER$
*/
/** @file 32-bit checksum support */
#include <stdlib.h>
#include "lam_config.h"
#include "lam/stdint.h"
#include "datatype.h"
/*
 * Evaluates to 1 when X (an address or a byte count) is a multiple of
 * 4, and to 0 otherwise.  The mask is parenthesized because `==` binds
 * tighter than `&`; without the parentheses the test is parsed as
 * `X & (3 == 0)` and is always 0.
 */
#define IS_32BIT_ALIGNED(X) \
    ((((size_t) (X)) & (size_t) 3) == (size_t) 0 ? 1 : 0)
/*
 * NOTE(review): lam_memcpy_sum32() in this file tests alignment with
 * intaligned(), which is not defined anywhere in this file.  The
 * guarded definition below supplies it only when no included header
 * already does -- confirm which definition is intended.
 */
#ifndef intaligned
#define intaligned(X) IS_32BIT_ALIGNED(X)
#endif
/**
 * Copy data from one buffer to another and calculate a 32-bit checksum
 *
 * @param dst pointer to the destination buffer
 * @param src pointer to the source buffer
 * @param size size of the buffer
 * @param state pointer to a memcpy with checksum/CRC state structure
 * @return the original value of dst
 *
 * This handles cumulative checksumming for arbitrary lengths and
 * address alignments as best as it can; state->partial_int and
 * state->partial_size are updated to reflect the last partial word's
 * value and length (in bytes) -- this should allow proper handling of
 * checksumming contiguous or noncontiguous buffers via multiple calls
 * of this function - Mitch
 *
 * On the first call for a state (state->first_call set) the partial
 * word and its length are reset to zero.
 *
 * If state->size is larger than size, the difference is kept in
 * csumlenresidue and the bytes following the copied region in src are
 * added to the checksum without being copied.
 *
 * The local sum starts at zero on every call and is stored into
 * state->sum on return.
 * NOTE(review): the previous contents of state->sum are not added in
 * here -- presumably the caller accumulates across calls; confirm.
 *
 * NOTE(review): intaligned() is not defined in this file; it is
 * presumably a 4-byte alignment test supplied by a header (this file
 * defines IS_32BIT_ALIGNED but does not use it) -- confirm.
 */
void *lam_memcpy_sum32(void *restrict dst,
const void *restrict src,
size_t size,
lam_memcpy_state_t *state)
{
uint32_t *restrict p = (uint32_t *) dst;
uint32_t *restrict q = (uint32_t *) src;
size_t csumlen = state->size;
size_t i;
ssize_t csumlenresidue;
uint32_t csum = 0;
uint32_t temp;
/* first call for this state: start with no carried-over partial word */
if (state->first_call) {
state->first_call = false;
state->partial_int = 0;
state->partial_size = 0;
}
/* number of bytes to checksum beyond the bytes that are copied */
csumlenresidue = (csumlen > size) ? (csumlen - size) : 0;
temp = state->partial_int;
/* case 1: intaligned() holds for both destination and source */
if (intaligned(p) && intaligned(q)) {
if (state->partial_size) {
// do we have enough data to fill out the partial word?
if (size >= (sizeof(uint32_t) - state->partial_size)) { // YES, we do...
memcpy(((char *) &temp + state->partial_size), q,
(sizeof(uint32_t) - state->partial_size));
memcpy(p, ((char *) &temp + state->partial_size),
(sizeof(uint32_t) - state->partial_size));
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
p = (uint32_t *) ((char *) p + sizeof(uint32_t) - state->partial_size);
csum += (temp - state->partial_int);
size -= sizeof(uint32_t) - state->partial_size;
// now we have an unaligned source and an unaligned destination
for (; size >= sizeof(*q); size -= sizeof(*q)) {
memcpy(&temp, q, sizeof(temp));
q++;
csum += temp;
memcpy(p, &temp, sizeof(temp));
p++;
}
state->partial_size = 0;
state->partial_int = 0;
} else { // NO, we don't...
memcpy(((char *) &temp + state->partial_size), q, size);
memcpy(p, ((char *) &temp + state->partial_size), size);
q = (uint32_t *) ((char *) q + size);
p = (uint32_t *) ((char *) p + size);
csum += (temp - state->partial_int);
state->partial_int = temp;
state->partial_size += size;
size = 0;
}
} else { // fast path...
size_t numLongs = size / sizeof(uint32_t);
for (i = 0; i < numLongs; i++) {
csum += *q;
*p++ = *q++;
}
state->partial_int = 0;
state->partial_size = 0;
/* nothing left to copy or checksum: store the sum and return early */
if (intaligned(size) && (csumlenresidue == 0)) {
state->sum = csum;
return dst;
} else {
size -= i * sizeof(uint32_t);
}
}
/* case 2: intaligned() holds for the source only */
} else if (intaligned(q)) {
if (state->partial_size) {
// do we have enough data to fill out the partial word?
if (size >= (sizeof(uint32_t) - state->partial_size)) { // YES, we do...
memcpy(((char *) &temp + state->partial_size), q,
(sizeof(uint32_t) - state->partial_size));
memcpy(p, ((char *) &temp + state->partial_size),
(sizeof(uint32_t) - state->partial_size));
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
p = (uint32_t *) ((char *) p + sizeof(uint32_t) - state->partial_size);
csum += (temp - state->partial_int);
size -= sizeof(uint32_t) - state->partial_size;
// now we have an unaligned source and an unknown alignment for our destination
if (intaligned(p)) {
size_t numLongs = size / sizeof(uint32_t);
for (i = 0; i < numLongs; i++) {
memcpy(&temp, q, sizeof(temp));
q++;
csum += temp;
*p++ = temp;
}
size -= i * sizeof(uint32_t);
} else {
for (; size >= sizeof(*q); size -= sizeof(*q)) {
memcpy(&temp, q, sizeof(temp));
q++;
csum += temp;
memcpy(p, &temp, sizeof(temp));
p++;
}
}
state->partial_int = 0;
state->partial_size = 0;
} else { // NO, we don't...
memcpy(((char *) &temp + state->partial_size), q, size);
memcpy(p, ((char *) &temp + state->partial_size), size);
q = (uint32_t *) ((char *) q + size);
p = (uint32_t *) ((char *) p + size);
csum += (temp - state->partial_int);
state->partial_int = temp;
state->partial_size += size;
size = 0;
}
} else {
for (; size >= sizeof(*q); size -= sizeof(*q)) {
temp = *q++;
csum += temp;
memcpy(p, &temp, sizeof(temp));
p++;
}
state->partial_int = 0;
state->partial_size = 0;
}
/* case 3: intaligned() holds for the destination only */
} else if (intaligned(p)) {
if (state->partial_size) {
// do we have enough data to fill out the partial word?
if (size >= (sizeof(uint32_t) - state->partial_size)) { // YES, we do...
memcpy(((char *) &temp + state->partial_size), q,
(sizeof(uint32_t) - state->partial_size));
memcpy(p, ((char *) &temp + state->partial_size),
(sizeof(uint32_t) - state->partial_size));
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
p = (uint32_t *) ((char *) p + sizeof(uint32_t) - state->partial_size);
csum += (temp - state->partial_int);
size -= sizeof(uint32_t) - state->partial_size;
// now we have a source of unknown alignment and a unaligned destination
if (intaligned(q)) {
for (; size >= sizeof(*q); size -= sizeof(*q)) {
temp = *q++;
csum += temp;
memcpy(p, &temp, sizeof(temp));
p++;
}
state->partial_int = 0;
state->partial_size = 0;
} else {
for (; size >= sizeof(*q); size -= sizeof(*q)) {
memcpy(&temp, q, sizeof(temp));
q++;
csum += temp;
memcpy(p, &temp, sizeof(temp));
p++;
}
state->partial_size = 0;
state->partial_int = 0;
}
} else { // NO, we don't...
memcpy(((char *) &temp + state->partial_size), q, size);
memcpy(p, ((char *) &temp + state->partial_size), size);
q = (uint32_t *) ((char *) q + size);
p = (uint32_t *) ((char *) p + size);
csum += (temp - state->partial_int);
state->partial_int = temp;
state->partial_size += size;
size = 0;
}
} else {
for (; size >= sizeof(*q); size -= sizeof(*q)) {
memcpy(&temp, q, sizeof(temp));
q++;
csum += temp;
*p++ = temp;
}
state->partial_size = 0;
state->partial_int = 0;
}
/* case 4: intaligned() holds for neither pointer */
} else {
if (state->partial_size) {
// do we have enough data to fill out the partial word?
if (size >= (sizeof(uint32_t) - state->partial_size)) { // YES, we do...
memcpy(((char *) &temp + state->partial_size), q,
(sizeof(uint32_t) - state->partial_size));
memcpy(p, ((char *) &temp + state->partial_size),
(sizeof(uint32_t) - state->partial_size));
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
p = (uint32_t *) ((char *) p + sizeof(uint32_t) - state->partial_size);
csum += (temp - state->partial_int);
size -= sizeof(uint32_t) - state->partial_size;
// now we have an unknown alignment for our source and destination
if (intaligned(q) && intaligned(p)) {
size_t numLongs = size / sizeof(uint32_t);
for (i = 0; i < numLongs; i++) {
csum += *q;
*p++ = *q++;
}
size -= i * sizeof(uint32_t);
} else { // safe but slower for all other alignments
for (; size >= sizeof(*q); size -= sizeof(*q)) {
memcpy(&temp, q, sizeof(temp));
q++;
csum += temp;
memcpy(p, &temp, sizeof(temp));
p++;
}
}
state->partial_int = 0;
state->partial_size = 0;
} else { // NO, we don't...
memcpy(((char *) &temp + state->partial_size), q, size);
memcpy(p, ((char *) &temp + state->partial_size), size);
q = (uint32_t *) ((char *) q + size);
p = (uint32_t *) ((char *) p + size);
csum += (temp - state->partial_int);
state->partial_int = temp;
state->partial_size += size;
size = 0;
}
} else {
for (; size >= sizeof(*q); size -= sizeof(*q)) {
memcpy(&temp, q, sizeof(temp));
q++;
csum += temp;
memcpy(p, &temp, sizeof(temp));
p++;
}
state->partial_size = 0;
state->partial_int = 0;
}
}
/* if size is non-zero there was a bit left, less than an uint32_t's worth */
if ((size != 0) && (csumlenresidue == 0)) {
temp = state->partial_int;
if (state->partial_size) {
if (size >= (sizeof(uint32_t) - state->partial_size)) {
// copy all remaining bytes from q to p
uint32_t copytemp = 0;
memcpy(&copytemp, q, size);
memcpy(p, &copytemp, size);
// fill out rest of partial word and add to checksum
memcpy(((char *) &temp + state->partial_size), q,
(sizeof(uint32_t) - state->partial_size));
// avoid unsigned arithmetic overflow by subtracting the old partial
// word from the new one before adding to the checksum...
csum += (temp - state->partial_int);
size -= sizeof(uint32_t) - state->partial_size;
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
state->partial_size = size;
// reset temp, and calculate next partial word
temp = 0;
if (size) {
memcpy(&temp, q, size);
}
// add it to the the checksum
csum += temp;
state->partial_int = temp;
} else {
// copy all remaining bytes from q to p
uint32_t copytemp = 0;
memcpy(&copytemp, q, size);
memcpy(p, &copytemp, size);
// fill out rest of partial word and add to checksum
memcpy(((char *) &temp + state->partial_size), q, size);
// avoid unsigned arithmetic overflow by subtracting the old partial
// word from the new one before adding to the checksum...
csum += temp - state->partial_int;
state->partial_int = temp;
state->partial_size += size;
}
} else { // fast path...
// temp and state->partial_int are 0 if state->partial_size is 0...
memcpy(&temp, q, size);
csum += temp;
memcpy(p, &temp, size);
state->partial_int = temp;
state->partial_size = size;
// done...return the checksum
}
/* checksum-only region: bytes of src past the copied data */
} else if (csumlenresidue != 0) {
if (size != 0) {
temp = 0;
memcpy(&temp, q, size);
memcpy(p, &temp, size);
}
if (csumlenresidue < (ssize_t) (sizeof(uint32_t) - size - state->partial_size)) {
temp = state->partial_int;
memcpy(((char *) &temp + state->partial_size), q, (size + csumlenresidue));
// avoid unsigned arithmetic overflow by subtracting the old partial
// word from the new one before adding to the checksum...
csum += temp - state->partial_int;
q++;
state->partial_int = temp;
state->partial_size += size + csumlenresidue;
csumlenresidue = 0;
} else {
// we have enough chksum data to fill out our last partial
// word
temp = state->partial_int;
memcpy(((char *) &temp + state->partial_size), q,
(sizeof(uint32_t) - state->partial_size));
// avoid unsigned arithmetic overflow by subtracting the old partial
// word from the new one before adding to the checksum...
csum += temp - state->partial_int;
q = (uint32_t *) ((char *) q + sizeof(uint32_t) - state->partial_size);
csumlenresidue -= sizeof(uint32_t) - state->partial_size - size;
state->partial_size = 0;
state->partial_int = 0;
}
if (intaligned(q)) {
for (i = 0; i < csumlenresidue / sizeof(uint32_t); i++) {
csum += *q++;
}
} else {
for (i = 0; i < csumlenresidue / sizeof(uint32_t); i++) {
memcpy(&temp, q, sizeof(temp));
csum += temp;
q++;
}
}
csumlenresidue -= i * sizeof(uint32_t);
if (csumlenresidue) {
temp = 0;
memcpy(&temp, q, csumlenresidue);
csum += temp;
state->partial_int = temp;
state->partial_size = csumlenresidue;
}
}
/* end else if (csumlenresidue != 0) */
/* publish the sum computed by this call */
state->sum = csum;
return dst;
}