From 18c26e93815769218f59de67bc40e6d4d57679e6 Mon Sep 17 00:00:00 2001 From: David Daniel Date: Tue, 10 Feb 2004 22:07:08 +0000 Subject: [PATCH] More datatype updates This commit was SVN r710. --- src/mpi/datatype/datatype.c | 48 ++----- src/mpi/datatype/datatype.h | 194 ++++++++++++++++++++++------- src/mpi/datatype/datatype_copy.c | 5 +- src/mpi/datatype/datatype_crc32.c | 40 ++---- src/mpi/datatype/datatype_create.c | 14 +-- src/mpi/datatype/datatype_delete.c | 11 +- src/mpi/datatype/datatype_memcpy.c | 20 +-- src/mpi/datatype/datatype_sum32.c | 79 ++++++------ 8 files changed, 225 insertions(+), 186 deletions(-) diff --git a/src/mpi/datatype/datatype.c b/src/mpi/datatype/datatype.c index 632f135daf..6c8a02154b 100644 --- a/src/mpi/datatype/datatype.c +++ b/src/mpi/datatype/datatype.c @@ -2,7 +2,9 @@ * $HEADER$ */ -/** @file lam_datatype_t implementation */ +/* + * lam_datatype_t implementation + */ #include "lam_config.h" #include "lam/datatype.h" @@ -15,41 +17,6 @@ lam_class_info_t lam_datatype_t_class_info = { }; -static int lam_datatype_init = 0; -lam_dbl_list_t lam_p2p_cdis; - - -void lam_datatype_t(lam_p2p_cdi_t * cdi) -{ - if (fetchNset(&lam_p2p_cdis_init, 1) == 0) { - lam_dbl_construct(&lam_p2p_cdis); - } - lam_dbl_item_construct(&cdi->cdi_base); - cdi->cdi_name = 0; - cdi->cdi_id = lam_dbl_get_size(&lam_p2p_cdis) + 1; - cdi->cdi_frag_first_size = 0; - cdi->cdi_frag_min_size = 0; - cdi->cdi_frag_max_size = 0; - cdi->cdi_endpoint_latency = 0; - cdi->cdi_endpoint_bandwidth = 0; - cdi->cdi_endpoint_count = 0; - lam_dbl_construct(&cdi->cdi_incomplete_sends); - lam_dbl_append(&lam_p2p_cdis, &cdi->cdi_base); -} - - -void lam_p2p_cdi_destruct(lam_p2p_cdi_t * cdi) -{ - lam_dbl_remove(&lam_p2p_cdis, &cdi->cdi_base); - lam_dbl_destruct(&cdi->cdi_incomplete_sends); - lam_dbl_item_destruct(&cdi->cdi_base); -} - - -/* - * This random stuff checked in while I think about things ... 
- */ - /** * type_pack -- Incrementally copy data type arrays to/from a packed buffer * @@ -117,11 +84,11 @@ lam_packer_status_t lam_packer(lam_packer_direction_t direction, void *buf, size_t bufsize, - size_t * offset, + size_t *offset, void *typebuf, size_t ntype, - lam_datatype_t * datatype, - lam_pack_state_t * pack_state, lam_checksum_t * checksum) + lam_datatype_t *datatype, + lam_pack_state_t *pack_state, lam_checksum_t *checksum) { return 0; } @@ -139,8 +106,7 @@ void lam_datatype_copy(void *dest, const void *src, size_t count, - lam_datatype_t *datatype, - lam_checksum_t *csum) + lam_datatype_t *datatype, lam_checksum_t *csum) { if (datatype == NULL) { memmove(dest, src, count); diff --git a/src/mpi/datatype/datatype.h b/src/mpi/datatype/datatype.h index 1db62c00cd..3fbc4f7a8a 100644 --- a/src/mpi/datatype/datatype.h +++ b/src/mpi/datatype/datatype.h @@ -4,11 +4,11 @@ /** @file * - * Data stuctures and functions related to LAM datatypes. - */ - -/* - * LAM internal data type representation + * lam_datatype_t interface for LAM internal data type representation + * + * lam_datatype_t is a class which represents contiguous or + * non-contiguous data together with constituent type-related + * information. It is the LAM's-eye view of MPI_Datatype. */ #ifndef LAM_DATATYPE_H_INCLUDED @@ -21,11 +21,33 @@ #include "lam_config.h" #include "lam/constants.h" +#include "lam/stdint.h" #include "lam/lfc/object.h" #include "lam/types.h" #include "mpi.h" +/* macros *************************************************************/ + +/** + * Test 32-bit alignment of an address + * + * @param addr An address + * @return true if the address is 32-bit aligned + */ +#define LAM_IS_32BIT_ALIGNED(addr) \ + (((uint32_t) addr & (uint32_t) 3) == (uint32_t) 0 ? 
true : false) + +/** + * Test 64-bit alignment of an address + * + * @param addr An address + * @return true if the address is 64-bit aligned + */ +#define LAM_IS_64BIT_ALIGNED(addr) \ + (((uint64_t) addr & (uint64_t) 7) == (uint64_t) 0 ? true : false) + + /* typedefs ***********************************************************/ typedef struct lam_checksum_t lam_checksum_t; @@ -34,7 +56,13 @@ typedef struct lam_datavec_element_t lam_datavec_element_t; typedef struct lam_datavec_t lam_datavec_t; typedef struct lam_dataxdr_t lam_dataxdr_t; typedef struct lam_pack_state_t lam_pack_state_t; +typedef struct lam_memcpy_state_t lam_memcpy_state_t; +/* Function prototype for a generalized memcpy() */ +typedef void *(lam_memcpy_fn_t) (void *restrict dst, + const void *restrict src, + size_t size, + lam_memcpy_state_t *check); /* enums **************************************************************/ @@ -50,7 +78,6 @@ enum lam_datatype_state_t { LAM_DATATYPE_STATE_XDR = 1 << 5, /* etc. */ }; -typedef enum lam_datatype_state_t lam_datatype_state_t; /** @@ -78,7 +105,6 @@ enum lam_datatype_kind_t { LAM_DATATYPE_KIND_STRUCT_FORTRAN, LAM_DATATYPE_KIND_VECTOR_FORTRAN }; -typedef enum lam_datatype_kind_t lam_datatype_kind_t; /** @@ -90,15 +116,18 @@ enum lam_checksum_kind_t { LAM_CHECKSUM_KIND_SUM32, LAM_CHECKSUM_KIND_SUM64 }; -typedef enum lam_checksum_kind_t lam_checksum_kind_t; +typedef enum lam_datatype_state_t lam_datatype_state_t; +typedef enum lam_datatype_kind_t lam_datatype_kind_t; +typedef enum lam_checksum_kind_t lam_checksum_kind_t; + /* structs ************************************************************/ /** * State of incremental memcpy with checksum or CRC */ -typedef struct lam_memcpy_state_t { +struct lam_memcpy_state_t { size_t size; /**< total size in bytes of the object * being checksummed / CRCed */ size_t partial_size; /**< size of non- uint32_t to be carried @@ -109,7 +138,7 @@ typedef struct lam_memcpy_state_t { * checksum */ bool first_call; /**< is this 
the first call for this * checksum/CRC? */ -} lam_memcpy_state_t; +}; /** @@ -181,24 +210,6 @@ struct lam_dataxdr_element_t { }; -/** - * Function protoype for a generalized memcpy() - * - * Copy data from one buffer to another and optionally calculate a - * checksum or CRC - * - * @param dst pointer to the destination buffer - * @param src pointer to the source buffer - * @param size size of the buffer - * @param check pointer to the optional checksum or CRC - * @return the original value of dst - */ -typedef void *(lam_memcpy_fn_t) (void *restrict dst, - const void *restrict src, - size_t size, - lam_memcpy_state_t *check); - - /* interface **********************************************************/ /** @@ -230,7 +241,7 @@ int lam_datatype_copy(void *dst, size_t count, lam_datatype_t *datatype, lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check); + lam_memcpy_state_t *check); /** * Copy (the contents of) an array of data types, and convert to @@ -251,8 +262,9 @@ int lam_datatype_convert(void *dst, const void *src, lam_datatype_t *src_datatype, size_t src_count, - lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check); + lam_memcpy_fn_t *memcpy_fn, + lam_memcpy_state_t *check); + /** * Pack state @@ -296,7 +308,7 @@ int lam_datatype_pack(lam_pack_state_t *state, size_t ntype, lam_datatype_t *datatype, lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check); + lam_memcpy_state_t *check); /** @@ -326,7 +338,7 @@ int lam_datatype_unpack(lam_pack_state_t *state, size_t bufsize, lam_datatype_t *datatype, lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check); + lam_memcpy_state_t *check); /** * Incrementally generate an iovec for gathering from an array of @@ -398,7 +410,7 @@ int lam_datatype_scatter_iovec(lam_pack_state_t *state, size_t bufsize, lam_datatype_t *datatype, lam_memcpy_fn_t *memcpy_fn, - lam_memcpy_state_t *check); + lam_memcpy_state_t *check); /* @@ -419,13 +431,6 @@ lam_memcpy_init(lam_memcpy_state_t *state, size_t sum_size) state->first_call 
= true; } -/* - * prototypes for memcpy functions - */ - -extern lam_memcpy_fn_t lam_memcpy_crc32; -extern lam_memcpy_fn_t lam_memcpy_sum32; -extern lam_memcpy_fn_t lam_memcpy_sum64; /** * Copy data from one buffer to another @@ -442,10 +447,67 @@ static inline void *lam_memcpy(void *dst, const void *src, size_t size, return memcpy(dst, src, size); } +/** + * An alternative version of memcpy that may out-perform the system + * version on some (silly) systems. + * + * @param dst pointer to the destination buffer + * @param src pointer to the source buffer + * @param size size of the buffer + * @param state unused + * @return the original value of dst + */ +void *lam_memcpy_alt(void *dst, const void *src, size_t size, + lam_memcpy_state_t *state); + + +/** + * Generate a 32-bit CRC for a buffer + * + * @param buffer Data buffer + * @param size Size of buffer + * @param initial_crc Initial value of the CRC register + * @return The CRC + * + * Generate a 32-bit CRC for a data buffer starting from a given CRC + * value. + */ uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_crc); -uint32_t lam_sum32(const void *restrict buffer, size_t size, - uint32_t initial_crc); + + +/** + * Generate a 32-bit checksum for a buffer + * + * @param buffer Data buffer + * @param size Size of buffer + * @return The checksum + * + * Generate a 32-bit checksum for a data buffer. Unlike lam_crc32(), + * no initial value is supplied. + */ +uint32_t lam_sum32(const void *restrict buffer, size_t size); + + +/** + * Copy data from one buffer to another and calculate a 32-bit CRC + * + * @param dst pointer to the destination buffer + * @param src pointer to the source buffer + * @param size size of the buffer + * @param state pointer to a memcpy with checksum/CRC state structure + * @return the original value of dst + * + * This handles cumulative CRCs for arbitrary lengths and address + * alignments as best as it can. The initial contents of state->sum are + * used as the starting value of the CRC. 
The final CRC is placed + * back in state->sum. + */ +void *lam_memcpy_crc32(void *restrict dst, + const void *restrict src, + size_t size, + lam_memcpy_state_t *check); + /** * Copy data from one buffer to another and calculate a 32-bit checksum @@ -455,7 +517,18 @@ uint32_t lam_sum32(const void *restrict buffer, size_t size, * @param size size of the buffer * @param state pointer to a memcpy with checksum/CRC state structure * @return the original value of dst + * + * This handles cumulative checksumming for arbitrary lengths and + * address alignments as best as it can; the contents of + * state->partial_int and state->partial_size are updated to reflect the + * last partial word's value and length (in bytes) -- this should + * allow proper handling of checksumming contiguous or noncontiguous + * buffers via multiple calls of lam_memcpy_sum32() - Mitch */ +void *lam_memcpy_sum32(void *restrict dst, + const void *restrict src, + size_t size, + lam_memcpy_state_t *check); /** @@ -467,7 +540,29 @@ uint32_t lam_sum32(const void *restrict buffer, size_t size, * @param state pointer to a memcpy with checksum/CRC state structure * @return the original value of dst */ +void *lam_memcpy_sum64(void *restrict dst, + const void *restrict src, + size_t size, + lam_memcpy_state_t *check); + +/** + * Create a LAM/MPI datatype + * + * @param combiner integer identifying the kind of MPI create function + * @param nintegers number of integers passed to the create function + * @param integers array of integers passed to the create function + * @param naddress number of addresses passed to the create function + * @param address array of addresses passed to the create function + * @param ntype number of data types passed to the create function + * @param type array of data types passed to the create function + * @param newtype pointer to address of new type + * @return LAM_SUCCESS on successful creation, LAM_ERROR otherwise + * + * This is the central location for creation of data types in 
LAM/MPI. + * All MPI_Type_create functions rely upon this to do the actual type + * creation. + */ int lam_datatype_create(int combiner, int nintegers, int integers[], @@ -477,8 +572,19 @@ int lam_datatype_create(int combiner, lam_datatype_t *types[], lam_datatype_t **newtype); + +/** + * Delete a LAM/MPI datatype (actually, just mark it for deletion) + * + * @param type datatype + * @return LAM_SUCCESS on success, LAM_ERROR otherwise + * + * NOTE(review): this paragraph was copied from lam_datatype_create() + * and described type creation, not deletion -- confirm the intended + * deletion semantics and document them here. + */ int lam_datatype_delete(lam_datatype_t *type); -void *lam_memcpy_alt(void *dst, const void *src, size_t size, void *dummy); + #endif /* LAM_DATATYPE_H_INCLUDED */ diff --git a/src/mpi/datatype/datatype_copy.c b/src/mpi/datatype/datatype_copy.c index 70b61b5f71..fabd83727b 100644 --- a/src/mpi/datatype/datatype_copy.c +++ b/src/mpi/datatype/datatype_copy.c @@ -2,12 +2,15 @@ * $HEADER$ */ -/** @file dataype copy function */ +/* lam_datatype_t copy function */ #include #include "datatype.h" +/* + * Copy (the contents of) an array of data types + */ int lam_datatype_copy(void *dst, const void *src, size_t count, diff --git a/src/mpi/datatype/datatype_crc32.c b/src/mpi/datatype/datatype_crc32.c index d14a611927..a5ee288ff0 100644 --- a/src/mpi/datatype/datatype_crc32.c +++ b/src/mpi/datatype/datatype_crc32.c @@ -2,18 +2,19 @@ * $HEADER$ */ -/** @file 32-bit cyclic redundancy check support */ +/** @file + * + * 32-bit cyclic redundancy check support + */ #include #include "lam_config.h" -#include "lam/stdint.h" #include "datatype.h" #define CRC_POLYNOMIAL ((uint32_t) 0x04c11db7) #define CRC_INITIAL_REGISTER ((uint32_t) 0xffffffff) -#define IS_32BIT_ALIGNED(X) \ - (((uint32_t)(X) & (uint32_t) 3) == (uint32_t) 0 ? 
1 : 0) + /* * Look-up table for CRC32 generation @@ -21,6 +22,7 @@ static bool crc_table_initialized = false; static uint32_t crc_table[256]; + /** * CRC32 table generation * @@ -48,18 +50,11 @@ static void initialize_crc_table(void) } -/** +/* * Generate a 32-bit CRC for a buffer - * - * @param buffer Data buffer - * @param size Size of buffer - * @param initial_crc Initial value of the CRC register - * @return The CRC - * - * Generate a 32-bit for a data buffer starting from a given CRC - * value. */ -uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_crc) +uint32_t lam_crc32(const void *restrict buffer, size_t size, + uint32_t initial_crc) { register int i, j; register unsigned char *t; @@ -70,7 +65,7 @@ uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_cr initialize_crc_table(); } - if (IS_32BIT_ALIGNED(buffer)) { + if (LAM_IS_32BIT_ALIGNED(buffer)) { register uint32_t *restrict src = (uint32_t *) buffer; while (size >= sizeof(uint32_t)) { tmp = *src++; @@ -98,19 +93,8 @@ uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_cr } -/** +/* * Copy data from one buffer to another and calculate a 32-bit CRC - * - * @param dst pointer to the destination buffer - * @param src pointer to the source buffer - * @param size size of the buffer - * @param state pointer to a memcpy with checksum/CRC state structure - * @return the original value of dst - * - * This handles cumulative CRCs for for arbitrary lengths and address - * alignments as best as it can. The initial contents of state->sum is - * used as the starting value of the CRC. The final CRC is placed - * back in state->sum. 
*/ void *lam_memcpy_crc32(void *restrict dst, const void *restrict src, @@ -132,7 +116,7 @@ void *lam_memcpy_crc32(void *restrict dst, state->sum = CRC_INITIAL_REGISTER; } - if (IS_32BIT_ALIGNED(src) && IS_32BIT_ALIGNED(dst)) { + if (LAM_IS_32BIT_ALIGNED(src) && LAM_IS_32BIT_ALIGNED(dst)) { register uint32_t *restrict p = (uint32_t *) dst; register uint32_t *restrict q = (uint32_t *) src; register unsigned char *ts, *td; diff --git a/src/mpi/datatype/datatype_create.c b/src/mpi/datatype/datatype_create.c index e9fd02340e..f4357deb6a 100644 --- a/src/mpi/datatype/datatype_create.c +++ b/src/mpi/datatype/datatype_create.c @@ -60,7 +60,7 @@ int lam_datatype_create(int combiner, } if (count == 0) { - newtype = (lam_datatype_t *) LAM_MALLOC(sizeof(lam_datatype_t)); + newtype = (lam_datatype_t *) malloc(sizeof(lam_datatype_t)); if (newtype == NULL) { ulm_err(("Error: MPI_Type_struct: Out of memory\n")); rc = MPI_ERR_TYPE; @@ -84,7 +84,7 @@ int lam_datatype_create(int combiner, newtype->envelope.nints = 1; newtype->envelope.naddrs = 0; newtype->envelope.ndatatypes = 0; - newtype->envelope.iarray = (int *) LAM_MALLOC(sizeof(int)); + newtype->envelope.iarray = (int *) malloc(sizeof(int)); newtype->envelope.aarray = NULL; newtype->envelope.darray = NULL; newtype->envelope.iarray[0] = count; @@ -97,7 +97,7 @@ int lam_datatype_create(int combiner, } /* Allocate new type */ - newtype = LAM_MALLOC(sizeof(lam_datatype_t)); + newtype = malloc(sizeof(lam_datatype_t)); if (newtype == NULL) { ulm_err(("Error: MPI_Type_struct: Out of memory\n")); rc = MPI_ERR_TYPE; @@ -118,12 +118,12 @@ int lam_datatype_create(int combiner, newtype->envelope.naddrs = count; newtype->envelope.ndatatypes = count; newtype->envelope.iarray = - (int *) LAM_MALLOC(newtype->envelope.nints * sizeof(int)); + (int *) malloc(newtype->envelope.nints * sizeof(int)); newtype->envelope.aarray = - (MPI_Aint *) LAM_MALLOC(newtype->envelope.naddrs * + (MPI_Aint *) malloc(newtype->envelope.naddrs * sizeof(MPI_Aint)); 
newtype->envelope.darray = - (MPI_Datatype *) LAM_MALLOC(newtype->envelope.ndatatypes * + (MPI_Datatype *) malloc(newtype->envelope.ndatatypes * sizeof(MPI_Datatype)); newtype->envelope.iarray[0] = count; for (i = 0; i < count; i++) { @@ -253,7 +253,7 @@ int lam_datatype_create(int combiner, if (newtype->num_pairs > 0) { /* allocate the type_map */ newtype->type_map = (ULMTypeMapElt_t *) - LAM_MALLOC(newtype->num_pairs * sizeof(ULMTypeMapElt_t)); + malloc(newtype->num_pairs * sizeof(ULMTypeMapElt_t)); if (newtype->type_map == NULL) { ulm_err(("Error: MPI_Type_struct: Out of memory\n")); rc = MPI_ERR_TYPE; diff --git a/src/mpi/datatype/datatype_delete.c b/src/mpi/datatype/datatype_delete.c index 7cfaf57cd4..3cc582d955 100644 --- a/src/mpi/datatype/datatype_delete.c +++ b/src/mpi/datatype/datatype_delete.c @@ -2,19 +2,12 @@ * $HEADER$ */ -/** @file datatype deletion function */ +/* lam_datatype_t deletion function */ #include "datatype.h" -/** +/* * Delete a LAM/MPI datatype (actually, just mark it for deletion) - * - * @param type datatype - * @return LAM_SUCCESS on success, LAM_ERROR otherwise - * - * This is the central location for creation of data types in LAM/MPI. - * All MPI_Type_create functions rely upon this to do the actual type - * creation. */ int lam_datatype_delete(lam_datatype_t *type) { diff --git a/src/mpi/datatype/datatype_memcpy.c b/src/mpi/datatype/datatype_memcpy.c index 2c0296b114..dba0b45c04 100644 --- a/src/mpi/datatype/datatype_memcpy.c +++ b/src/mpi/datatype/datatype_memcpy.c @@ -2,30 +2,22 @@ * $HEADER$ */ -/** @file alternative memcpy function */ +/* alternative memcpy function */ #include #include #include "lam_config.h" -#include "lam/stdint.h" - #include "datatype.h" #define ALIGNED32(X) (((uint32_t)(X) & (uint32_t) 3) == (uint32_t) 0 ? 
1 : 0) -/** - * Alternative memcpy function - * - * @param dst destination buffer - * @param src source buffer - * @param size size of buffer - * @param dummy unused variable - * @return the original value of dst - * - * On some systems, this performs better than the system memcpy. +/* + * Alternative memcpy function: On some systems, this performs better + * than the system memcpy. */ -void *lam_memcpy_alt(void *dst, const void *src, size_t size, void *dummy) +void *lam_memcpy_alt(void *dst, const void *src, size_t size, + lam_memcpy_state_t *dummy) { if (ALIGNED32(src) && ALIGNED32(dst)) { uint32_t *restrict p = (uint32_t *) dst; diff --git a/src/mpi/datatype/datatype_sum32.c b/src/mpi/datatype/datatype_sum32.c index d5738f5429..8ab9839bce 100644 --- a/src/mpi/datatype/datatype_sum32.c +++ b/src/mpi/datatype/datatype_sum32.c @@ -2,37 +2,32 @@ * $HEADER$ */ -/** @file 32-bit checksum support */ +/** @file + * + * 32-bit checksum support + */ #include #include "lam_config.h" -#include "lam/stdint.h" #include "datatype.h" -#define IS_32BIT_ALIGNED(X) \ - (((uint32_t)(X) & (uint32_t) 3) == ((uint32_t) 0 ? 
1 : 0)) + +/* + * Generate a 32-bit checksum for a buffer + */ +uint32_t lam_sum32(const void *restrict buffer, size_t size) +{ + return 0; +} -/** +/* * Copy data from one buffer to another and calculate a 32-bit checksum - * - * @param dst pointer to the destination buffer - * @param src pointer to the source buffer - * @param size size of the buffer - * @param state pointer to a memcpy with checksum/CRC state structure - * @return the original value of dst - * - * This handles cumulative checksumming for arbitrary lengths and - * address alignments as best as it can; the contents of - * lastPartialLong and lastPartialLength are updated to reflected the - * last partial word's value and length (in bytes) -- this should - * allow proper handling of checksumming contiguous or noncontiguous - * buffers via multiple calls of bcopy_csum() - Mitch */ void *lam_memcpy_sum32(void *restrict dst, const void *restrict src, - size_t size, lam_memcpy_state_t * state) + size_t size, lam_memcpy_state_t *state) { uint32_t *restrict p = (uint32_t *) dst; uint32_t *restrict q = (uint32_t *) src; @@ -51,7 +46,7 @@ void *lam_memcpy_sum32(void *restrict dst, csumlenresidue = (csumlen > size) ? (csumlen - size) : 0; temp = state->partial_int; - if (IS_32BIT_ALIGNED(p) && IS_32BIT_ALIGNED(q)) { + if (LAM_IS_32BIT_ALIGNED(p) && LAM_IS_32BIT_ALIGNED(q)) { if (state->partial_size) { /* do we have enough data to fill out the partial word? 
*/ if (size >= (sizeof(uint32_t) - state->partial_size)) { @@ -67,9 +62,9 @@ void *lam_memcpy_sum32(void *restrict dst, csum += (temp - state->partial_int); size -= sizeof(uint32_t) - state->partial_size; /* - * now we have an unaligned source and an unaligned - * destination - */ + * now we have an unaligned source and an unaligned + * destination + */ for (; size >= sizeof(*q); size -= sizeof(*q)) { memcpy(&temp, q, sizeof(temp)); q++; @@ -98,14 +93,14 @@ void *lam_memcpy_sum32(void *restrict dst, } state->partial_int = 0; state->partial_size = 0; - if (IS_32BIT_ALIGNED(size) && (csumlenresidue == 0)) { + if (LAM_IS_32BIT_ALIGNED(size) && (csumlenresidue == 0)) { state->sum = csum; return dst; } else { size -= i * sizeof(uint32_t); } } - } else if (IS_32BIT_ALIGNED(q)) { + } else if (LAM_IS_32BIT_ALIGNED(q)) { if (state->partial_size) { /* do we have enough data to fill out the partial word? */ if (size >= (sizeof(uint32_t) - state->partial_size)) { @@ -121,10 +116,10 @@ void *lam_memcpy_sum32(void *restrict dst, csum += (temp - state->partial_int); size -= sizeof(uint32_t) - state->partial_size; /* - * now we have an unaligned source and an unknown - * alignment for our destination - */ - if (IS_32BIT_ALIGNED(p)) { + * now we have an unaligned source and an unknown + * alignment for our destination + */ + if (LAM_IS_32BIT_ALIGNED(p)) { size_t numLongs = size / sizeof(uint32_t); for (i = 0; i < numLongs; i++) { memcpy(&temp, q, sizeof(temp)); @@ -165,7 +160,7 @@ void *lam_memcpy_sum32(void *restrict dst, state->partial_int = 0; state->partial_size = 0; } - } else if (IS_32BIT_ALIGNED(p)) { + } else if (LAM_IS_32BIT_ALIGNED(p)) { if (state->partial_size) { /* do we have enough data to fill out the partial word? 
*/ if (size >= (sizeof(uint32_t) - state->partial_size)) { @@ -181,10 +176,10 @@ void *lam_memcpy_sum32(void *restrict dst, csum += (temp - state->partial_int); size -= sizeof(uint32_t) - state->partial_size; /* - * now we have a source of unknown alignment and a - * unaligned destination - */ - if (IS_32BIT_ALIGNED(q)) { + * now we have a source of unknown alignment and a + * unaligned destination + */ + if (LAM_IS_32BIT_ALIGNED(q)) { for (; size >= sizeof(*q); size -= sizeof(*q)) { temp = *q++; csum += temp; @@ -241,10 +236,10 @@ void *lam_memcpy_sum32(void *restrict dst, csum += (temp - state->partial_int); size -= sizeof(uint32_t) - state->partial_size; /* - * now we have an unknown alignment for our source and - * destination - */ - if (IS_32BIT_ALIGNED(q) && IS_32BIT_ALIGNED(p)) { + * now we have an unknown alignment for our source and + * destination + */ + if (LAM_IS_32BIT_ALIGNED(q) && LAM_IS_32BIT_ALIGNED(p)) { size_t numLongs = size / sizeof(uint32_t); for (i = 0; i < numLongs; i++) { csum += *q; @@ -338,9 +333,9 @@ void *lam_memcpy_sum32(void *restrict dst, } } else { /* fast path... */ /* - * temp and state->partial_int are 0 if - * state->partial_size is 0... - */ + * temp and state->partial_int are 0 if + * state->partial_size is 0... + */ memcpy(&temp, q, size); csum += temp; memcpy(p, &temp, size); @@ -390,7 +385,7 @@ void *lam_memcpy_sum32(void *restrict dst, state->partial_size = 0; state->partial_int = 0; } - if (IS_32BIT_ALIGNED(q)) { + if (LAM_IS_32BIT_ALIGNED(q)) { for (i = 0; i < csumlenresidue / sizeof(uint32_t); i++) { csum += *q++; }