More datatype updates

This commit was SVN r710.
2004-02-10 22:07:08 +00:00 · 2004-02-10 22:07:08 +00:00 · 18c26e9381
--- a/src/mpi/datatype/datatype.c
+++ b/src/mpi/datatype/datatype.c
@ -2,7 +2,9 @@
 * $HEADER$
 */

-/** @file lam_datatype_t implementation */
+/*
+ * lam_datatype_t implementation
+ */

 #include "lam_config.h"
 #include "lam/datatype.h"
@ -15,41 +17,6 @@ lam_class_info_t lam_datatype_t_class_info = {
 };


-static int lam_datatype_init = 0;
-lam_dbl_list_t lam_p2p_cdis;
-
-
-void lam_datatype_t(lam_p2p_cdi_t * cdi)
-{
-    if (fetchNset(&lam_p2p_cdis_init, 1) == 0) {
-	lam_dbl_construct(&lam_p2p_cdis);
-    }
-    lam_dbl_item_construct(&cdi->cdi_base);
-    cdi->cdi_name = 0;
-    cdi->cdi_id = lam_dbl_get_size(&lam_p2p_cdis) + 1;
-    cdi->cdi_frag_first_size = 0;
-    cdi->cdi_frag_min_size = 0;
-    cdi->cdi_frag_max_size = 0;
-    cdi->cdi_endpoint_latency = 0;
-    cdi->cdi_endpoint_bandwidth = 0;
-    cdi->cdi_endpoint_count = 0;
-    lam_dbl_construct(&cdi->cdi_incomplete_sends);
-    lam_dbl_append(&lam_p2p_cdis, &cdi->cdi_base);
-}
-
-
-void lam_p2p_cdi_destruct(lam_p2p_cdi_t * cdi)
-{
-    lam_dbl_remove(&lam_p2p_cdis, &cdi->cdi_base);
-    lam_dbl_destruct(&cdi->cdi_incomplete_sends);
-    lam_dbl_item_destruct(&cdi->cdi_base);
-}
-
-
-/*
- * This random stuff checked in while I think about things ...
- */
-
 /**
 * type_pack -- Incrementally copy data type arrays to/from a packed buffer
 *
@ -117,11 +84,11 @@ lam_packer_status_t
 lam_packer(lam_packer_direction_t direction,
 	   void *buf,
 	   size_t bufsize,
-	   size_t * offset,
+	   size_t *offset,
 	   void *typebuf,
 	   size_t ntype,
-	   lam_datatype_t * datatype,
-	   lam_pack_state_t * pack_state, lam_checksum_t * checksum)
+	   lam_datatype_t *datatype,
+	   lam_pack_state_t *pack_state, lam_checksum_t *checksum)
 {
    return 0;
 }
@ -139,8 +106,7 @@ void
 lam_datatype_copy(void *dest,
 		  const void *src,
 		  size_t count,
-		  lam_datatype_t *datatype,
-                  lam_checksum_t *csum)
+		  lam_datatype_t *datatype, lam_checksum_t *csum)
 {
    if (datatype == NULL) {
 	memmove(dest, src, count);
--- a/src/mpi/datatype/datatype.h
+++ b/src/mpi/datatype/datatype.h
@ -4,11 +4,11 @@

 /** @file
 *
- * Data stuctures and functions related to LAM datatypes.
- */
-
-/*
- * LAM internal data type representation
+ * lam_datatype_t interface for LAM internal data type representation
+ *
+ * lam_datatype_t is a class which represents contiguous or
+ * non-contiguous data together with constituent type-related
+ * information.  It is the LAM's-eye view of MPI_Datatype.
 */

 #ifndef LAM_DATATYPE_H_INCLUDED
@ -21,11 +21,33 @@

 #include "lam_config.h"
 #include "lam/constants.h"
+#include "lam/stdint.h"
 #include "lam/lfc/object.h"
 #include "lam/types.h"

 #include "mpi.h"

+/* macros *************************************************************/
+
+/**
+ * Test 32-bit alignment of an address
+ *
+ * @param addr      An address (any expression; it is evaluated once)
+ * @return          true if the address is 32-bit aligned
+ */
+#define LAM_IS_32BIT_ALIGNED(addr) \
+    ((((uint32_t) (addr)) & (uint32_t) 3) == (uint32_t) 0 ? true : false)
+
+/**
+ * Test 64-bit alignment of an address
+ *
+ * @param addr      An address (any expression; it is evaluated once)
+ * @return          true if the address is 64-bit aligned
+ */
+#define LAM_IS_64BIT_ALIGNED(addr) \
+    ((((uint64_t) (addr)) & (uint64_t) 7) == (uint64_t) 0 ? true : false)
+
+
 /* typedefs ***********************************************************/

 typedef struct lam_checksum_t lam_checksum_t;
@ -34,7 +56,13 @@ typedef struct lam_datavec_element_t lam_datavec_element_t;
 typedef struct lam_datavec_t lam_datavec_t;
 typedef struct lam_dataxdr_t lam_dataxdr_t;
 typedef struct lam_pack_state_t lam_pack_state_t;
+typedef struct lam_memcpy_state_t lam_memcpy_state_t;

+/* Function prototype for a generalized memcpy() */
+typedef void *(lam_memcpy_fn_t) (void *restrict dst,
+				 const void *restrict src,
+				 size_t size,
+                                 lam_memcpy_state_t *check);

 /* enums **************************************************************/

@ -50,7 +78,6 @@ enum lam_datatype_state_t {
    LAM_DATATYPE_STATE_XDR = 1 << 5,
    /* etc. */
 };
-typedef enum lam_datatype_state_t lam_datatype_state_t;


 /**
@ -78,7 +105,6 @@ enum lam_datatype_kind_t {
    LAM_DATATYPE_KIND_STRUCT_FORTRAN,
    LAM_DATATYPE_KIND_VECTOR_FORTRAN
 };
-typedef enum lam_datatype_kind_t lam_datatype_kind_t;


 /**
@ -90,15 +116,18 @@ enum lam_checksum_kind_t {
    LAM_CHECKSUM_KIND_SUM32,
    LAM_CHECKSUM_KIND_SUM64
 };
-typedef enum lam_checksum_kind_t lam_checksum_kind_t;


+typedef enum lam_datatype_state_t lam_datatype_state_t;
+typedef enum lam_datatype_kind_t lam_datatype_kind_t;
+typedef enum lam_checksum_kind_t lam_checksum_kind_t;
+
 /* structs ************************************************************/

 /**
 * State of incremental memcpy with checksum or CRC
 */
-typedef struct lam_memcpy_state_t {
+struct lam_memcpy_state_t {
    size_t size;	   /**< total size in bytes of the object
                            * being checksummed / CRCed */
    size_t partial_size;   /**< size of non- uint32_t to be carried
@ -109,7 +138,7 @@ typedef struct lam_memcpy_state_t {
                            * checksum */
    bool first_call;	   /**< is this the first call for this
                            * checksum/CRC? */
-} lam_memcpy_state_t;
+};


 /**
@ -181,24 +210,6 @@ struct lam_dataxdr_element_t {
 };


-/**
- * Function protoype for a generalized memcpy()
- *
- * Copy data from one buffer to another and optionally calculate a
- * checksum or CRC
- *
- * @param dst      pointer to the destination buffer
- * @param src      pointer to the source buffer
- * @param size     size of the buffer
- * @param check    pointer to the optional checksum or CRC
- * @return         the original value of dst
- */
-typedef void *(lam_memcpy_fn_t) (void *restrict dst,
-                                 const void *restrict src,
-                                 size_t size,
-                                 lam_memcpy_state_t *check);
-
-
 /* interface **********************************************************/

 /**
@ -230,7 +241,7 @@ int lam_datatype_copy(void *dst,
 		      size_t count,
 		      lam_datatype_t *datatype,
 		      lam_memcpy_fn_t *memcpy_fn,
-                      lam_memcpy_state_t *check);
+		      lam_memcpy_state_t *check);

 /**
 * Copy (the contents of) an array of data types, and convert to
@ -251,8 +262,9 @@ int lam_datatype_convert(void *dst,
 			 const void *src,
 			 lam_datatype_t *src_datatype,
 			 size_t src_count,
-                         lam_memcpy_fn_t *memcpy_fn,
-                         lam_memcpy_state_t *check);
+			 lam_memcpy_fn_t *memcpy_fn,
+			 lam_memcpy_state_t *check);
+

 /**
 * Pack state
@ -296,7 +308,7 @@ int lam_datatype_pack(lam_pack_state_t *state,
 		      size_t ntype,
 		      lam_datatype_t *datatype,
 		      lam_memcpy_fn_t *memcpy_fn,
-                      lam_memcpy_state_t *check);
+		      lam_memcpy_state_t *check);


 /**
@ -326,7 +338,7 @@ int lam_datatype_unpack(lam_pack_state_t *state,
 			size_t bufsize,
 			lam_datatype_t *datatype,
 			lam_memcpy_fn_t *memcpy_fn,
-                        lam_memcpy_state_t *check);
+			lam_memcpy_state_t *check);

 /**
 * Incrementally generate an iovec for gathering from an array of
@ -398,7 +410,7 @@ int lam_datatype_scatter_iovec(lam_pack_state_t *state,
 			       size_t bufsize,
 			       lam_datatype_t *datatype,
 			       lam_memcpy_fn_t *memcpy_fn,
-                               lam_memcpy_state_t *check);
+			       lam_memcpy_state_t *check);


 /*
@ -419,13 +431,6 @@ lam_memcpy_init(lam_memcpy_state_t *state, size_t sum_size)
    state->first_call = true;
 }

-/*
- * prototypes for memcpy functions
- */
-
-extern lam_memcpy_fn_t lam_memcpy_crc32;
-extern lam_memcpy_fn_t lam_memcpy_sum32;
-extern lam_memcpy_fn_t lam_memcpy_sum64;

 /**
 * Copy data from one buffer to another
@ -442,10 +447,67 @@ static inline void *lam_memcpy(void *dst, const void *src, size_t size,
    return memcpy(dst, src, size);
 }

+/**
+ * An alternative version of memcpy that may out-perform the system
+ * version on some (silly) systems.
+ *
+ * @param dst      pointer to the destination buffer
+ * @param src      pointer to the source buffer
+ * @param size     size of the buffer
+ * @param state    unused
+ * @return         the original value of dst
+ */
+void *lam_memcpy_alt(void *dst, const void *src, size_t size,
+                     lam_memcpy_state_t *state);
+
+
+/**
+ * Generate a 32-bit CRC for a buffer
+ *
+ * @param buffer      Data buffer
+ * @param size        Size of buffer
+ * @param initial_crc Initial value of the CRC register
+ * @return            The CRC
+ *
+ * Generate a 32-bit CRC for a data buffer starting from a given CRC
+ * value.
+ */
 uint32_t lam_crc32(const void *restrict buffer, size_t size,
 		   uint32_t initial_crc);
-uint32_t lam_sum32(const void *restrict buffer, size_t size,
-		   uint32_t initial_crc);
+
+
+/**
+ * Generate a 32-bit checksum for a buffer
+ *
+ * @param buffer      Data buffer
+ * @param size        Size of buffer
+ * @return            The checksum
+ *
+ * Generate a 32-bit checksum for a data buffer.  Unlike lam_crc32(),
+ * no initial value is passed in.
+ */
+uint32_t lam_sum32(const void *restrict buffer, size_t size);
+
+
+/**
+ * Copy data from one buffer to another and calculate a 32-bit CRC
+ *
+ * @param dst      pointer to the destination buffer
+ * @param src      pointer to the source buffer
+ * @param size     size of the buffer
+ * @param state    pointer to a memcpy with checksum/CRC state structure
+ * @return         the original value of dst
+ *
+ * This handles cumulative CRCs for for arbitrary lengths and address
+ * alignments as best as it can. The initial contents of state->sum is
+ * used as the starting value of the CRC.  The final CRC is placed
+ * back in state->sum.
+ */
+void *lam_memcpy_crc32(void *restrict dst,
+                       const void *restrict src,
+                       size_t size,
+                       lam_memcpy_state_t *check);
+

 /**
 * Copy data from one buffer to another and calculate a 32-bit checksum
@ -455,7 +517,18 @@ uint32_t lam_sum32(const void *restrict buffer, size_t size,
 * @param size     size of the buffer
 * @param state    pointer to a memcpy with checksum/CRC state structure
 * @return         the original value of dst
+ *
+ * This handles cumulative checksumming for arbitrary lengths and
+ * address alignments as best as it can; state->partial_int and
+ * state->partial_size are updated to reflect the last partial
+ * word's value and length (in bytes) -- this should allow proper
+ * handling of checksumming contiguous or noncontiguous buffers via
+ * multiple calls of lam_memcpy_sum32() - Mitch
 */
+void *lam_memcpy_sum32(void *restrict dst,
+                       const void *restrict src,
+                       size_t size,
+                       lam_memcpy_state_t *state);


 /**
@ -467,7 +540,29 @@ uint32_t lam_sum32(const void *restrict buffer, size_t size,
 * @param state    pointer to a memcpy with checksum/CRC state structure
 * @return         the original value of dst
 */
+void *lam_memcpy_sum64(void *restrict dst,
+                       const void *restrict src,
+                       size_t size,
+                       lam_memcpy_state_t *state);

+
+/**
+ * Create a LAM/MPI datatype
+ *
+ * @param combiner   integer identifying the kind of MPI create function
+ * @param nintegers  number of integers passed to the create function
+ * @param integers   array of integers passed to the create function
+ * @param naddress   number of addresses passed to the create function
+ * @param address    array of addresses passed to the create function
+ * @param ntype      number of data types passed to the create function
+ * @param types      array of data types passed to the create function
+ * @param newtype    pointer to address of new type
+ * @return           LAM_SUCCESS on successful creation, LAM_ERROR otherwise
+ *
+ * This is the central location for creation of data types in LAM/MPI.
+ * All MPI_Type_create functions rely upon this to do the actual type
+ * creation.
+ */
 int lam_datatype_create(int combiner,
                        int nintegers,
                        int integers[],
@ -477,8 +572,19 @@ int lam_datatype_create(int combiner,
                        lam_datatype_t *types[],
                        lam_datatype_t **newtype);

+
+/**
+ * Delete a LAM/MPI datatype (actually, just mark it for deletion)
+ *
+ * @param type       datatype
+ * @return           LAM_SUCCESS on success, LAM_ERROR otherwise
+ *
+ * This is the central location for deletion of data types in LAM/MPI.
+ * The datatype is only marked for deletion by this call; see the
+ * summary line above.
+ */
 int lam_datatype_delete(lam_datatype_t *type);

-void *lam_memcpy_alt(void *dst, const void *src, size_t size, void *dummy);
+

 #endif				/* LAM_DATATYPE_H_INCLUDED */
--- a/src/mpi/datatype/datatype_copy.c
+++ b/src/mpi/datatype/datatype_copy.c
@ -2,12 +2,15 @@
 * $HEADER$
 */

-/** @file dataype copy function */
+/* lam_datatype_t copy function */

 #include <stdlib.h>

 #include "datatype.h"

+/*
+ * Copy (the contents of) an array of data types
+ */
 int lam_datatype_copy(void *dst,
                      const void *src,
                      size_t count,
--- a/src/mpi/datatype/datatype_crc32.c
+++ b/src/mpi/datatype/datatype_crc32.c
@ -2,18 +2,19 @@
 * $HEADER$
 */

-/** @file 32-bit cyclic redundancy check support */
+/** @file
+ *
+ * 32-bit cyclic redundancy check support
+ */

 #include <stdlib.h>

 #include "lam_config.h"
-#include "lam/stdint.h"
 #include "datatype.h"

 #define CRC_POLYNOMIAL       ((uint32_t) 0x04c11db7)
 #define CRC_INITIAL_REGISTER ((uint32_t) 0xffffffff)
-#define IS_32BIT_ALIGNED(X) \
-    (((uint32_t)(X) & (uint32_t) 3) == (uint32_t) 0 ? 1 : 0)
+

 /*
 * Look-up table for CRC32 generation
@ -21,6 +22,7 @@
 static bool crc_table_initialized = false;
 static uint32_t crc_table[256];

+
 /**
 * CRC32 table generation
 *
@ -48,18 +50,11 @@ static void initialize_crc_table(void)
 }


-/**
+/*
 * Generate a 32-bit CRC for a buffer
- *
- * @param buffer      Data buffer
- * @param size        Size of buffer
- * @param initial_crc Initial value of the CRC register
- * @return            The CRC
- *
- * Generate a 32-bit for a data buffer starting from a given CRC
- * value.
 */
-uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_crc)
+uint32_t lam_crc32(const void *restrict buffer, size_t size,
+                   uint32_t initial_crc)
 {
    register int i, j;
    register unsigned char *t;
@ -70,7 +65,7 @@ uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_cr
        initialize_crc_table();
    }

-    if (IS_32BIT_ALIGNED(buffer)) {
+    if (LAM_IS_32BIT_ALIGNED(buffer)) {
        register uint32_t *restrict src = (uint32_t *) buffer;
        while (size >= sizeof(uint32_t)) {
            tmp = *src++;
@ -98,19 +93,8 @@ uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_cr
 }


-/**
+/*
 * Copy data from one buffer to another and calculate a 32-bit CRC
- *
- * @param dst      pointer to the destination buffer
- * @param src      pointer to the source buffer
- * @param size     size of the buffer
- * @param state    pointer to a memcpy with checksum/CRC state structure
- * @return         the original value of dst
- *
- * This handles cumulative CRCs for for arbitrary lengths and address
- * alignments as best as it can. The initial contents of state->sum is
- * used as the starting value of the CRC.  The final CRC is placed
- * back in state->sum.
 */
 void *lam_memcpy_crc32(void *restrict dst,
                       const void *restrict src,
@ -132,7 +116,7 @@ void *lam_memcpy_crc32(void *restrict dst,
        state->sum = CRC_INITIAL_REGISTER;
    }

-    if (IS_32BIT_ALIGNED(src) && IS_32BIT_ALIGNED(dst)) {
+    if (LAM_IS_32BIT_ALIGNED(src) && LAM_IS_32BIT_ALIGNED(dst)) {
        register uint32_t *restrict p = (uint32_t *) dst;
        register uint32_t *restrict q = (uint32_t *) src;
        register unsigned char *ts, *td;
--- a/src/mpi/datatype/datatype_create.c
+++ b/src/mpi/datatype/datatype_create.c
@ -60,7 +60,7 @@ int lam_datatype_create(int combiner,
    }

    if (count == 0) {
-        newtype = (lam_datatype_t *) LAM_MALLOC(sizeof(lam_datatype_t));
+        newtype = (lam_datatype_t *) malloc(sizeof(lam_datatype_t));
        if (newtype == NULL) {
            ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
            rc = MPI_ERR_TYPE;
@ -84,7 +84,7 @@ int lam_datatype_create(int combiner,
        newtype->envelope.nints = 1;
        newtype->envelope.naddrs = 0;
        newtype->envelope.ndatatypes = 0;
-        newtype->envelope.iarray = (int *) LAM_MALLOC(sizeof(int));
+        newtype->envelope.iarray = (int *) malloc(sizeof(int));
        newtype->envelope.aarray = NULL;
        newtype->envelope.darray = NULL;
        newtype->envelope.iarray[0] = count;
@ -97,7 +97,7 @@ int lam_datatype_create(int combiner,
    }

    /* Allocate new type */
-    newtype = LAM_MALLOC(sizeof(lam_datatype_t));
+    newtype = malloc(sizeof(lam_datatype_t));
    if (newtype == NULL) {
        ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
        rc = MPI_ERR_TYPE;
@ -118,12 +118,12 @@ int lam_datatype_create(int combiner,
    newtype->envelope.naddrs = count;
    newtype->envelope.ndatatypes = count;
    newtype->envelope.iarray =
-        (int *) LAM_MALLOC(newtype->envelope.nints * sizeof(int));
+        (int *) malloc(newtype->envelope.nints * sizeof(int));
    newtype->envelope.aarray =
-        (MPI_Aint *) LAM_MALLOC(newtype->envelope.naddrs *
+        (MPI_Aint *) malloc(newtype->envelope.naddrs *
                                sizeof(MPI_Aint));
    newtype->envelope.darray =
-        (MPI_Datatype *) LAM_MALLOC(newtype->envelope.ndatatypes *
+        (MPI_Datatype *) malloc(newtype->envelope.ndatatypes *
                                    sizeof(MPI_Datatype));
    newtype->envelope.iarray[0] = count;
    for (i = 0; i < count; i++) {
@ -253,7 +253,7 @@ int lam_datatype_create(int combiner,
    if (newtype->num_pairs > 0) {
        /* allocate the type_map */
        newtype->type_map = (ULMTypeMapElt_t *)
-            LAM_MALLOC(newtype->num_pairs * sizeof(ULMTypeMapElt_t));
+            malloc(newtype->num_pairs * sizeof(ULMTypeMapElt_t));
        if (newtype->type_map == NULL) {
            ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
            rc = MPI_ERR_TYPE;
--- a/src/mpi/datatype/datatype_delete.c
+++ b/src/mpi/datatype/datatype_delete.c
@ -2,19 +2,12 @@
 * $HEADER$
 */

-/** @file datatype deletion function */
+/* lam_datatype_t deletion function */

 #include "datatype.h"

-/**
+/*
 * Delete a LAM/MPI datatype (actually, just mark it for deletion)
- *
- * @param type       datatype
- * @return           LAM_SUCCESS on success, LAM_ERROR otherwise
- *
- * This is the central location for creation of data types in LAM/MPI.
- * All MPI_Type_create functions rely upon this to do the actual type
- * creation.
 */
 int lam_datatype_delete(lam_datatype_t *type)
 {
--- a/src/mpi/datatype/datatype_memcpy.c
+++ b/src/mpi/datatype/datatype_memcpy.c
@ -2,30 +2,22 @@
 * $HEADER$
 */

-/** @file alternative memcpy function */
+/* alternative memcpy function */

 #include <stdlib.h>
 #include <string.h>

 #include "lam_config.h"
-#include "lam/stdint.h"
-
 #include "datatype.h"

 #define ALIGNED32(X) (((uint32_t)(X) & (uint32_t) 3) == (uint32_t) 0 ? 1 : 0)

-/**
- * Alternative memcpy function
- *
- * @param dst   destination buffer
- * @param src   source buffer
- * @param size  size of buffer
- * @param dummy unused variable
- * @return      the original value of dst
- *
- * On some systems, this performs better than the system memcpy.
+/*
+ * Alternative memcpy function: On some systems, this performs better
+ * than the system memcpy.
 */
-void *lam_memcpy_alt(void *dst, const void *src, size_t size, void *dummy)
+void *lam_memcpy_alt(void *dst, const void *src, size_t size,
+                     lam_memcpy_state_t *dummy)
 {
    if (ALIGNED32(src) && ALIGNED32(dst)) {
        uint32_t *restrict p = (uint32_t *) dst;
--- a/src/mpi/datatype/datatype_sum32.c
+++ b/src/mpi/datatype/datatype_sum32.c
@ -2,37 +2,32 @@
 * $HEADER$
 */

-/** @file 32-bit checksum support */
+/** @file
+ *
+ * 32-bit checksum support
+ */

 #include <stdlib.h>

 #include "lam_config.h"
-#include "lam/stdint.h"
 #include "datatype.h"

-#define IS_32BIT_ALIGNED(X) \
-    (((uint32_t)(X) & (uint32_t) 3) == ((uint32_t) 0 ? 1 : 0))
+
+/*
+ * Generate a 32-bit checksum for a buffer (stub: always returns 0)
+ */
+uint32_t lam_sum32(const void *restrict buffer, size_t size)
+{
+    return 0;
+}


-/**
+/*
 * Copy data from one buffer to another and calculate a 32-bit checksum
- *
- * @param dst      pointer to the destination buffer
- * @param src      pointer to the source buffer
- * @param size     size of the buffer
- * @param state    pointer to a memcpy with checksum/CRC state structure
- * @return         the original value of dst
- *
- * This handles cumulative checksumming for arbitrary lengths and
- * address alignments as best as it can; the contents of
- * lastPartialLong and lastPartialLength are updated to reflected the
- * last partial word's value and length (in bytes) -- this should
- * allow proper handling of checksumming contiguous or noncontiguous
- * buffers via multiple calls of bcopy_csum() - Mitch
 */
 void *lam_memcpy_sum32(void *restrict dst,
 		       const void *restrict src,
-		       size_t size, lam_memcpy_state_t * state)
+		       size_t size, lam_memcpy_state_t *state)
 {
    uint32_t *restrict p = (uint32_t *) dst;
    uint32_t *restrict q = (uint32_t *) src;
@ -51,7 +46,7 @@ void *lam_memcpy_sum32(void *restrict dst,
    csumlenresidue = (csumlen > size) ? (csumlen - size) : 0;
    temp = state->partial_int;

-    if (IS_32BIT_ALIGNED(p) && IS_32BIT_ALIGNED(q)) {
+    if (LAM_IS_32BIT_ALIGNED(p) && LAM_IS_32BIT_ALIGNED(q)) {
 	if (state->partial_size) {
 	    /* do we have enough data to fill out the partial word? */
 	    if (size >= (sizeof(uint32_t) - state->partial_size)) {
@ -67,9 +62,9 @@ void *lam_memcpy_sum32(void *restrict dst,
 		csum += (temp - state->partial_int);
 		size -= sizeof(uint32_t) - state->partial_size;
 		/*
-                 * now we have an unaligned source and an unaligned
-                 * destination
-                 */
+		 * now we have an unaligned source and an unaligned
+		 * destination
+		 */
 		for (; size >= sizeof(*q); size -= sizeof(*q)) {
 		    memcpy(&temp, q, sizeof(temp));
 		    q++;
@ -98,14 +93,14 @@ void *lam_memcpy_sum32(void *restrict dst,
 	    }
 	    state->partial_int = 0;
 	    state->partial_size = 0;
-	    if (IS_32BIT_ALIGNED(size) && (csumlenresidue == 0)) {
+	    if (LAM_IS_32BIT_ALIGNED(size) && (csumlenresidue == 0)) {
 		state->sum = csum;
 		return dst;
 	    } else {
 		size -= i * sizeof(uint32_t);
 	    }
 	}
-    } else if (IS_32BIT_ALIGNED(q)) {
+    } else if (LAM_IS_32BIT_ALIGNED(q)) {
 	if (state->partial_size) {
 	    /* do we have enough data to fill out the partial word? */
 	    if (size >= (sizeof(uint32_t) - state->partial_size)) {
@ -121,10 +116,10 @@ void *lam_memcpy_sum32(void *restrict dst,
 		csum += (temp - state->partial_int);
 		size -= sizeof(uint32_t) - state->partial_size;
 		/*
-                 * now we have an unaligned source and an unknown
-                 * alignment for our destination
-                 */
-		if (IS_32BIT_ALIGNED(p)) {
+		 * now we have an unaligned source and an unknown
+		 * alignment for our destination
+		 */
+		if (LAM_IS_32BIT_ALIGNED(p)) {
 		    size_t numLongs = size / sizeof(uint32_t);
 		    for (i = 0; i < numLongs; i++) {
 			memcpy(&temp, q, sizeof(temp));
@ -165,7 +160,7 @@ void *lam_memcpy_sum32(void *restrict dst,
 	    state->partial_int = 0;
 	    state->partial_size = 0;
 	}
-    } else if (IS_32BIT_ALIGNED(p)) {
+    } else if (LAM_IS_32BIT_ALIGNED(p)) {
 	if (state->partial_size) {
 	    /* do we have enough data to fill out the partial word? */
 	    if (size >= (sizeof(uint32_t) - state->partial_size)) {
@ -181,10 +176,10 @@ void *lam_memcpy_sum32(void *restrict dst,
 		csum += (temp - state->partial_int);
 		size -= sizeof(uint32_t) - state->partial_size;
 		/*
-                 * now we have a source of unknown alignment and a
-                 * unaligned destination
-                 */
-		if (IS_32BIT_ALIGNED(q)) {
+		 * now we have a source of unknown alignment and a
+		 * unaligned destination
+		 */
+		if (LAM_IS_32BIT_ALIGNED(q)) {
 		    for (; size >= sizeof(*q); size -= sizeof(*q)) {
 			temp = *q++;
 			csum += temp;
@ -241,10 +236,10 @@ void *lam_memcpy_sum32(void *restrict dst,
 		csum += (temp - state->partial_int);
 		size -= sizeof(uint32_t) - state->partial_size;
 		/*
-                 * now we have an unknown alignment for our source and
-                 * destination
-                 */
-		if (IS_32BIT_ALIGNED(q) && IS_32BIT_ALIGNED(p)) {
+		 * now we have an unknown alignment for our source and
+		 * destination
+		 */
+		if (LAM_IS_32BIT_ALIGNED(q) && LAM_IS_32BIT_ALIGNED(p)) {
 		    size_t numLongs = size / sizeof(uint32_t);
 		    for (i = 0; i < numLongs; i++) {
 			csum += *q;
@ -338,9 +333,9 @@ void *lam_memcpy_sum32(void *restrict dst,
 	    }
 	} else {		/* fast path... */
 	    /*
-             * temp and state->partial_int are 0 if
-             * state->partial_size is 0...
-             */
+	     * temp and state->partial_int are 0 if
+	     * state->partial_size is 0...
+	     */
 	    memcpy(&temp, q, size);
 	    csum += temp;
 	    memcpy(p, &temp, size);
@ -390,7 +385,7 @@ void *lam_memcpy_sum32(void *restrict dst,
 	    state->partial_size = 0;
 	    state->partial_int = 0;
 	}
-	if (IS_32BIT_ALIGNED(q)) {
+	if (LAM_IS_32BIT_ALIGNED(q)) {
 	    for (i = 0; i < csumlenresidue / sizeof(uint32_t); i++) {
 		csum += *q++;
 	    }