More datatype updates
This commit was SVN r710.
Этот коммит содержится в:
родитель
969ad311a7
Коммит
18c26e9381
@ -2,7 +2,9 @@
|
|||||||
* $HEADER$
|
* $HEADER$
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** @file lam_datatype_t implementation */
|
/*
|
||||||
|
* lam_datatype_t implementation
|
||||||
|
*/
|
||||||
|
|
||||||
#include "lam_config.h"
|
#include "lam_config.h"
|
||||||
#include "lam/datatype.h"
|
#include "lam/datatype.h"
|
||||||
@ -15,41 +17,6 @@ lam_class_info_t lam_datatype_t_class_info = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static int lam_datatype_init = 0;
|
|
||||||
lam_dbl_list_t lam_p2p_cdis;
|
|
||||||
|
|
||||||
|
|
||||||
void lam_datatype_t(lam_p2p_cdi_t * cdi)
|
|
||||||
{
|
|
||||||
if (fetchNset(&lam_p2p_cdis_init, 1) == 0) {
|
|
||||||
lam_dbl_construct(&lam_p2p_cdis);
|
|
||||||
}
|
|
||||||
lam_dbl_item_construct(&cdi->cdi_base);
|
|
||||||
cdi->cdi_name = 0;
|
|
||||||
cdi->cdi_id = lam_dbl_get_size(&lam_p2p_cdis) + 1;
|
|
||||||
cdi->cdi_frag_first_size = 0;
|
|
||||||
cdi->cdi_frag_min_size = 0;
|
|
||||||
cdi->cdi_frag_max_size = 0;
|
|
||||||
cdi->cdi_endpoint_latency = 0;
|
|
||||||
cdi->cdi_endpoint_bandwidth = 0;
|
|
||||||
cdi->cdi_endpoint_count = 0;
|
|
||||||
lam_dbl_construct(&cdi->cdi_incomplete_sends);
|
|
||||||
lam_dbl_append(&lam_p2p_cdis, &cdi->cdi_base);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void lam_p2p_cdi_destruct(lam_p2p_cdi_t * cdi)
|
|
||||||
{
|
|
||||||
lam_dbl_remove(&lam_p2p_cdis, &cdi->cdi_base);
|
|
||||||
lam_dbl_destruct(&cdi->cdi_incomplete_sends);
|
|
||||||
lam_dbl_item_destruct(&cdi->cdi_base);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This random stuff checked in while I think about things ...
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* type_pack -- Incrementally copy data type arrays to/from a packed buffer
|
* type_pack -- Incrementally copy data type arrays to/from a packed buffer
|
||||||
*
|
*
|
||||||
@ -139,8 +106,7 @@ void
|
|||||||
lam_datatype_copy(void *dest,
|
lam_datatype_copy(void *dest,
|
||||||
const void *src,
|
const void *src,
|
||||||
size_t count,
|
size_t count,
|
||||||
lam_datatype_t *datatype,
|
lam_datatype_t *datatype, lam_checksum_t *csum)
|
||||||
lam_checksum_t *csum)
|
|
||||||
{
|
{
|
||||||
if (datatype == NULL) {
|
if (datatype == NULL) {
|
||||||
memmove(dest, src, count);
|
memmove(dest, src, count);
|
||||||
|
@ -4,11 +4,11 @@
|
|||||||
|
|
||||||
/** @file
|
/** @file
|
||||||
*
|
*
|
||||||
* Data stuctures and functions related to LAM datatypes.
|
* lam_datatype_t interface for LAM internal data type representation
|
||||||
*/
|
*
|
||||||
|
* lam_datatype_t is a class which represents contiguous or
|
||||||
/*
|
* non-contiguous data together with constituent type-related
|
||||||
* LAM internal data type representation
|
* information. It is the LAM's-eye view of MPI_Datatype.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef LAM_DATATYPE_H_INCLUDED
|
#ifndef LAM_DATATYPE_H_INCLUDED
|
||||||
@ -21,11 +21,33 @@
|
|||||||
|
|
||||||
#include "lam_config.h"
|
#include "lam_config.h"
|
||||||
#include "lam/constants.h"
|
#include "lam/constants.h"
|
||||||
|
#include "lam/stdint.h"
|
||||||
#include "lam/lfc/object.h"
|
#include "lam/lfc/object.h"
|
||||||
#include "lam/types.h"
|
#include "lam/types.h"
|
||||||
|
|
||||||
#include "mpi.h"
|
#include "mpi.h"
|
||||||
|
|
||||||
|
/* macros *************************************************************/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test 32-bit alignment of an address
|
||||||
|
*
|
||||||
|
* @param addr An address
|
||||||
|
* @return true if the address is 32-bit aligned
|
||||||
|
*/
|
||||||
|
#define LAM_IS_32BIT_ALIGNED(addr) \
|
||||||
|
(((uint32_t) addr & (uint32_t) 3) == (uint32_t) 0 ? true : false)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test 64-bit alignment of an address
|
||||||
|
*
|
||||||
|
* @param addr An address
|
||||||
|
* @return true if the address is 64-bit aligned
|
||||||
|
*/
|
||||||
|
#define LAM_IS_64BIT_ALIGNED(addr) \
|
||||||
|
(((uint64_t) addr & (uint64_t) 7) == (uint64_t) 0 ? true : false)
|
||||||
|
|
||||||
|
|
||||||
/* typedefs ***********************************************************/
|
/* typedefs ***********************************************************/
|
||||||
|
|
||||||
typedef struct lam_checksum_t lam_checksum_t;
|
typedef struct lam_checksum_t lam_checksum_t;
|
||||||
@ -34,7 +56,13 @@ typedef struct lam_datavec_element_t lam_datavec_element_t;
|
|||||||
typedef struct lam_datavec_t lam_datavec_t;
|
typedef struct lam_datavec_t lam_datavec_t;
|
||||||
typedef struct lam_dataxdr_t lam_dataxdr_t;
|
typedef struct lam_dataxdr_t lam_dataxdr_t;
|
||||||
typedef struct lam_pack_state_t lam_pack_state_t;
|
typedef struct lam_pack_state_t lam_pack_state_t;
|
||||||
|
typedef struct lam_memcpy_state_t lam_memcpy_state_t;
|
||||||
|
|
||||||
|
/* Function prototype for a generalized memcpy() */
|
||||||
|
typedef void *(lam_memcpy_fn_t) (void *restrict dst,
|
||||||
|
const void *restrict src,
|
||||||
|
size_t size,
|
||||||
|
lam_memcpy_state_t *check);
|
||||||
|
|
||||||
/* enums **************************************************************/
|
/* enums **************************************************************/
|
||||||
|
|
||||||
@ -50,7 +78,6 @@ enum lam_datatype_state_t {
|
|||||||
LAM_DATATYPE_STATE_XDR = 1 << 5,
|
LAM_DATATYPE_STATE_XDR = 1 << 5,
|
||||||
/* etc. */
|
/* etc. */
|
||||||
};
|
};
|
||||||
typedef enum lam_datatype_state_t lam_datatype_state_t;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -78,7 +105,6 @@ enum lam_datatype_kind_t {
|
|||||||
LAM_DATATYPE_KIND_STRUCT_FORTRAN,
|
LAM_DATATYPE_KIND_STRUCT_FORTRAN,
|
||||||
LAM_DATATYPE_KIND_VECTOR_FORTRAN
|
LAM_DATATYPE_KIND_VECTOR_FORTRAN
|
||||||
};
|
};
|
||||||
typedef enum lam_datatype_kind_t lam_datatype_kind_t;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -90,15 +116,18 @@ enum lam_checksum_kind_t {
|
|||||||
LAM_CHECKSUM_KIND_SUM32,
|
LAM_CHECKSUM_KIND_SUM32,
|
||||||
LAM_CHECKSUM_KIND_SUM64
|
LAM_CHECKSUM_KIND_SUM64
|
||||||
};
|
};
|
||||||
typedef enum lam_checksum_kind_t lam_checksum_kind_t;
|
|
||||||
|
|
||||||
|
|
||||||
|
typedef enum lam_datatype_state_t lam_datatype_state_t;
|
||||||
|
typedef enum lam_datatype_kind_t lam_datatype_kind_t;
|
||||||
|
typedef enum lam_checksum_kind_t lam_checksum_kind_t;
|
||||||
|
|
||||||
/* structs ************************************************************/
|
/* structs ************************************************************/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* State of incremental memcpy with checksum or CRC
|
* State of incremental memcpy with checksum or CRC
|
||||||
*/
|
*/
|
||||||
typedef struct lam_memcpy_state_t {
|
struct lam_memcpy_state_t {
|
||||||
size_t size; /**< total size in bytes of the object
|
size_t size; /**< total size in bytes of the object
|
||||||
* being checksummed / CRCed */
|
* being checksummed / CRCed */
|
||||||
size_t partial_size; /**< size of non- uint32_t to be carried
|
size_t partial_size; /**< size of non- uint32_t to be carried
|
||||||
@ -109,7 +138,7 @@ typedef struct lam_memcpy_state_t {
|
|||||||
* checksum */
|
* checksum */
|
||||||
bool first_call; /**< is this the first call for this
|
bool first_call; /**< is this the first call for this
|
||||||
* checksum/CRC? */
|
* checksum/CRC? */
|
||||||
} lam_memcpy_state_t;
|
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -181,24 +210,6 @@ struct lam_dataxdr_element_t {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Function protoype for a generalized memcpy()
|
|
||||||
*
|
|
||||||
* Copy data from one buffer to another and optionally calculate a
|
|
||||||
* checksum or CRC
|
|
||||||
*
|
|
||||||
* @param dst pointer to the destination buffer
|
|
||||||
* @param src pointer to the source buffer
|
|
||||||
* @param size size of the buffer
|
|
||||||
* @param check pointer to the optional checksum or CRC
|
|
||||||
* @return the original value of dst
|
|
||||||
*/
|
|
||||||
typedef void *(lam_memcpy_fn_t) (void *restrict dst,
|
|
||||||
const void *restrict src,
|
|
||||||
size_t size,
|
|
||||||
lam_memcpy_state_t *check);
|
|
||||||
|
|
||||||
|
|
||||||
/* interface **********************************************************/
|
/* interface **********************************************************/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -254,6 +265,7 @@ int lam_datatype_convert(void *dst,
|
|||||||
lam_memcpy_fn_t *memcpy_fn,
|
lam_memcpy_fn_t *memcpy_fn,
|
||||||
lam_memcpy_state_t *check);
|
lam_memcpy_state_t *check);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pack state
|
* Pack state
|
||||||
*
|
*
|
||||||
@ -419,13 +431,6 @@ lam_memcpy_init(lam_memcpy_state_t *state, size_t sum_size)
|
|||||||
state->first_call = true;
|
state->first_call = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* prototypes for memcpy functions
|
|
||||||
*/
|
|
||||||
|
|
||||||
extern lam_memcpy_fn_t lam_memcpy_crc32;
|
|
||||||
extern lam_memcpy_fn_t lam_memcpy_sum32;
|
|
||||||
extern lam_memcpy_fn_t lam_memcpy_sum64;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy data from one buffer to another
|
* Copy data from one buffer to another
|
||||||
@ -442,10 +447,67 @@ static inline void *lam_memcpy(void *dst, const void *src, size_t size,
|
|||||||
return memcpy(dst, src, size);
|
return memcpy(dst, src, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An alternative version of memcpy that may out-perform the system
|
||||||
|
* version on some (silly) systems.
|
||||||
|
*
|
||||||
|
* @param dst pointer to the destination buffer
|
||||||
|
* @param src pointer to the source buffer
|
||||||
|
* @param size size of the buffer
|
||||||
|
* @param state unused
|
||||||
|
* @return the original value of dst
|
||||||
|
*/
|
||||||
|
void *lam_memcpy_alt(void *dst, const void *src, size_t size,
|
||||||
|
lam_memcpy_state_t *state);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a 32-bit CRC for a buffer
|
||||||
|
*
|
||||||
|
* @param buffer Data buffer
|
||||||
|
* @param size Size of buffer
|
||||||
|
* @param initial_crc Initial value of the CRC register
|
||||||
|
* @return The CRC
|
||||||
|
*
|
||||||
|
* Generate a 32-bit CRC for a data buffer starting from a given CRC
|
||||||
|
* value.
|
||||||
|
*/
|
||||||
uint32_t lam_crc32(const void *restrict buffer, size_t size,
|
uint32_t lam_crc32(const void *restrict buffer, size_t size,
|
||||||
uint32_t initial_crc);
|
uint32_t initial_crc);
|
||||||
uint32_t lam_sum32(const void *restrict buffer, size_t size,
|
|
||||||
uint32_t initial_crc);
|
|
||||||
|
/**
|
||||||
|
* Generate a 32-bit checksum for a buffer
|
||||||
|
*
|
||||||
|
* @param buffer Data buffer
|
||||||
|
* @param size Size of buffer
|
||||||
|
* @return The checksum
|
||||||
|
*
|
||||||
|
* Generate a 32-bit checksum for a data buffer. Unlike lam_crc32(),
|
||||||
|
* no initial value is supplied.
|
||||||
|
*/
|
||||||
|
uint32_t lam_sum32(const void *restrict buffer, size_t size);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy data from one buffer to another and calculate a 32-bit CRC
|
||||||
|
*
|
||||||
|
* @param dst pointer to the destination buffer
|
||||||
|
* @param src pointer to the source buffer
|
||||||
|
* @param size size of the buffer
|
||||||
|
* @param state pointer to a memcpy with checksum/CRC state structure
|
||||||
|
* @return the original value of dst
|
||||||
|
*
|
||||||
|
* This handles cumulative CRCs for arbitrary lengths and address
|
||||||
|
* alignments as best as it can. The initial contents of state->sum is
|
||||||
|
* used as the starting value of the CRC. The final CRC is placed
|
||||||
|
* back in state->sum.
|
||||||
|
*/
|
||||||
|
void *lam_memcpy_crc32(void *restrict dst,
|
||||||
|
const void *restrict src,
|
||||||
|
size_t size,
|
||||||
|
lam_memcpy_state_t *check);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy data from one buffer to another and calculate a 32-bit checksum
|
* Copy data from one buffer to another and calculate a 32-bit checksum
|
||||||
@ -455,7 +517,18 @@ uint32_t lam_sum32(const void *restrict buffer, size_t size,
|
|||||||
* @param size size of the buffer
|
* @param size size of the buffer
|
||||||
* @param state pointer to a memcpy with checksum/CRC state structure
|
* @param state pointer to a memcpy with checksum/CRC state structure
|
||||||
* @return the original value of dst
|
* @return the original value of dst
|
||||||
|
*
|
||||||
|
* This handles cumulative checksumming for arbitrary lengths and
|
||||||
|
* address alignments as best as it can; the contents of
|
||||||
|
* lastPartialLong and lastPartialLength are updated to reflect the
|
||||||
|
* last partial word's value and length (in bytes) -- this should
|
||||||
|
* allow proper handling of checksumming contiguous or noncontiguous
|
||||||
|
* buffers via multiple calls of bcopy_csum() - Mitch
|
||||||
*/
|
*/
|
||||||
|
void *lam_memcpy_sum32(void *restrict dst,
|
||||||
|
const void *restrict src,
|
||||||
|
size_t size,
|
||||||
|
lam_memcpy_state_t *check);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -467,7 +540,29 @@ uint32_t lam_sum32(const void *restrict buffer, size_t size,
|
|||||||
* @param state pointer to a memcpy with checksum/CRC state structure
|
* @param state pointer to a memcpy with checksum/CRC state structure
|
||||||
* @return the original value of dst
|
* @return the original value of dst
|
||||||
*/
|
*/
|
||||||
|
void *lam_memcpy_sum64(void *restrict dst,
|
||||||
|
const void *restrict src,
|
||||||
|
size_t size,
|
||||||
|
lam_memcpy_state_t *check);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a LAM/MPI datatype
|
||||||
|
*
|
||||||
|
* @param combiner integer identifying the kind of MPI create function
|
||||||
|
* @param nintegers number of integers passed to the create function
|
||||||
|
* @param integers array of integers passed to the create function
|
||||||
|
* @param naddress number of addresses passed to the create function
|
||||||
|
* @param address array of addresses passed to the create function
|
||||||
|
* @param ntype number of data types passed to the create function
|
||||||
|
* @param types array of data types passed to the create function
|
||||||
|
* @param newtype pointer to address of new type
|
||||||
|
* @return LAM_SUCCESS on successful creation, LAM_ERROR otherwise
|
||||||
|
*
|
||||||
|
* This is the central location for creation of data types in LAM/MPI.
|
||||||
|
* All MPI_Type_create functions rely upon this to do the actual type
|
||||||
|
* creation.
|
||||||
|
*/
|
||||||
int lam_datatype_create(int combiner,
|
int lam_datatype_create(int combiner,
|
||||||
int nintegers,
|
int nintegers,
|
||||||
int integers[],
|
int integers[],
|
||||||
@ -477,8 +572,19 @@ int lam_datatype_create(int combiner,
|
|||||||
lam_datatype_t *types[],
|
lam_datatype_t *types[],
|
||||||
lam_datatype_t **newtype);
|
lam_datatype_t **newtype);
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete a LAM/MPI datatype (actually, just mark it for deletion)
|
||||||
|
*
|
||||||
|
* @param type datatype
|
||||||
|
* @return LAM_SUCCESS on success, LAM_ERROR otherwise
|
||||||
|
*
|
||||||
|
* This is the central location for creation of data types in LAM/MPI.
|
||||||
|
* All MPI_Type_create functions rely upon this to do the actual type
|
||||||
|
* creation.
|
||||||
|
*/
|
||||||
int lam_datatype_delete(lam_datatype_t *type);
|
int lam_datatype_delete(lam_datatype_t *type);
|
||||||
|
|
||||||
void *lam_memcpy_alt(void *dst, const void *src, size_t size, void *dummy);
|
|
||||||
|
|
||||||
#endif /* LAM_DATATYPE_H_INCLUDED */
|
#endif /* LAM_DATATYPE_H_INCLUDED */
|
||||||
|
@ -2,12 +2,15 @@
|
|||||||
* $HEADER$
|
* $HEADER$
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** @file dataype copy function */
|
/* lam_datatype_t copy function */
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "datatype.h"
|
#include "datatype.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copy (the contents of) an array of data types
|
||||||
|
*/
|
||||||
int lam_datatype_copy(void *dst,
|
int lam_datatype_copy(void *dst,
|
||||||
const void *src,
|
const void *src,
|
||||||
size_t count,
|
size_t count,
|
||||||
|
@ -2,18 +2,19 @@
|
|||||||
* $HEADER$
|
* $HEADER$
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** @file 32-bit cyclic redundancy check support */
|
/** @file
|
||||||
|
*
|
||||||
|
* 32-bit cyclic redundancy check support
|
||||||
|
*/
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "lam_config.h"
|
#include "lam_config.h"
|
||||||
#include "lam/stdint.h"
|
|
||||||
#include "datatype.h"
|
#include "datatype.h"
|
||||||
|
|
||||||
#define CRC_POLYNOMIAL ((uint32_t) 0x04c11db7)
|
#define CRC_POLYNOMIAL ((uint32_t) 0x04c11db7)
|
||||||
#define CRC_INITIAL_REGISTER ((uint32_t) 0xffffffff)
|
#define CRC_INITIAL_REGISTER ((uint32_t) 0xffffffff)
|
||||||
#define IS_32BIT_ALIGNED(X) \
|
|
||||||
(((uint32_t)(X) & (uint32_t) 3) == (uint32_t) 0 ? 1 : 0)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Look-up table for CRC32 generation
|
* Look-up table for CRC32 generation
|
||||||
@ -21,6 +22,7 @@
|
|||||||
static bool crc_table_initialized = false;
|
static bool crc_table_initialized = false;
|
||||||
static uint32_t crc_table[256];
|
static uint32_t crc_table[256];
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CRC32 table generation
|
* CRC32 table generation
|
||||||
*
|
*
|
||||||
@ -48,18 +50,11 @@ static void initialize_crc_table(void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* Generate a 32-bit CRC for a buffer
|
* Generate a 32-bit CRC for a buffer
|
||||||
*
|
|
||||||
* @param buffer Data buffer
|
|
||||||
* @param size Size of buffer
|
|
||||||
* @param initial_crc Initial value of the CRC register
|
|
||||||
* @return The CRC
|
|
||||||
*
|
|
||||||
* Generate a 32-bit for a data buffer starting from a given CRC
|
|
||||||
* value.
|
|
||||||
*/
|
*/
|
||||||
uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_crc)
|
uint32_t lam_crc32(const void *restrict buffer, size_t size,
|
||||||
|
uint32_t initial_crc)
|
||||||
{
|
{
|
||||||
register int i, j;
|
register int i, j;
|
||||||
register unsigned char *t;
|
register unsigned char *t;
|
||||||
@ -70,7 +65,7 @@ uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_cr
|
|||||||
initialize_crc_table();
|
initialize_crc_table();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IS_32BIT_ALIGNED(buffer)) {
|
if (LAM_IS_32BIT_ALIGNED(buffer)) {
|
||||||
register uint32_t *restrict src = (uint32_t *) buffer;
|
register uint32_t *restrict src = (uint32_t *) buffer;
|
||||||
while (size >= sizeof(uint32_t)) {
|
while (size >= sizeof(uint32_t)) {
|
||||||
tmp = *src++;
|
tmp = *src++;
|
||||||
@ -98,19 +93,8 @@ uint32_t lam_crc32(const void *restrict buffer, size_t size, uint32_t initial_cr
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* Copy data from one buffer to another and calculate a 32-bit CRC
|
* Copy data from one buffer to another and calculate a 32-bit CRC
|
||||||
*
|
|
||||||
* @param dst pointer to the destination buffer
|
|
||||||
* @param src pointer to the source buffer
|
|
||||||
* @param size size of the buffer
|
|
||||||
* @param state pointer to a memcpy with checksum/CRC state structure
|
|
||||||
* @return the original value of dst
|
|
||||||
*
|
|
||||||
* This handles cumulative CRCs for for arbitrary lengths and address
|
|
||||||
* alignments as best as it can. The initial contents of state->sum is
|
|
||||||
* used as the starting value of the CRC. The final CRC is placed
|
|
||||||
* back in state->sum.
|
|
||||||
*/
|
*/
|
||||||
void *lam_memcpy_crc32(void *restrict dst,
|
void *lam_memcpy_crc32(void *restrict dst,
|
||||||
const void *restrict src,
|
const void *restrict src,
|
||||||
@ -132,7 +116,7 @@ void *lam_memcpy_crc32(void *restrict dst,
|
|||||||
state->sum = CRC_INITIAL_REGISTER;
|
state->sum = CRC_INITIAL_REGISTER;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IS_32BIT_ALIGNED(src) && IS_32BIT_ALIGNED(dst)) {
|
if (LAM_IS_32BIT_ALIGNED(src) && LAM_IS_32BIT_ALIGNED(dst)) {
|
||||||
register uint32_t *restrict p = (uint32_t *) dst;
|
register uint32_t *restrict p = (uint32_t *) dst;
|
||||||
register uint32_t *restrict q = (uint32_t *) src;
|
register uint32_t *restrict q = (uint32_t *) src;
|
||||||
register unsigned char *ts, *td;
|
register unsigned char *ts, *td;
|
||||||
|
@ -60,7 +60,7 @@ int lam_datatype_create(int combiner,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (count == 0) {
|
if (count == 0) {
|
||||||
newtype = (lam_datatype_t *) LAM_MALLOC(sizeof(lam_datatype_t));
|
newtype = (lam_datatype_t *) malloc(sizeof(lam_datatype_t));
|
||||||
if (newtype == NULL) {
|
if (newtype == NULL) {
|
||||||
ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
|
ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
|
||||||
rc = MPI_ERR_TYPE;
|
rc = MPI_ERR_TYPE;
|
||||||
@ -84,7 +84,7 @@ int lam_datatype_create(int combiner,
|
|||||||
newtype->envelope.nints = 1;
|
newtype->envelope.nints = 1;
|
||||||
newtype->envelope.naddrs = 0;
|
newtype->envelope.naddrs = 0;
|
||||||
newtype->envelope.ndatatypes = 0;
|
newtype->envelope.ndatatypes = 0;
|
||||||
newtype->envelope.iarray = (int *) LAM_MALLOC(sizeof(int));
|
newtype->envelope.iarray = (int *) malloc(sizeof(int));
|
||||||
newtype->envelope.aarray = NULL;
|
newtype->envelope.aarray = NULL;
|
||||||
newtype->envelope.darray = NULL;
|
newtype->envelope.darray = NULL;
|
||||||
newtype->envelope.iarray[0] = count;
|
newtype->envelope.iarray[0] = count;
|
||||||
@ -97,7 +97,7 @@ int lam_datatype_create(int combiner,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Allocate new type */
|
/* Allocate new type */
|
||||||
newtype = LAM_MALLOC(sizeof(lam_datatype_t));
|
newtype = malloc(sizeof(lam_datatype_t));
|
||||||
if (newtype == NULL) {
|
if (newtype == NULL) {
|
||||||
ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
|
ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
|
||||||
rc = MPI_ERR_TYPE;
|
rc = MPI_ERR_TYPE;
|
||||||
@ -118,12 +118,12 @@ int lam_datatype_create(int combiner,
|
|||||||
newtype->envelope.naddrs = count;
|
newtype->envelope.naddrs = count;
|
||||||
newtype->envelope.ndatatypes = count;
|
newtype->envelope.ndatatypes = count;
|
||||||
newtype->envelope.iarray =
|
newtype->envelope.iarray =
|
||||||
(int *) LAM_MALLOC(newtype->envelope.nints * sizeof(int));
|
(int *) malloc(newtype->envelope.nints * sizeof(int));
|
||||||
newtype->envelope.aarray =
|
newtype->envelope.aarray =
|
||||||
(MPI_Aint *) LAM_MALLOC(newtype->envelope.naddrs *
|
(MPI_Aint *) malloc(newtype->envelope.naddrs *
|
||||||
sizeof(MPI_Aint));
|
sizeof(MPI_Aint));
|
||||||
newtype->envelope.darray =
|
newtype->envelope.darray =
|
||||||
(MPI_Datatype *) LAM_MALLOC(newtype->envelope.ndatatypes *
|
(MPI_Datatype *) malloc(newtype->envelope.ndatatypes *
|
||||||
sizeof(MPI_Datatype));
|
sizeof(MPI_Datatype));
|
||||||
newtype->envelope.iarray[0] = count;
|
newtype->envelope.iarray[0] = count;
|
||||||
for (i = 0; i < count; i++) {
|
for (i = 0; i < count; i++) {
|
||||||
@ -253,7 +253,7 @@ int lam_datatype_create(int combiner,
|
|||||||
if (newtype->num_pairs > 0) {
|
if (newtype->num_pairs > 0) {
|
||||||
/* allocate the type_map */
|
/* allocate the type_map */
|
||||||
newtype->type_map = (ULMTypeMapElt_t *)
|
newtype->type_map = (ULMTypeMapElt_t *)
|
||||||
LAM_MALLOC(newtype->num_pairs * sizeof(ULMTypeMapElt_t));
|
malloc(newtype->num_pairs * sizeof(ULMTypeMapElt_t));
|
||||||
if (newtype->type_map == NULL) {
|
if (newtype->type_map == NULL) {
|
||||||
ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
|
ulm_err(("Error: MPI_Type_struct: Out of memory\n"));
|
||||||
rc = MPI_ERR_TYPE;
|
rc = MPI_ERR_TYPE;
|
||||||
|
@ -2,19 +2,12 @@
|
|||||||
* $HEADER$
|
* $HEADER$
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** @file datatype deletion function */
|
/* lam_datatype_t deletion function */
|
||||||
|
|
||||||
#include "datatype.h"
|
#include "datatype.h"
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* Delete a LAM/MPI datatype (actually, just mark it for deletion)
|
* Delete a LAM/MPI datatype (actually, just mark it for deletion)
|
||||||
*
|
|
||||||
* @param type datatype
|
|
||||||
* @return LAM_SUCCESS on success, LAM_ERROR otherwise
|
|
||||||
*
|
|
||||||
* This is the central location for creation of data types in LAM/MPI.
|
|
||||||
* All MPI_Type_create functions rely upon this to do the actual type
|
|
||||||
* creation.
|
|
||||||
*/
|
*/
|
||||||
int lam_datatype_delete(lam_datatype_t *type)
|
int lam_datatype_delete(lam_datatype_t *type)
|
||||||
{
|
{
|
||||||
|
@ -2,30 +2,22 @@
|
|||||||
* $HEADER$
|
* $HEADER$
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** @file alternative memcpy function */
|
/* alternative memcpy function */
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "lam_config.h"
|
#include "lam_config.h"
|
||||||
#include "lam/stdint.h"
|
|
||||||
|
|
||||||
#include "datatype.h"
|
#include "datatype.h"
|
||||||
|
|
||||||
#define ALIGNED32(X) (((uint32_t)(X) & (uint32_t) 3) == (uint32_t) 0 ? 1 : 0)
|
#define ALIGNED32(X) (((uint32_t)(X) & (uint32_t) 3) == (uint32_t) 0 ? 1 : 0)
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* Alternative memcpy function
|
* Alternative memcpy function: On some systems, this performs better
|
||||||
*
|
* than the system memcpy.
|
||||||
* @param dst destination buffer
|
|
||||||
* @param src source buffer
|
|
||||||
* @param size size of buffer
|
|
||||||
* @param dummy unused variable
|
|
||||||
* @return the original value of dst
|
|
||||||
*
|
|
||||||
* On some systems, this performs better than the system memcpy.
|
|
||||||
*/
|
*/
|
||||||
void *lam_memcpy_alt(void *dst, const void *src, size_t size, void *dummy)
|
void *lam_memcpy_alt(void *dst, const void *src, size_t size,
|
||||||
|
lam_memcpy_state_t *dummy)
|
||||||
{
|
{
|
||||||
if (ALIGNED32(src) && ALIGNED32(dst)) {
|
if (ALIGNED32(src) && ALIGNED32(dst)) {
|
||||||
uint32_t *restrict p = (uint32_t *) dst;
|
uint32_t *restrict p = (uint32_t *) dst;
|
||||||
|
@ -2,33 +2,28 @@
|
|||||||
* $HEADER$
|
* $HEADER$
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** @file 32-bit checksum support */
|
/** @file
|
||||||
|
*
|
||||||
|
* 32-bit checksum support
|
||||||
|
*/
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#include "lam_config.h"
|
#include "lam_config.h"
|
||||||
#include "lam/stdint.h"
|
|
||||||
#include "datatype.h"
|
#include "datatype.h"
|
||||||
|
|
||||||
#define IS_32BIT_ALIGNED(X) \
|
|
||||||
(((uint32_t)(X) & (uint32_t) 3) == ((uint32_t) 0 ? 1 : 0))
|
/*
|
||||||
|
* Generate a 32-bit checksum for a buffer
|
||||||
|
*/
|
||||||
|
uint32_t lam_sum32(const void *restrict buffer, size_t size)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* Copy data from one buffer to another and calculate a 32-bit checksum
|
* Copy data from one buffer to another and calculate a 32-bit checksum
|
||||||
*
|
|
||||||
* @param dst pointer to the destination buffer
|
|
||||||
* @param src pointer to the source buffer
|
|
||||||
* @param size size of the buffer
|
|
||||||
* @param state pointer to a memcpy with checksum/CRC state structure
|
|
||||||
* @return the original value of dst
|
|
||||||
*
|
|
||||||
* This handles cumulative checksumming for arbitrary lengths and
|
|
||||||
* address alignments as best as it can; the contents of
|
|
||||||
* lastPartialLong and lastPartialLength are updated to reflected the
|
|
||||||
* last partial word's value and length (in bytes) -- this should
|
|
||||||
* allow proper handling of checksumming contiguous or noncontiguous
|
|
||||||
* buffers via multiple calls of bcopy_csum() - Mitch
|
|
||||||
*/
|
*/
|
||||||
void *lam_memcpy_sum32(void *restrict dst,
|
void *lam_memcpy_sum32(void *restrict dst,
|
||||||
const void *restrict src,
|
const void *restrict src,
|
||||||
@ -51,7 +46,7 @@ void *lam_memcpy_sum32(void *restrict dst,
|
|||||||
csumlenresidue = (csumlen > size) ? (csumlen - size) : 0;
|
csumlenresidue = (csumlen > size) ? (csumlen - size) : 0;
|
||||||
temp = state->partial_int;
|
temp = state->partial_int;
|
||||||
|
|
||||||
if (IS_32BIT_ALIGNED(p) && IS_32BIT_ALIGNED(q)) {
|
if (LAM_IS_32BIT_ALIGNED(p) && LAM_IS_32BIT_ALIGNED(q)) {
|
||||||
if (state->partial_size) {
|
if (state->partial_size) {
|
||||||
/* do we have enough data to fill out the partial word? */
|
/* do we have enough data to fill out the partial word? */
|
||||||
if (size >= (sizeof(uint32_t) - state->partial_size)) {
|
if (size >= (sizeof(uint32_t) - state->partial_size)) {
|
||||||
@ -98,14 +93,14 @@ void *lam_memcpy_sum32(void *restrict dst,
|
|||||||
}
|
}
|
||||||
state->partial_int = 0;
|
state->partial_int = 0;
|
||||||
state->partial_size = 0;
|
state->partial_size = 0;
|
||||||
if (IS_32BIT_ALIGNED(size) && (csumlenresidue == 0)) {
|
if (LAM_IS_32BIT_ALIGNED(size) && (csumlenresidue == 0)) {
|
||||||
state->sum = csum;
|
state->sum = csum;
|
||||||
return dst;
|
return dst;
|
||||||
} else {
|
} else {
|
||||||
size -= i * sizeof(uint32_t);
|
size -= i * sizeof(uint32_t);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (IS_32BIT_ALIGNED(q)) {
|
} else if (LAM_IS_32BIT_ALIGNED(q)) {
|
||||||
if (state->partial_size) {
|
if (state->partial_size) {
|
||||||
/* do we have enough data to fill out the partial word? */
|
/* do we have enough data to fill out the partial word? */
|
||||||
if (size >= (sizeof(uint32_t) - state->partial_size)) {
|
if (size >= (sizeof(uint32_t) - state->partial_size)) {
|
||||||
@ -124,7 +119,7 @@ void *lam_memcpy_sum32(void *restrict dst,
|
|||||||
* now we have an unaligned source and an unknown
|
* now we have an unaligned source and an unknown
|
||||||
* alignment for our destination
|
* alignment for our destination
|
||||||
*/
|
*/
|
||||||
if (IS_32BIT_ALIGNED(p)) {
|
if (LAM_IS_32BIT_ALIGNED(p)) {
|
||||||
size_t numLongs = size / sizeof(uint32_t);
|
size_t numLongs = size / sizeof(uint32_t);
|
||||||
for (i = 0; i < numLongs; i++) {
|
for (i = 0; i < numLongs; i++) {
|
||||||
memcpy(&temp, q, sizeof(temp));
|
memcpy(&temp, q, sizeof(temp));
|
||||||
@ -165,7 +160,7 @@ void *lam_memcpy_sum32(void *restrict dst,
|
|||||||
state->partial_int = 0;
|
state->partial_int = 0;
|
||||||
state->partial_size = 0;
|
state->partial_size = 0;
|
||||||
}
|
}
|
||||||
} else if (IS_32BIT_ALIGNED(p)) {
|
} else if (LAM_IS_32BIT_ALIGNED(p)) {
|
||||||
if (state->partial_size) {
|
if (state->partial_size) {
|
||||||
/* do we have enough data to fill out the partial word? */
|
/* do we have enough data to fill out the partial word? */
|
||||||
if (size >= (sizeof(uint32_t) - state->partial_size)) {
|
if (size >= (sizeof(uint32_t) - state->partial_size)) {
|
||||||
@ -184,7 +179,7 @@ void *lam_memcpy_sum32(void *restrict dst,
|
|||||||
* now we have a source of unknown alignment and a
|
* now we have a source of unknown alignment and a
|
||||||
* unaligned destination
|
* unaligned destination
|
||||||
*/
|
*/
|
||||||
if (IS_32BIT_ALIGNED(q)) {
|
if (LAM_IS_32BIT_ALIGNED(q)) {
|
||||||
for (; size >= sizeof(*q); size -= sizeof(*q)) {
|
for (; size >= sizeof(*q); size -= sizeof(*q)) {
|
||||||
temp = *q++;
|
temp = *q++;
|
||||||
csum += temp;
|
csum += temp;
|
||||||
@ -244,7 +239,7 @@ void *lam_memcpy_sum32(void *restrict dst,
|
|||||||
* now we have an unknown alignment for our source and
|
* now we have an unknown alignment for our source and
|
||||||
* destination
|
* destination
|
||||||
*/
|
*/
|
||||||
if (IS_32BIT_ALIGNED(q) && IS_32BIT_ALIGNED(p)) {
|
if (LAM_IS_32BIT_ALIGNED(q) && LAM_IS_32BIT_ALIGNED(p)) {
|
||||||
size_t numLongs = size / sizeof(uint32_t);
|
size_t numLongs = size / sizeof(uint32_t);
|
||||||
for (i = 0; i < numLongs; i++) {
|
for (i = 0; i < numLongs; i++) {
|
||||||
csum += *q;
|
csum += *q;
|
||||||
@ -390,7 +385,7 @@ void *lam_memcpy_sum32(void *restrict dst,
|
|||||||
state->partial_size = 0;
|
state->partial_size = 0;
|
||||||
state->partial_int = 0;
|
state->partial_int = 0;
|
||||||
}
|
}
|
||||||
if (IS_32BIT_ALIGNED(q)) {
|
if (LAM_IS_32BIT_ALIGNED(q)) {
|
||||||
for (i = 0; i < csumlenresidue / sizeof(uint32_t); i++) {
|
for (i = 0; i < csumlenresidue / sizeof(uint32_t); i++) {
|
||||||
csum += *q++;
|
csum += *q++;
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user