1
1

Last step for the generation of 2 set of pack/unpack functions. One with

checksum and the other without. Split the large files into smaller one, and
put similar functions together.

This commit was SVN r9323.
Этот коммит содержится в:
George Bosilca 2006-03-17 08:04:59 +00:00
родитель a465047e97
Коммит 9955eb2f2e
11 изменённых файлов: 993 добавлений и 523 удалений

Просмотреть файл

@ -28,9 +28,8 @@ headers = \
convertor.h
noinst_LTLIBRARIES = \
libdatatype.la
# libdatatype_reliable.la
libdatatype.la \
libdatatype_reliable.la
# these sources will be compiled with the normal CFLAGS only
libdatatype_la_SOURCES = \
@ -54,13 +53,12 @@ libdatatype_la_SOURCES = \
dt_external32.c \
dt_match_size.c \
convertor.c \
new_pack.c \
new_unpack.c \
new_position.c
new_position.c \
datatype_memcpy.c copy_functions.c get_count.c
# these sources will be compiled with the special -D
#libdatatype_reliable_la_SOURCES =
#libdatatype_reliable_la_CFLAGS = -DGEORGES_SPECIAL_FLAG=1 $(AM_CFLAGS)
libdatatype_reliable_la_SOURCES = dt_pack.c dt_unpack.c
libdatatype_reliable_la_CFLAGS = -DCHECKSUM $(AM_CFLAGS)
# Conditionally install the header files
if WANT_INSTALL_HEADERS

Просмотреть файл

@ -327,6 +327,91 @@ int ompi_convertor_prepare( ompi_convertor_t* convertor,
return ompi_convertor_create_stack_at_begining( convertor, ompi_ddt_local_sizes );
}
/*
* All the conversion functions (pack and unpack) we have. For each
* function there are 2 versions: one without checksum (using memcpy)
* and one with checksum.
*/
extern convertor_advance_fct_t ompi_unpack_general;
extern convertor_advance_fct_t ompi_unpack_general_checksum;
extern convertor_advance_fct_t ompi_unpack_homogeneous;
extern convertor_advance_fct_t ompi_unpack_homogeneous_checksum;
extern convertor_advance_fct_t ompi_generic_simple_unpack;
extern convertor_advance_fct_t ompi_generic_simple_unpack_checksum;
extern convertor_advance_fct_t ompi_unpack_homogeneous_contig;
extern convertor_advance_fct_t ompi_unpack_homogeneous_contig_checksum;
extern convertor_advance_fct_t ompi_pack_general;
extern convertor_advance_fct_t ompi_pack_general_checksum;
extern convertor_advance_fct_t ompi_pack_homogeneous_with_memcpy;
extern convertor_advance_fct_t ompi_pack_homogeneous_with_memcpy_checksum;
extern convertor_advance_fct_t ompi_pack_no_conversion;
extern convertor_advance_fct_t ompi_pack_no_conversion_checksum;
extern convertor_advance_fct_t ompi_generic_simple_pack;
extern convertor_advance_fct_t ompi_generic_simple_pack_checksum;
extern convertor_advance_fct_t ompi_pack_no_conv_contig;
extern convertor_advance_fct_t ompi_pack_no_conv_contig_checksum;
extern convertor_advance_fct_t ompi_pack_no_conv_contig_with_gaps;
extern convertor_advance_fct_t ompi_pack_no_conv_contig_with_gaps_checksum;
int32_t
ompi_convertor_prepare_for_recv( ompi_convertor_t* convertor,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf )
{
/* Here I should check that the data is not overlapping */
if( OMPI_SUCCESS != ompi_convertor_prepare( convertor, datatype,
count, pUserBuf ) ) {
return OMPI_ERROR;
}
convertor->flags |= CONVERTOR_RECV;
convertor->memAlloc_fn = NULL;
convertor->fAdvance = ompi_unpack_general; /* TODO: just stop complaining */
convertor->fAdvance = ompi_unpack_homogeneous; /* default behaviour */
convertor->fAdvance = ompi_generic_simple_unpack;
/* TODO: work only on homogeneous architectures */
if( convertor->pDesc->flags & DT_FLAG_CONTIGUOUS ) {
assert( convertor->flags & DT_FLAG_CONTIGUOUS );
convertor->fAdvance = ompi_unpack_homogeneous_contig;
}
return OMPI_SUCCESS;
}
int32_t
ompi_convertor_prepare_for_send( ompi_convertor_t* convertor,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf )
{
if( OMPI_SUCCESS != ompi_convertor_prepare( convertor, datatype,
count, pUserBuf ) ) {
return OMPI_ERROR;
}
convertor->flags |= CONVERTOR_SEND;
convertor->memAlloc_fn = NULL;
/* Just to avoid complaint from the compiler */
convertor->fAdvance = ompi_pack_general;
convertor->fAdvance = ompi_pack_homogeneous_with_memcpy;
convertor->fAdvance = ompi_pack_no_conversion;
convertor->fAdvance = ompi_generic_simple_pack;
if( datatype->flags & DT_FLAG_CONTIGUOUS ) {
assert( convertor->flags & DT_FLAG_CONTIGUOUS );
if( ((datatype->ub - datatype->lb) == (long)datatype->size) )
convertor->fAdvance = ompi_pack_no_conv_contig;
else if( 1 >= convertor->count ) /* gaps or no gaps */
convertor->fAdvance = ompi_pack_no_conv_contig;
else
convertor->fAdvance = ompi_pack_no_conv_contig_with_gaps;
}
return OMPI_SUCCESS;
}
/*
* These functions can be used in order to create an IDENTICAL copy of one convertor. In this
* context IDENTICAL means that the datatype and count and all other properties of the basic
@ -385,3 +470,21 @@ void ompi_convertor_dump( ompi_convertor_t* convertor )
ompi_ddt_dump_stack( convertor->pStack, convertor->stack_pos,
convertor->pDesc->desc.desc, convertor->pDesc->name );
}
void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos,
const union dt_elem_desc* pDesc, const char* name )
{
opal_output( 0, "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );
for( ; stack_pos >= 0; stack_pos-- ) {
opal_output( 0, "%d: pos %d count %d disp %ld end_loop %d ", stack_pos, pStack[stack_pos].index,
pStack[stack_pos].count, pStack[stack_pos].disp, pStack[stack_pos].end_loop );
if( pStack->index != -1 )
opal_output( 0, "\t[desc count %d disp %ld extent %d]\n",
pDesc[pStack[stack_pos].index].elem.count,
pDesc[pStack[stack_pos].index].elem.disp,
pDesc[pStack[stack_pos].index].elem.extent );
else
opal_output( 0, "\n" );
}
opal_output( 0, "\n" );
}

Просмотреть файл

@ -44,6 +44,7 @@ extern "C" {
#define CONVERTOR_SEND 0x00040000
#define CONVERTOR_HOMOGENEOUS 0x00080000
#define CONVERTOR_CLONE 0x00100000
#define CONVERTOR_WITH_CHECKSUM 0x00200000
#define CONVERTOR_TYPE_MASK 0x00FF0000
#define CONVERTOR_STATE_MASK 0xFF000000
#define CONVERTOR_STATE_START 0x01000000
@ -58,8 +59,8 @@ typedef int32_t (*conversion_fct_t)( uint32_t count,
typedef struct ompi_convertor_t ompi_convertor_t;
typedef int32_t (*convertor_advance_fct_t)( ompi_convertor_t* pConvertor,
struct iovec* pInputv,
uint32_t* inputCount,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter );
typedef void*(*memalloc_fct_t)( size_t* pLength, void* userdata );
@ -281,24 +282,6 @@ OMPI_DECLSPEC void ompi_convertor_dump( ompi_convertor_t* convertor );
OMPI_DECLSPEC void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos,
const union dt_elem_desc* pDesc, const char* name );
/*
*
*/
OMPI_DECLSPEC int
ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter );
/*
*
*/
OMPI_DECLSPEC int
ompi_convertor_generic_simple_unpack( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter );
/*
*
*/

281
ompi/datatype/copy_functions.c Обычный файл
Просмотреть файл

@ -0,0 +1,281 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/datatype/datatype.h"
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype_internal.h"
#include "ompi/datatype/datatype_checksum.h"
/*
* This function is used to copy data from one buffer to another. The assumption
* is that the number of bytes per element to copy at the source and destination
* are the same.
* count - number of instances of a given data-type to copy
* from - point to the source buffer
* to - pointer to the destination buffer
* from_len - length of source buffer (in bytes)
* to_len - length of destination buffer (in bytes)
* from_extent - extent of the source data type (in bytes)
* to_extent - extent of the destination data type (in bytes)
*
* Return value: Number of elements of type TYPE copied
*/
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
static int copy_##TYPENAME( uint32_t count, \
char* from, uint32_t from_len, long from_extent, \
char* to, uint32_t to_len, long to_extent ) \
{ \
uint32_t i; \
uint32_t remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \
uint32_t local_TYPE_size = (COUNT) * sizeof(TYPE); \
\
/* make sure the remote buffer is large enough to hold the data */ \
if( (remote_TYPE_size * count) > from_len ) { \
count = from_len / remote_TYPE_size; \
if( (count * remote_TYPE_size) != from_len ) { \
DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", \
from_len - (count * remote_TYPE_size) ); \
} \
DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n", \
#TYPE, count, from, from_len, to, to_len ); \
} else \
DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \
#TYPE, count, from, from_len, to, to_len ); \
\
if( (from_extent == (long)local_TYPE_size) && \
(to_extent == (long)remote_TYPE_size) ) { \
/* copy of contigous data at both source and destination */ \
MEMCPY( to, from, count * local_TYPE_size ); \
} else { \
/* source or destination are non-contigous */ \
for( i = 0; i < count; i++ ) { \
MEMCPY( to, from, local_TYPE_size ); \
to += to_extent; \
from += from_extent; \
} \
} \
return count; \
}
/*
* This function is used to copy data from one buffer to another. The assumption
* is that the number of bytes per element to copy at the source and destination
* are the same.
* count - number of instances of a given data-type to copy
* from - point to the source buffer
* to - pointer to the destination buffer
* from_len - length of source buffer (in bytes)
* to_len - length of destination buffer (in bytes)
* from_extent - extent of the source data type (in bytes)
* to_extent - extent of the destination data type (in bytes)
*
* Return value: Number of elements of type TYPE copied
*/
#define COPY_CONTIGUOUS_BYTES( TYPENAME, COUNT ) \
static int copy_##TYPENAME##_##COUNT( uint32_t count, \
char* from, uint32_t from_len, long from_extent, \
char* to, uint32_t to_len, long to_extent) \
{ \
uint32_t i; \
uint32_t remote_TYPE_size = (COUNT); /* TODO */ \
uint32_t local_TYPE_size = (COUNT); \
\
if( (remote_TYPE_size * count) > from_len ) { \
count = from_len / remote_TYPE_size; \
if( (count * remote_TYPE_size) != from_len ) { \
DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", \
from_len - (count * remote_TYPE_size) ); \
} \
DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n", \
#TYPENAME, count, from, from_len, to, to_len ); \
} else \
DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \
#TYPENAME, count, from, from_len, to, to_len ); \
\
if( (from_extent == (long)local_TYPE_size) && \
(to_extent == (long)remote_TYPE_size) ) { \
MEMCPY( to, from, count * local_TYPE_size ); \
} else { \
for( i = 0; i < count; i++ ) { \
MEMCPY( to, from, local_TYPE_size ); \
to += to_extent; \
from += from_extent; \
} \
} \
return count; \
}
/* set up copy functions for the basic C MPI data types */
COPY_TYPE( char, char, 1 )
COPY_TYPE( short, short, 1 )
COPY_TYPE( int, int, 1 )
COPY_TYPE( float, float, 1 )
COPY_TYPE( long, long, 1 )
COPY_TYPE( double, double, 1 )
COPY_TYPE( long_long, long long, 1 )
COPY_TYPE( long_double, long double, 1 )
COPY_TYPE( complex_float, ompi_complex_float_t, 1 )
COPY_TYPE( complex_double, ompi_complex_double_t, 1 )
COPY_TYPE( complex_long_double, ompi_complex_long_double_t, 1 )
COPY_TYPE( wchar, wchar_t, 1 )
COPY_TYPE( 2int, int, 2 )
COPY_TYPE( 2float, float, 2 )
COPY_TYPE( 2double, double, 2 )
COPY_TYPE( 2complex_float, ompi_complex_float_t, 2 )
COPY_TYPE( 2complex_double, ompi_complex_double_t, 2 )
#if OMPI_SIZEOF_FORTRAN_LOGICAL == 1 || SIZEOF_BOOL == 1
#define REQUIRE_COPY_BYTES_1 1
#else
#define REQUIRE_COPY_BYTES_1 0
#endif
#if OMPI_SIZEOF_FORTRAN_LOGICAL == 2 || SIZEOF_BOOL == 2
#define REQUIRE_COPY_BYTES_2 1
#else
#define REQUIRE_COPY_BYTES_2 0
#endif
#if OMPI_SIZEOF_FORTRAN_LOGICAL == 4 || SIZEOF_BOOL == 4
#define REQUIRE_COPY_BYTES_4 1
#else
#define REQUIRE_COPY_BYTES_4 0
#endif
#if (SIZEOF_FLOAT + SIZEOF_INT) == 8 || (SIZEOF_LONG + SIZEOF_INT) == 8 || SIZEOF_BOOL == 8
#define REQUIRE_COPY_BYTES_8 1
#else
#define REQUIRE_COPY_BYTES_8 0
#endif
#if (SIZEOF_DOUBLE + SIZEOF_INT) == 12 || (SIZEOF_LONG + SIZEOF_INT) == 12
#define REQUIRE_COPY_BYTES_12 1
#else
#define REQUIRE_COPY_BYTES_12 0
#endif
#if (SIZEOF_LONG_DOUBLE + SIZEOF_INT) == 16
#define REQUIRE_COPY_BYTES_16 1
#else
#define REQUIRE_COPY_BYTES_16 0
#endif
#if (SIZEOF_LONG_DOUBLE + SIZEOF_INT) == 20
#define REQUIRE_COPY_BYTES_20 1
#else
#define REQUIRE_COPY_BYTES_20 0
#endif
#if REQUIRE_COPY_BYTES_1
COPY_CONTIGUOUS_BYTES( bytes, 1 )
#endif /* REQUIRE_COPY_BYTES_1 */
#if REQUIRE_COPY_BYTES_2
COPY_CONTIGUOUS_BYTES( bytes, 2 )
#endif /* REQUIRE_COPY_BYTES_2 */
#if REQUIRE_COPY_BYTES_4
COPY_CONTIGUOUS_BYTES( bytes, 4 )
#endif /* REQUIRE_COPY_BYTES_4 */
#if REQUIRE_COPY_BYTES_8
COPY_CONTIGUOUS_BYTES( bytes, 8 )
#endif /* REQUIRE_COPY_BYTES_8 */
#if REQUIRE_COPY_BYTES_12
COPY_CONTIGUOUS_BYTES( bytes, 12 )
#endif /* REQUIRE_COPY_BYTES_12 */
#if REQUIRE_COPY_BYTES_16
COPY_CONTIGUOUS_BYTES( bytes, 16 )
#endif /* REQUIRE_COPY_BYTES_16 */
#if REQUIRE_COPY_BYTES_20
COPY_CONTIGUOUS_BYTES( bytes, 20 )
#endif /* REQUIRE_COPY_BYTES_20 */
/* table of predefined copy functions - one for each MPI type */
conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED] = {
(conversion_fct_t)NULL, /* DT_LOOP */
(conversion_fct_t)NULL, /* DT_END_LOOP */
(conversion_fct_t)NULL, /* DT_LB */
(conversion_fct_t)NULL, /* DT_UB */
(conversion_fct_t)copy_char, /* DT_CHAR */
(conversion_fct_t)copy_char, /* DT_CHARACTER */
(conversion_fct_t)copy_char, /* DT_UNSIGNED_CHAR */
(conversion_fct_t)copy_char, /* DT_BYTE */
(conversion_fct_t)copy_short, /* DT_SHORT */
(conversion_fct_t)copy_short, /* DT_UNSIGNED_SHORT */
(conversion_fct_t)copy_int, /* DT_INT */
(conversion_fct_t)copy_int, /* DT_UNSIGNED_INT */
(conversion_fct_t)copy_long, /* DT_LONG */
(conversion_fct_t)copy_long, /* DT_UNSIGNED_LONG */
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG */
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG_INT */
(conversion_fct_t)copy_long_long, /* DT_UNSIGNED_LONG_LONG */
(conversion_fct_t)copy_float, /* DT_FLOAT */
(conversion_fct_t)copy_double, /* DT_DOUBLE */
(conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */
(conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */
(conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */
(conversion_fct_t)copy_complex_long_double, /* DT_COMPLEX_LONG_DOUBLE */
(conversion_fct_t)NULL, /* DT_PACKED */
#if OMPI_SIZEOF_FORTRAN_LOGICAL == 1
(conversion_fct_t)copy_bytes_1, /* DT_LOGIC */
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == 4
(conversion_fct_t)copy_bytes_4, /* DT_LOGIC */
#elif 1 /* always, some compiler complain if there is not value */
NULL, /* DT_LOGIC */
#endif
#if (SIZEOF_FLOAT + SIZEOF_INT) == 8
(conversion_fct_t)copy_bytes_8, /* DT_FLOAT_INT */
#else
#error Complete me please
#endif
#if (SIZEOF_DOUBLE + SIZEOF_INT) == 12
(conversion_fct_t)copy_bytes_12, /* DT_DOUBLE_INT */
#else
#error Complete me please
#endif
#if (SIZEOF_LONG_DOUBLE + SIZEOF_INT) == 12
(conversion_fct_t)copy_bytes_12, /* DT_LONG_DOUBLE_INT */
#elif (SIZEOF_LONG_DOUBLE + SIZEOF_INT) == 16
(conversion_fct_t)copy_bytes_16, /* DT_LONG_DOUBLE_INT */
#elif (SIZEOF_LONG_DOUBLE + SIZEOF_INT) == 20
(conversion_fct_t)copy_bytes_20, /* DT_LONG_DOUBLE_INT */
#else
#error Complete me please
#endif
#if (SIZEOF_LONG + SIZEOF_INT) == 8
(conversion_fct_t)copy_bytes_8, /* DT_LONG_INT */
#elif (SIZEOF_LONG + SIZEOF_INT) == 12
(conversion_fct_t)copy_bytes_12, /* DT_LONG_INT */
#else
#error Complete me please
#endif
(conversion_fct_t)copy_2int, /* DT_2INT */
(conversion_fct_t)NULL, /* DT_SHORT_INT */
(conversion_fct_t)copy_int, /* DT_INTEGER */
(conversion_fct_t)copy_float, /* DT_REAL */
(conversion_fct_t)copy_double, /* DT_DBLPREC */
(conversion_fct_t)copy_2float, /* DT_2REAL */
(conversion_fct_t)copy_2double, /* DT_2DBLPREC */
(conversion_fct_t)copy_2int, /* DT_2INTEGER */
(conversion_fct_t)copy_wchar, /* DT_WCHAR */
(conversion_fct_t)copy_2complex_float, /* DT_2COMPLEX */
(conversion_fct_t)copy_2complex_double, /* DT_2DOUBLE_COMPLEX */
#if SIZEOF_BOOL == 1
(conversion_fct_t)copy_bytes_1, /* DT_CXX_BOOL */
#elif SIZEOF_BOOL == 4
(conversion_fct_t)copy_bytes_4, /* DT_CXX_BOOL */
#elif SIZEOF_BOOL == 8
(conversion_fct_t)copy_bytes_8, /* DT_CXX_BOOL */
#else
#error Complete me please
#endif
(conversion_fct_t)NULL, /* DT_UNAVAILABLE */
};

Просмотреть файл

@ -13,7 +13,35 @@
#ifndef DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED
#define DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED
extern void* mmx_memcpy( void* dst, const void* src, size_t n );
extern void* mmx2_memcpy( void* dst, const void* src, size_t n );
extern void* sse_memcpy( void* dst, const void* src, size_t n );
/* for small memory blocks (<256 bytes) this version is faster */
#define small_memcpy(to,from,n) \
{ \
register unsigned long int dummy; \
void *_dst = (to); \
const void *_src = (from); \
__asm__ __volatile__( "rep; movsb" \
:"=&D"(_dst), "=&S"(_src), "=&c"(dummy) \
:"0" (_dst), "1" (_src),"2" (n) \
: "memory"); \
}
#define MEMCPY( DST, SRC, BLENGTH ) \
memcpy( (DST), (SRC), (BLENGTH) )
#if 0
#define MEMCPY( DST, SRC, BLENGTH ) \
do { \
if( 128 > (BLENGTH) ) { \
small_memcpy( (DST), (SRC), (BLENGTH) ); \
} else if( (64*1024-100) > (BLENGTH) ) { \
mmx_memcpy( (DST), (SRC), (BLENGTH) ); \
} else {\
mmx2_memcpy( (DST), (SRC), (BLENGTH) ); \
} \
} while (0)
#endif
#endif /* DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED */

Просмотреть файл

@ -600,6 +600,20 @@ int32_t ompi_ddt_finalize( void )
return OMPI_SUCCESS;
}
#if OMPI_ENABLE_DEBUG
/*
* Set a breakpoint to this function in your favorite debugger
* to make it stopping on all pack and unpack errors.
*/
int ompi_ddt_safeguard_pointer_debug_breakpoint( const void* actual_ptr, int length,
const void* initial_ptr,
const ompi_datatype_t* pData,
int count )
{
return 0;
}
#endif /* OMPI_ENABLE_DEBUG */
/********************************************************
* Data dumping functions
********************************************************/

Просмотреть файл

@ -18,31 +18,74 @@
*/
#include "ompi_config.h"
#include "ompi/datatype/datatype.h"
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype_internal.h"
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <stdlib.h>
#define DO_DEBUG(INST)
#include "ompi/datatype/datatype_checksum.h"
#if OMPI_ENABLE_DEBUG
int ompi_ddt_safeguard_pointer_debug_breakpoint( const void* actual_ptr, int length,
const void* initial_ptr,
const ompi_datatype_t* pData,
int count )
{
return 0;
}
int ompi_pack_debug = 0;
#define DO_DEBUG(INST) if( ompi_pack_debug ) { INST }
#else
#define DO_DEBUG(INST)
#endif /* OMPI_ENABLE_DEBUG */
static
int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
#include "ompi/datatype/datatype_checksum.h"
#include "ompi/datatype/datatype_pack.h"
#if defined(CHECKSUM)
#define ompi_pack_general_function ompi_pack_general_checksum
#define ompi_pack_homogeneous_with_memcpy_function ompi_pack_homogeneous_with_memcpy_checksum
#define ompi_pack_no_conversion_function ompi_pack_no_conversion_checksum
#define ompi_pack_no_conv_contig_function ompi_pack_no_conv_contig_checksum
#define ompi_pack_no_conv_contig_with_gaps_function ompi_pack_no_conv_contig_with_gaps_checksum
#define ompi_generic_simple_pack_function ompi_generic_simple_pack_checksum
#else
#define ompi_pack_general_function ompi_pack_general
#define ompi_pack_homogeneous_with_memcpy_function ompi_pack_homogeneous_with_memcpy
#define ompi_pack_no_conversion_function ompi_pack_no_conversion
#define ompi_pack_no_conv_contig_function ompi_pack_no_conv_contig
#define ompi_pack_no_conv_contig_with_gaps_function ompi_pack_no_conv_contig_with_gaps
#define ompi_generic_simple_pack_function ompi_generic_simple_pack
#endif /* defined(CHECKSUM) */
int32_t
ompi_pack_general_function( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter );
int32_t
ompi_pack_homogeneous_with_memcpy_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
int* freeAfter );
int32_t
ompi_pack_no_conversion_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t *out_size,
size_t* max_data,
int* freeAfter );
int32_t
ompi_pack_no_conv_contig_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
int* freeAfter );
int32_t
ompi_pack_no_conv_contig_with_gaps_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
int* freeAfter );
int32_t
ompi_generic_simple_pack_function( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter );
int32_t
ompi_pack_general_function( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter )
@ -170,8 +213,8 @@ int ompi_convertor_pack_general( ompi_convertor_t* pConvertor,
/* We suppose here that we work with an already optimized version of the data
*/
static
int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
int32_t
ompi_pack_homogeneous_with_memcpy_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
@ -304,8 +347,8 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
* then is useless to allocate additional memory and do the memcpy operation. We can simply
* return the pointer to the contiguous piece of memory to the upper level.
*/
static
int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
int32_t
ompi_pack_no_conversion_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t *out_size,
size_t* max_data,
@ -336,8 +379,8 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
pack_elem.common.type = pElems[pos_desc].elem.common.type;
last_blength = pack_elem.count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
lastDisp = pStack->disp;
DO_DEBUG( opal_output( 0, "pack_no_conversion stack_pos %d index %d count %d last_blength %ld lastDisp %ld savePos %p bConverted %d\n",
pConv->stack_pos, pStack->index, pStack->count, last_blength, lastDisp, savePos,
DO_DEBUG( opal_output( 0, "pack_no_conversion stack_pos %d index %d count %d last_blength %ld lastDisp %ld bConverted %d\n",
pConv->stack_pos, pStack->index, pStack->count, last_blength, lastDisp,
pConv->bConverted ); );
saveLength = 0;
pStack--;
@ -586,8 +629,8 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
/* the contig versions does not use the stack. They can easily retrieve
* the status with just the informations from pConvertor->bConverted.
*/
static int
ompi_convertor_pack_no_conv_contig( ompi_convertor_t* pConv,
int32_t
ompi_pack_no_conv_contig_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
@ -629,8 +672,8 @@ ompi_convertor_pack_no_conv_contig( ompi_convertor_t* pConv,
return (0 == length);
}
static int
ompi_convertor_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
int32_t
ompi_pack_no_conv_contig_with_gaps_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
@ -764,35 +807,155 @@ ompi_convertor_pack_no_conv_contig_with_gaps( ompi_convertor_t* pConv,
return (pConv->bConverted == length);
}
/* The pack/unpack functions need a cleanup. I have to create a proper interface to access
* all basic functionalities, hence using them as basic blocks for all conversion functions.
*
* But first let's make some global assumptions:
* - a datatype (with the flag DT_DATA set) will have the contiguous flags set if and only if
* the data is really contiguous (extent equal with size)
* - for the DT_LOOP type the DT_CONTIGUOUS flag set means that the content of the loop is
* contiguous but with a gap in the begining or at the end.
* - the DT_CONTIGUOUS flag for the type DT_END_LOOP is meaningless.
*/
int32_t
ompi_convertor_prepare_for_send( ompi_convertor_t* convertor,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf )
ompi_generic_simple_pack_function( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter )
{
if( OMPI_SUCCESS != ompi_convertor_prepare( convertor, datatype,
count, pUserBuf ) ) {
return OMPI_ERROR;
dt_stack_t* pStack; /* pointer to the position on the stack */
uint32_t pos_desc; /* actual position in the description of the derived datatype */
uint32_t count_desc; /* the number of items already done in the actual pos_desc */
uint16_t type; /* type at current position */
size_t total_packed = 0; /* total amount packed this time */
dt_elem_desc_t* description;
dt_elem_desc_t* pElem;
const ompi_datatype_t *pData = pConvertor->pDesc;
char *source_base, *destination;
uint32_t iov_len_local, iov_count, required_space = 0;
DO_DEBUG( opal_output( 0, "ompi_convertor_generic_simple_pack( %p, {%p, %d}, %d )\n", (void*)pConvertor,
iov[0].iov_base, iov[0].iov_len, *out_size ); );
description = pConvertor->use_desc->desc;
/* For the first step we have to add both displacement to the source. After in the
* main while loop we will set back the source_base to the correct value. This is
* due to the fact that the convertor can stop in the middle of a data with a count
*/
source_base = pConvertor->pBaseBuf;
pStack = pConvertor->pStack + pConvertor->stack_pos;
pos_desc = pStack->index;
source_base += pStack->disp;
count_desc = pStack->count;
pStack--;
pConvertor->stack_pos--;
pElem = &(description[pos_desc]);
source_base += pStack->disp;
DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %d disp %ld\n"
"stack_pos %d pos_desc %d count_desc %d disp %ld\n",
pos_desc, count_desc, source_base - pConvertor->pBaseBuf,
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
if( required_space > ((*max_data) - total_packed) )
break; /* do not pack over the boundaries even if there are more iovecs */
if( iov[iov_count].iov_base == NULL ) {
/*
* ALLOCATE SOME MEMORY ...
*/
size_t length = iov[iov_count].iov_len;
if( length <= 0 )
length = pConvertor->local_size - pConvertor->bConverted;
if( ((*max_data) - total_packed) < length )
length = (*max_data) - total_packed;
assert( 0 < length );
iov[iov_count].iov_base = pConvertor->memAlloc_fn( &length, pConvertor->memAlloc_userdata );
iov[iov_count].iov_len = length;
*freeAfter = (*freeAfter) | (1 << iov_count);
}
convertor->flags |= CONVERTOR_SEND;
convertor->memAlloc_fn = NULL;
/* Just to avoid complaint from the compiler */
convertor->fAdvance = ompi_convertor_pack_general;
convertor->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
convertor->fAdvance = ompi_convertor_pack_no_conversion;
convertor->fAdvance = ompi_convertor_generic_simple_pack;
if( datatype->flags & DT_FLAG_CONTIGUOUS ) {
assert( convertor->flags & DT_FLAG_CONTIGUOUS );
if( ((datatype->ub - datatype->lb) == (long)datatype->size) )
convertor->fAdvance = ompi_convertor_pack_no_conv_contig;
else if( 1 >= convertor->count ) /* gaps or no gaps */
convertor->fAdvance = ompi_convertor_pack_no_conv_contig;
else
convertor->fAdvance = ompi_convertor_pack_no_conv_contig_with_gaps;
destination = iov[iov_count].iov_base;
iov_len_local = iov[iov_count].iov_len;
while( 1 ) {
if( DT_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
DO_DEBUG( opal_output( 0, "pack end_loop count %d stack_pos %d pos_desc %d disp %ld space %d\n",
pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, iov_len_local ); );
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConvertor->stack_pos == 0 ) {
/* we lie about the size of the next element in order to
* make sure we exit the main loop.
*/
required_space = 0xffffffff;
pConvertor->flags |= CONVERTOR_COMPLETED;
goto complete_loop; /* completed */
}
return OMPI_SUCCESS;
pConvertor->stack_pos--;
pStack--;
pos_desc++;
} else {
pos_desc = pStack->index + 1;
if( pStack->index == -1 ) {
pStack->disp += (pData->ub - pData->lb);
} else {
assert( DT_LOOP == description[pStack->index].loop.common.type );
pStack->disp += description[pStack->index].loop.extent;
}
}
source_base = pConvertor->pBaseBuf + pStack->disp;
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
DO_DEBUG( opal_output( 0, "pack new_loop count %d stack_pos %d pos_desc %d disp %ld space %d\n",
pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, iov_len_local ); );
}
if( DT_LOOP == pElem->elem.common.type ) {
long local_disp = (long)source_base;
if( pElem->loop.common.flags & DT_FLAG_CONTIGUOUS ) {
PACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
source_base, destination, iov_len_local );
if( 0 == count_desc ) { /* completed */
pos_desc += pElem->loop.items + 1;
goto update_loop_description;
}
/* Save the stack with the correct last_count value. */
}
local_disp = (long)source_base - local_disp;
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_LOOP, count_desc,
pStack->disp + local_disp, pos_desc + pElem->elem.disp + 1);
pos_desc++;
update_loop_description: /* update the current state */
source_base = pConvertor->pBaseBuf + pStack->disp;
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
continue;
}
while( pElem->elem.common.flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
PACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
source_base, destination, iov_len_local );
if( 0 != count_desc ) { /* completed */
type = pElem->elem.common.type;
required_space = ompi_ddt_basicDatatypes[type]->size;
goto complete_loop;
}
source_base = pConvertor->pBaseBuf + pStack->disp;
pos_desc++; /* advance to the next data */
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
}
}
complete_loop:
iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */
total_packed += iov[iov_count].iov_len;
pConvertor->bConverted += iov[iov_count].iov_len; /* update the already converted bytes */
}
*max_data = total_packed;
*out_size = iov_count;
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
/* I complete an element, next step I should go to the next one */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_BYTE, count_desc,
source_base - pStack->disp - pConvertor->pBaseBuf, pos_desc );
DO_DEBUG( opal_output( 0, "pack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
return 0;
}
return 1;
}

Просмотреть файл

@ -22,30 +22,51 @@
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype_internal.h"
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <stdlib.h>
#if OMPI_ENABLE_DEBUG
int ompi_unpack_debug = 0;
#define DO_DEBUG(INST) if( ompi_unpack_debug ) { INST }
#else
#define DO_DEBUG(INST)
#endif /* OMPI_ENABLE_DEBUG */
#include "ompi/datatype/datatype_checksum.h"
#include "ompi/datatype/datatype_unpack.h"
void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos,
const union dt_elem_desc* pDesc, const char* name )
{
opal_output( 0, "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );
for( ; stack_pos >= 0; stack_pos-- ) {
opal_output( 0, "%d: pos %d count %d disp %ld end_loop %d ", stack_pos, pStack[stack_pos].index,
pStack[stack_pos].count, pStack[stack_pos].disp, pStack[stack_pos].end_loop );
if( pStack->index != -1 )
opal_output( 0, "\t[desc count %d disp %ld extent %d]\n",
pDesc[pStack[stack_pos].index].elem.count,
pDesc[pStack[stack_pos].index].elem.disp,
pDesc[pStack[stack_pos].index].elem.extent );
else
opal_output( 0, "\n" );
}
opal_output( 0, "\n" );
}
#if defined(CHECKSUM)
#define ompi_unpack_general_function ompi_unpack_general_checksum
#define ompi_unpack_homogeneous_function ompi_unpack_homogeneous_checksum
#define ompi_unpack_homogeneous_contig_function ompi_unpack_homogeneous_contig_checksum
#define ompi_generic_simple_unpack_function ompi_generic_simple_unpack_checksum
#else
#define ompi_unpack_general_function ompi_unpack_general
#define ompi_unpack_homogeneous_function ompi_unpack_homogeneous
#define ompi_unpack_homogeneous_contig_function ompi_unpack_homogeneous_contig
#define ompi_generic_simple_unpack_function ompi_generic_simple_unpack
#endif /* defined(CHECKSUM) */
int32_t
ompi_unpack_general_function( ompi_convertor_t* pConvertor,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter );
int32_t
ompi_unpack_homogeneous_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter );
int32_t
ompi_unpack_homogeneous_contig_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter );
int32_t
ompi_generic_simple_unpack_function( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter );
/*
* Remember that the first item in the stack (ie. position 0) is the number
@ -59,7 +80,8 @@ void ompi_ddt_dump_stack( const dt_stack_t* pStack, int stack_pos,
* 1 if everything went fine and the data was completly converted
* -1 something wrong occurs.
*/
static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
int32_t
ompi_unpack_general_function( ompi_convertor_t* pConvertor,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
@ -173,7 +195,8 @@ static int ompi_convertor_unpack_general( ompi_convertor_t* pConvertor,
return 0;
}
static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
int32_t
ompi_unpack_homogeneous_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
@ -321,7 +344,8 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
return 0;
}
static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
int32_t
ompi_unpack_homogeneous_contig_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
@ -402,369 +426,175 @@ static int ompi_convertor_unpack_homogeneous_contig( ompi_convertor_t* pConv,
return 0;
}
/*
* This function is used to copy data from one buffer to another. The assumption
* is that the number of bytes per element to copy at the source and destination
* are the same.
* count - number of instances of a given data-type to copy
* from - point to the source buffer
* to - pointer to the destination buffer
* from_len - length of source buffer (in bytes)
* to_len - length of destination buffer (in bytes)
* from_extent - extent of the source data type (in bytes)
* to_extent - extent of the destination data type (in bytes)
/* The pack/unpack functions need a cleanup. I have to create a proper interface to access
* all basic functionalities, hence using them as basic blocks for all conversion functions.
*
* Return value: Number of elements of type TYPE copied
* But first let's make some global assumptions:
* - a datatype (with the flag DT_DATA set) will have the contiguous flags set if and only if
* the data is really contiguous (extent equal with size)
* - for the DT_LOOP type the DT_CONTIGUOUS flag set means that the content of the loop is
* contiguous but with a gap in the begining or at the end.
* - the DT_CONTIGUOUS flag for the type DT_END_LOOP is meaningless.
*/
#define COPY_TYPE( TYPENAME, TYPE, COUNT ) \
static int copy_##TYPENAME( uint32_t count, \
char* from, uint32_t from_len, long from_extent, \
char* to, uint32_t to_len, long to_extent ) \
{ \
uint32_t i; \
uint32_t remote_TYPE_size = sizeof(TYPE) * (COUNT); /* TODO */ \
uint32_t local_TYPE_size = (COUNT) * sizeof(TYPE); \
\
/* make sure the remote buffer is large enough to hold the data */ \
if( (remote_TYPE_size * count) > from_len ) { \
count = from_len / remote_TYPE_size; \
if( (count * remote_TYPE_size) != from_len ) { \
DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", \
from_len - (count * remote_TYPE_size) ); \
} \
DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n", \
#TYPE, count, from, from_len, to, to_len ); \
} else \
DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \
#TYPE, count, from, from_len, to, to_len ); \
\
if( (from_extent == (long)local_TYPE_size) && \
(to_extent == (long)remote_TYPE_size) ) { \
/* copy of contigous data at both source and destination */ \
MEMCPY( to, from, count * local_TYPE_size ); \
} else { \
/* source or destination are non-contigous */ \
for( i = 0; i < count; i++ ) { \
MEMCPY( to, from, local_TYPE_size ); \
to += to_extent; \
from += from_extent; \
} \
} \
return count; \
}
/*
* This function is used to copy data from one buffer to another. The assumption
* is that the number of bytes per element to copy at the source and destination
* are the same.
* count - number of instances of a given data-type to copy
* from - point to the source buffer
* to - pointer to the destination buffer
* from_len - length of source buffer (in bytes)
* to_len - length of destination buffer (in bytes)
* from_extent - extent of the source data type (in bytes)
* to_extent - extent of the destination data type (in bytes)
*
* Return value: Number of elements of type TYPE copied
*/
#define COPY_CONTIGUOUS_BYTES( TYPENAME, COUNT ) \
static int copy_##TYPENAME##_##COUNT( uint32_t count, \
char* from, uint32_t from_len, long from_extent, \
char* to, uint32_t to_len, long to_extent) \
{ \
uint32_t i; \
uint32_t remote_TYPE_size = (COUNT); /* TODO */ \
uint32_t local_TYPE_size = (COUNT); \
\
if( (remote_TYPE_size * count) > from_len ) { \
count = from_len / remote_TYPE_size; \
if( (count * remote_TYPE_size) != from_len ) { \
DUMP( "oops should I keep this data somewhere (excedent %d bytes)?\n", \
from_len - (count * remote_TYPE_size) ); \
} \
DUMP( "correct: copy %s count %d from buffer %p with length %d to %p space %d\n", \
#TYPENAME, count, from, from_len, to, to_len ); \
} else \
DUMP( " copy %s count %d from buffer %p with length %d to %p space %d\n", \
#TYPENAME, count, from, from_len, to, to_len ); \
\
if( (from_extent == (long)local_TYPE_size) && \
(to_extent == (long)remote_TYPE_size) ) { \
MEMCPY( to, from, count * local_TYPE_size ); \
} else { \
for( i = 0; i < count; i++ ) { \
MEMCPY( to, from, local_TYPE_size ); \
to += to_extent; \
from += from_extent; \
} \
} \
return count; \
}
/* set up copy functions for the basic C MPI data types */
COPY_TYPE( char, char, 1 )
COPY_TYPE( short, short, 1 )
COPY_TYPE( int, int, 1 )
COPY_TYPE( float, float, 1 )
COPY_TYPE( long, long, 1 )
COPY_TYPE( double, double, 1 )
COPY_TYPE( long_long, long long, 1 )
COPY_TYPE( long_double, long double, 1 )
COPY_TYPE( complex_float, ompi_complex_float_t, 1 )
COPY_TYPE( complex_double, ompi_complex_double_t, 1 )
COPY_TYPE( complex_long_double, ompi_complex_long_double_t, 1 )
COPY_TYPE( wchar, wchar_t, 1 )
COPY_TYPE( 2int, int, 2 )
COPY_TYPE( 2float, float, 2 )
COPY_TYPE( 2double, double, 2 )
COPY_TYPE( 2complex_float, ompi_complex_float_t, 2 )
COPY_TYPE( 2complex_double, ompi_complex_double_t, 2 )
#if OMPI_SIZEOF_FORTRAN_LOGICAL == 1 || SIZEOF_BOOL == 1
#define REQUIRE_COPY_BYTES_1 1
#else
#define REQUIRE_COPY_BYTES_1 0
#endif
#if OMPI_SIZEOF_FORTRAN_LOGICAL == 2 || SIZEOF_BOOL == 2
#define REQUIRE_COPY_BYTES_2 1
#else
#define REQUIRE_COPY_BYTES_2 0
#endif
#if OMPI_SIZEOF_FORTRAN_LOGICAL == 4 || SIZEOF_BOOL == 4
#define REQUIRE_COPY_BYTES_4 1
#else
#define REQUIRE_COPY_BYTES_4 0
#endif
#if (SIZEOF_FLOAT + SIZEOF_INT) == 8 || (SIZEOF_LONG + SIZEOF_INT) == 8 || SIZEOF_BOOL == 8
#define REQUIRE_COPY_BYTES_8 1
#else
#define REQUIRE_COPY_BYTES_8 0
#endif
#if (SIZEOF_DOUBLE + SIZEOF_INT) == 12 || (SIZEOF_LONG + SIZEOF_INT) == 12
#define REQUIRE_COPY_BYTES_12 1
#else
#define REQUIRE_COPY_BYTES_12 0
#endif
#if (SIZEOF_LONG_DOUBLE + SIZEOF_INT) == 16
#define REQUIRE_COPY_BYTES_16 1
#else
#define REQUIRE_COPY_BYTES_16 0
#endif
#if (SIZEOF_LONG_DOUBLE + SIZEOF_INT) == 20
#define REQUIRE_COPY_BYTES_20 1
#else
#define REQUIRE_COPY_BYTES_20 0
#endif
#if REQUIRE_COPY_BYTES_1
COPY_CONTIGUOUS_BYTES( bytes, 1 )
#endif /* REQUIRE_COPY_BYTES_1 */
#if REQUIRE_COPY_BYTES_2
COPY_CONTIGUOUS_BYTES( bytes, 2 )
#endif /* REQUIRE_COPY_BYTES_2 */
#if REQUIRE_COPY_BYTES_4
COPY_CONTIGUOUS_BYTES( bytes, 4 )
#endif /* REQUIRE_COPY_BYTES_4 */
#if REQUIRE_COPY_BYTES_8
COPY_CONTIGUOUS_BYTES( bytes, 8 )
#endif /* REQUIRE_COPY_BYTES_8 */
#if REQUIRE_COPY_BYTES_12
COPY_CONTIGUOUS_BYTES( bytes, 12 )
#endif /* REQUIRE_COPY_BYTES_12 */
#if REQUIRE_COPY_BYTES_16
COPY_CONTIGUOUS_BYTES( bytes, 16 )
#endif /* REQUIRE_COPY_BYTES_16 */
#if REQUIRE_COPY_BYTES_20
COPY_CONTIGUOUS_BYTES( bytes, 20 )
#endif /* REQUIRE_COPY_BYTES_20 */
/* table of predefined copy functions - one for each MPI type */
conversion_fct_t ompi_ddt_copy_functions[DT_MAX_PREDEFINED] = {
(conversion_fct_t)NULL, /* DT_LOOP */
(conversion_fct_t)NULL, /* DT_END_LOOP */
(conversion_fct_t)NULL, /* DT_LB */
(conversion_fct_t)NULL, /* DT_UB */
(conversion_fct_t)copy_char, /* DT_CHAR */
(conversion_fct_t)copy_char, /* DT_CHARACTER */
(conversion_fct_t)copy_char, /* DT_UNSIGNED_CHAR */
(conversion_fct_t)copy_char, /* DT_BYTE */
(conversion_fct_t)copy_short, /* DT_SHORT */
(conversion_fct_t)copy_short, /* DT_UNSIGNED_SHORT */
(conversion_fct_t)copy_int, /* DT_INT */
(conversion_fct_t)copy_int, /* DT_UNSIGNED_INT */
(conversion_fct_t)copy_long, /* DT_LONG */
(conversion_fct_t)copy_long, /* DT_UNSIGNED_LONG */
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG */
(conversion_fct_t)copy_long_long, /* DT_LONG_LONG_INT */
(conversion_fct_t)copy_long_long, /* DT_UNSIGNED_LONG_LONG */
(conversion_fct_t)copy_float, /* DT_FLOAT */
(conversion_fct_t)copy_double, /* DT_DOUBLE */
(conversion_fct_t)copy_long_double, /* DT_LONG_DOUBLE */
(conversion_fct_t)copy_complex_float, /* DT_COMPLEX_FLOAT */
(conversion_fct_t)copy_complex_double, /* DT_COMPLEX_DOUBLE */
(conversion_fct_t)copy_complex_long_double, /* DT_COMPLEX_LONG_DOUBLE */
(conversion_fct_t)NULL, /* DT_PACKED */
#if OMPI_SIZEOF_FORTRAN_LOGICAL == 1
(conversion_fct_t)copy_bytes_1, /* DT_LOGIC */
#elif OMPI_SIZEOF_FORTRAN_LOGICAL == 4
(conversion_fct_t)copy_bytes_4, /* DT_LOGIC */
#elif 1 /* always, some compiler complain if there is not value */
NULL, /* DT_LOGIC */
#endif
#if (SIZEOF_FLOAT + SIZEOF_INT) == 8
(conversion_fct_t)copy_bytes_8, /* DT_FLOAT_INT */
#else
#error Complete me please
#endif
#if (SIZEOF_DOUBLE + SIZEOF_INT) == 12
(conversion_fct_t)copy_bytes_12, /* DT_DOUBLE_INT */
#else
#error Complete me please
#endif
#if (SIZEOF_LONG_DOUBLE + SIZEOF_INT) == 12
(conversion_fct_t)copy_bytes_12, /* DT_LONG_DOUBLE_INT */
#elif (SIZEOF_LONG_DOUBLE + SIZEOF_INT) == 16
(conversion_fct_t)copy_bytes_16, /* DT_LONG_DOUBLE_INT */
#elif (SIZEOF_LONG_DOUBLE + SIZEOF_INT) == 20
(conversion_fct_t)copy_bytes_20, /* DT_LONG_DOUBLE_INT */
#else
#error Complete me please
#endif
#if (SIZEOF_LONG + SIZEOF_INT) == 8
(conversion_fct_t)copy_bytes_8, /* DT_LONG_INT */
#elif (SIZEOF_LONG + SIZEOF_INT) == 12
(conversion_fct_t)copy_bytes_12, /* DT_LONG_INT */
#else
#error Complete me please
#endif
(conversion_fct_t)copy_2int, /* DT_2INT */
(conversion_fct_t)NULL, /* DT_SHORT_INT */
(conversion_fct_t)copy_int, /* DT_INTEGER */
(conversion_fct_t)copy_float, /* DT_REAL */
(conversion_fct_t)copy_double, /* DT_DBLPREC */
(conversion_fct_t)copy_2float, /* DT_2REAL */
(conversion_fct_t)copy_2double, /* DT_2DBLPREC */
(conversion_fct_t)copy_2int, /* DT_2INTEGER */
(conversion_fct_t)copy_wchar, /* DT_WCHAR */
(conversion_fct_t)copy_2complex_float, /* DT_2COMPLEX */
(conversion_fct_t)copy_2complex_double, /* DT_2DOUBLE_COMPLEX */
#if SIZEOF_BOOL == 1
(conversion_fct_t)copy_bytes_1, /* DT_CXX_BOOL */
#elif SIZEOF_BOOL == 4
(conversion_fct_t)copy_bytes_4, /* DT_CXX_BOOL */
#elif SIZEOF_BOOL == 8
(conversion_fct_t)copy_bytes_8, /* DT_CXX_BOOL */
#else
#error Complete me please
#endif
(conversion_fct_t)NULL, /* DT_UNAVAILABLE */
};
extern int ompi_ddt_local_sizes[DT_MAX_PREDEFINED];
int32_t
ompi_convertor_prepare_for_recv( ompi_convertor_t* convertor,
const struct ompi_datatype_t* datatype,
int32_t count,
const void* pUserBuf )
{
/* Here I should check that the data is not overlapping */
if( OMPI_SUCCESS != ompi_convertor_prepare( convertor, datatype,
count, pUserBuf ) ) {
return OMPI_ERROR;
}
convertor->flags |= CONVERTOR_RECV;
convertor->memAlloc_fn = NULL;
convertor->fAdvance = ompi_convertor_unpack_general; /* TODO: just stop complaining */
convertor->fAdvance = ompi_convertor_unpack_homogeneous; /* default behaviour */
convertor->fAdvance = ompi_convertor_generic_simple_unpack;
/* TODO: work only on homogeneous architectures */
if( convertor->pDesc->flags & DT_FLAG_CONTIGUOUS ) {
assert( convertor->flags & DT_FLAG_CONTIGUOUS );
convertor->fAdvance = ompi_convertor_unpack_homogeneous_contig;
}
return OMPI_SUCCESS;
}
/* Get the number of elements from the data associated with this convertor that can be
* retrieved from a recevied buffer with the size iSize.
* To spped-up this function you should use it with a iSize == to the modulo
* of the original size and the size of the data.
* This function should be called with a initialized clean convertor.
* Return value:
* positive = number of basic elements inside
* negative = some error occurs
*/
int32_t ompi_ddt_get_element_count( const ompi_datatype_t* datatype, int32_t iSize )
ompi_generic_simple_unpack_function( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
uint32_t pos_desc; /* actual position in the description of the derived datatype */
int rc, nbElems = 0;
int stack_pos = 0;
dt_elem_desc_t* pElems;
uint32_t count_desc; /* the number of items already done in the actual pos_desc */
uint16_t type = DT_MAX_PREDEFINED; /* type at current position */
size_t total_unpacked = 0; /* total size unpacked this time */
dt_elem_desc_t* description;
dt_elem_desc_t* pElem;
const ompi_datatype_t *pData = pConvertor->pDesc;
char *user_memory_base, *packed_buffer;
uint32_t iov_len_local, iov_count, required_space = 0;
/* Normally the size should be less or equal to the size of the datatype.
* This function does not support a iSize bigger than the size of the datatype.
DO_DEBUG( opal_output( 0, "ompi_convertor_generic_simple_unpack( %p, {%p, %lu}, %u )\n",
(void*)pConvertor, iov[0].iov_base, (size_t)iov[0].iov_len, *out_size ); );
description = pConvertor->use_desc->desc;
/* For the first step we have to add both displacement to the source. After in the
* main while loop we will set back the source_base to the correct value. This is
* due to the fact that the convertor can stop in the middle of a data with a count
*/
assert( (uint32_t)iSize <= datatype->size );
DUMP( "dt_count_elements( %p, %d )\n", (void*)datatype, iSize );
pStack = alloca( sizeof(dt_stack_t) * (datatype->btypes[DT_LOOP] + 2) );
pStack->count = 1;
pStack->index = -1;
pStack->disp = 0;
pElems = datatype->desc.desc;
pStack->end_loop = datatype->desc.used;
pos_desc = 0;
while( 1 ) { /* loop forever the exit conditionis on the last section */
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
stack_pos--;
user_memory_base = pConvertor->pBaseBuf;
pStack = pConvertor->pStack + pConvertor->stack_pos;
pos_desc = pStack->index;
user_memory_base += pStack->disp;
count_desc = pStack->count;
pStack--;
if( stack_pos == -1 )
return nbElems; /* completed */
}
if( pStack->index == -1 ) {
pStack->disp += (datatype->ub - datatype->lb);
} else {
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
pStack->disp += pElems[pStack->index].loop.extent;
}
pos_desc = pStack->index + 1;
continue;
}
if( DT_LOOP == pElems[pos_desc].elem.common.type ) {
ddt_loop_desc_t* loop = &(pElems[pos_desc].loop);
do {
PUSH_STACK( pStack, stack_pos, pos_desc, DT_LOOP, loop->loops,
0, pos_desc + loop->items );
pos_desc++;
} while( DT_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */
DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" );
continue;
}
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
const ompi_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]);
rc = pElems[pos_desc].elem.count * basic_type->size;
if( rc >= iSize ) {
rc = iSize / basic_type->size;
nbElems += rc;
iSize -= rc * basic_type->size;
return (iSize == 0 ? nbElems : -1);
}
nbElems += pElems[pos_desc].elem.count;
iSize -= rc;
pConvertor->stack_pos--;
pElem = &(description[pos_desc]);
user_memory_base += pStack->disp;
DO_DEBUG( opal_output( 0, "unpack start pos_desc %d count_desc %d disp %ld\n"
"stack_pos %d pos_desc %d count_desc %d disp %ld\n",
pos_desc, count_desc, user_memory_base - pConvertor->pBaseBuf,
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
if( required_space > ((*max_data) - total_unpacked) )
break; /* do not pack over the boundaries even if there are more iovecs */
packed_buffer = iov[iov_count].iov_base;
iov_len_local = iov[iov_count].iov_len;
if( 0 != pConvertor->pending_length ) {
uint32_t element_length = ompi_ddt_basicDatatypes[pElem->elem.common.type]->size;
uint32_t missing_length = element_length - pConvertor->pending_length;
assert( pElem->elem.common.flags & DT_FLAG_DATA );
memcpy( pConvertor->pending + pConvertor->pending_length, packed_buffer, missing_length );
packed_buffer = pConvertor->pending;
DO_DEBUG( opal_output( 0, "unpack pending from the last unpack %d out of %d bytes\n",
pConvertor->pending_length, ompi_ddt_basicDatatypes[pElem->elem.common.type]->size ); );
UNPACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
packed_buffer, user_memory_base, element_length );
if( 0 == count_desc ) {
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
pos_desc++; /* advance to the next data */
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
}
assert( 0 == element_length );
packed_buffer = (char*)iov[iov_count].iov_base + missing_length;
iov_len_local -= missing_length;
pConvertor->pending_length = 0; /* nothing more inside */
}
while( 1 ) {
if( DT_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
DO_DEBUG( opal_output( 0, "unpack end_loop count %d stack_pos %d pos_desc %d disp %ld space %d\n",
pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, iov_len_local ); );
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConvertor->stack_pos == 0 ) {
/* we lie about the size of the next element in order to
* make sure we exit the main loop.
*/
required_space = 0xffffffff;
pConvertor->flags |= CONVERTOR_COMPLETED;
goto complete_loop; /* completed */
}
pConvertor->stack_pos--;
pStack--;
pos_desc++;
} else {
pos_desc = pStack->index + 1;
if( pStack->index == -1 ) {
pStack->disp += (pData->ub - pData->lb);
} else {
assert( DT_LOOP == description[pStack->index].loop.common.type );
pStack->disp += description[pStack->index].loop.extent;
}
}
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
DO_DEBUG( opal_output( 0, "unpack new_loop count %d stack_pos %d pos_desc %d disp %ld space %d\n",
pStack->count, pConvertor->stack_pos, pos_desc, pStack->disp, iov_len_local ); );
}
if( DT_LOOP == pElem->elem.common.type ) {
long local_disp = (long)user_memory_base;
if( pElem->loop.common.flags & DT_FLAG_CONTIGUOUS ) {
UNPACK_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
packed_buffer, user_memory_base, iov_len_local );
if( 0 == count_desc ) { /* completed */
pos_desc += pElem->loop.items + 1;
goto update_loop_description;
}
/* Save the stack with the correct last_count value. */
}
local_disp = (long)user_memory_base - local_disp;
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_LOOP, count_desc,
pStack->disp + local_disp, pos_desc + pElem->elem.disp + 1);
pos_desc++;
update_loop_description: /* update the current state */
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loop" );
continue;
}
while( pElem->elem.common.flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
UNPACK_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
packed_buffer, user_memory_base, iov_len_local );
if( 0 != count_desc ) { /* completed */
type = pElem->elem.common.type;
assert (type < DT_MAX_PREDEFINED);
required_space = ompi_ddt_basicDatatypes[type]->size;
goto complete_loop;
}
user_memory_base = pConvertor->pBaseBuf + pStack->disp;
pos_desc++; /* advance to the next data */
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
}
}
complete_loop:
if( !(pConvertor->flags & CONVERTOR_COMPLETED) && (0 != iov_len_local) ) {
/* We have some partial data here. Let's copy it into the convertor
* and keep it hot until the next round.
*/
assert (type < DT_MAX_PREDEFINED);
assert( iov_len_local < ompi_ddt_basicDatatypes[type]->size );
memcpy( pConvertor->pending, packed_buffer, iov_len_local );
DO_DEBUG( opal_output( 0, "Saving %d bytes for the next call\n", iov_len_local ); );
pConvertor->pending_length = iov_len_local;
iov_len_local = 0;
}
iov[iov_count].iov_len -= iov_len_local; /* update the amount of valid data */
total_unpacked += iov[iov_count].iov_len;
pConvertor->bConverted += iov[iov_count].iov_len; /* update the already converted bytes */
}
*max_data = total_unpacked;
*out_size = iov_count;
if( !(pConvertor->flags & CONVERTOR_COMPLETED) ) {
/* I complete an element, next step I should go to the next one */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, DT_BYTE, count_desc,
user_memory_base - pStack->disp - pConvertor->pBaseBuf, pos_desc );
DO_DEBUG( opal_output( 0, "unpack save stack stack_pos %d pos_desc %d count_desc %d disp %ld\n",
pConvertor->stack_pos, pStack->index, pStack->count, pStack->disp ); );
return 0;
}
return 1;
}

93
ompi/datatype/get_count.c Обычный файл
Просмотреть файл

@ -0,0 +1,93 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/datatype/datatype.h"
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype_internal.h"
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
/* Get the number of elements from the data-type that can be
* retrieved from a received buffer with the size iSize.
* To speed-up this function you should use it with a iSize == to the modulo
* of the original size and the size of the data.
* Return value:
* positive = number of basic elements inside
* negative = some error occurs
*/
int32_t ompi_ddt_get_element_count( const ompi_datatype_t* datatype, int32_t iSize )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
uint32_t pos_desc; /* actual position in the description of the derived datatype */
int rc, nbElems = 0;
int stack_pos = 0;
dt_elem_desc_t* pElems;
/* Normally the size should be less or equal to the size of the datatype.
* This function does not support a iSize bigger than the size of the datatype.
*/
assert( (uint32_t)iSize <= datatype->size );
DUMP( "dt_count_elements( %p, %d )\n", (void*)datatype, iSize );
pStack = alloca( sizeof(dt_stack_t) * (datatype->btypes[DT_LOOP] + 2) );
pStack->count = 1;
pStack->index = -1;
pStack->disp = 0;
pElems = datatype->desc.desc;
pStack->end_loop = datatype->desc.used;
pos_desc = 0;
while( 1 ) { /* loop forever the exit condition is on the last DT_END_LOOP */
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
stack_pos--;
pStack--;
if( stack_pos == -1 )
return nbElems; /* completed */
}
if( pStack->index == -1 ) {
pStack->disp += (datatype->ub - datatype->lb);
} else {
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
pStack->disp += pElems[pStack->index].loop.extent;
}
pos_desc = pStack->index + 1;
continue;
}
if( DT_LOOP == pElems[pos_desc].elem.common.type ) {
ddt_loop_desc_t* loop = &(pElems[pos_desc].loop);
do {
PUSH_STACK( pStack, stack_pos, pos_desc, DT_LOOP, loop->loops,
0, pos_desc + loop->items );
pos_desc++;
} while( DT_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */
DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" );
continue;
}
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
const ompi_datatype_t* basic_type = BASIC_DDT_FROM_ELEM(pElems[pos_desc]);
rc = pElems[pos_desc].elem.count * basic_type->size;
if( rc >= iSize ) {
rc = iSize / basic_type->size;
nbElems += rc;
iSize -= rc * basic_type->size;
return (iSize == 0 ? nbElems : -1);
}
nbElems += pElems[pos_desc].elem.count;
iSize -= rc;
pos_desc++; /* advance to the next data */
}
}
}

Просмотреть файл

@ -23,21 +23,10 @@
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype_internal.h"
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <stdlib.h>
#if OMPI_ENABLE_DEBUG
int ompi_pack_debug = 0;
#define DO_DEBUG(INST) if( ompi_pack_debug ) { INST }
#else
#define DO_DEBUG(INST)
#endif /* OMPI_ENABLE_DEBUG */
#include "ompi/datatype/datatype_checksum.h"
#include "ompi/datatype/datatype_pack.h"
/* The pack/unpack functions need a cleanup. I have to create a proper interface to access
* all basic functionalities, hence using them as basic blocks for all conversion functions.
*
@ -48,8 +37,8 @@ int ompi_pack_debug = 0;
* contiguous but with a gap in the begining or at the end.
* - the DT_CONTIGUOUS flag for the type DT_END_LOOP is meaningless.
*/
int ompi_convertor_generic_simple_pack( ompi_convertor_t* pConvertor,
int32_t
ompi_generic_simple_pack_function( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter )

Просмотреть файл

@ -23,18 +23,6 @@
#include "ompi/datatype/convertor.h"
#include "ompi/datatype/datatype_internal.h"
#ifdef HAVE_ALLOCA_H
#include <alloca.h>
#endif
#include <stdlib.h>
#if OMPI_ENABLE_DEBUG
int ompi_unpack_debug = 0;
#define DO_DEBUG(INST) if( ompi_unpack_debug ) { INST }
#else
#define DO_DEBUG(INST)
#endif /* OMPI_ENABLE_DEBUG */
#include "ompi/datatype/datatype_checksum.h"
#include "ompi/datatype/datatype_unpack.h"
@ -48,8 +36,8 @@ int ompi_unpack_debug = 0;
* contiguous but with a gap in the begining or at the end.
* - the DT_CONTIGUOUS flag for the type DT_END_LOOP is meaningless.
*/
int ompi_convertor_generic_simple_unpack( ompi_convertor_t* pConvertor,
int32_t
ompi_generic_simple_unpack_function( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter )