New version of the send-data-to-self copy for the case where both the send and the receive
datatypes are the same. This commit was SVN r8141.
Этот коммит содержится в:
родитель
e6a3a406e2
Коммит
08a29da9a9
268
ompi/datatype/dt_copy.c
Обычный файл
268
ompi/datatype/dt_copy.c
Обычный файл
@ -0,0 +1,268 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "datatype/datatype.h"
|
||||
#include "datatype/convertor.h"
|
||||
#include "datatype/datatype_internal.h"
|
||||
|
||||
#ifdef HAVE_ALLOCA_H
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
|
||||
#if OMPI_ENABLE_DEBUG
/* Run-time switch for the copy traces: DO_DEBUG bodies only execute when
 * this is non-zero. */
int32_t ompi_copy_debug = 0;
/* Wrapped in do { } while (0) so that "DO_DEBUG( ... );" is exactly one
 * statement and can safely be used as the body of an if/else. */
#define DO_DEBUG(INST) do { if( ompi_copy_debug ) { INST } } while (0)
#else
/* Debugging disabled at build time: the instructions are dropped entirely. */
#define DO_DEBUG(INST)
#endif  /* OMPI_ENABLE_DEBUG */
|
||||
|
||||
/* Largest number of bytes handed to a single MEMCPY call when
 * ompi_ddt_copy_content_same_ddt() copies a gap-free contiguous datatype;
 * longer transfers are split into chunks of at most this size. */
size_t ompi_datatype_memcpy_block_size = 128 * 1024;
|
||||
|
||||
static inline void copy_predefined_data( const dt_elem_desc_t* ELEM,
|
||||
const ompi_datatype_t* DATATYPE,
|
||||
char* SOURCE_BASE,
|
||||
int32_t TOTAL_COUNT,
|
||||
uint32_t COUNT,
|
||||
char* SOURCE,
|
||||
char* DESTINATION,
|
||||
uint32_t* SPACE )
|
||||
{
|
||||
uint32_t _copy_count = (COUNT), _copy_blength;
|
||||
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
|
||||
char* _source = (SOURCE) + _elem->disp;
|
||||
char* _destination = (DESTINATION) + _elem->disp;
|
||||
|
||||
_copy_blength = ompi_ddt_basicDatatypes[_elem->common.type]->size;
|
||||
|
||||
if( _copy_blength == (uint32_t)_elem->extent ) {
|
||||
_copy_blength *= _copy_count;
|
||||
OMPI_DDT_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE),
|
||||
(DATATYPE), (TOTAL_COUNT) );
|
||||
/* the extent and the size of the basic datatype are equals */
|
||||
DO_DEBUG( opal_output( 0, "copy 1. memcpy( %p, %p, %ld ) => space %d\n",
|
||||
_destination, _source, _copy_blength, *(SPACE) ); );
|
||||
MEMCPY( _destination, _source, _copy_blength );
|
||||
_source += _copy_blength;
|
||||
_destination += _copy_blength;
|
||||
} else {
|
||||
uint32_t _i;
|
||||
for( _i = 0; _i < _copy_count; _i++ ) {
|
||||
OMPI_DDT_SAFEGUARD_POINTER( _source, _copy_blength, (SOURCE_BASE),
|
||||
(DATATYPE), (TOTAL_COUNT) );
|
||||
DO_DEBUG( opal_output( 0, "copy 2. memcpy( %p, %p, %ld ) => space %d\n",
|
||||
_destination, _source, _copy_blength, *(SPACE) - (_i * _copy_blength) ); );
|
||||
MEMCPY( _destination, _source, _copy_blength );
|
||||
_source += _elem->extent;
|
||||
_destination += _elem->extent;
|
||||
}
|
||||
_copy_blength *= _copy_count;
|
||||
}
|
||||
*(SPACE) -= _copy_blength;
|
||||
}
|
||||
|
||||
static inline void copy_contiguous_loop( const dt_elem_desc_t* ELEM,
|
||||
const ompi_datatype_t* DATATYPE,
|
||||
char* SOURCE_BASE,
|
||||
int32_t TOTAL_COUNT,
|
||||
uint32_t COUNT,
|
||||
char* SOURCE,
|
||||
char* DESTINATION,
|
||||
uint32_t* SPACE )
|
||||
{
|
||||
ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
|
||||
ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + _loop->items);
|
||||
char* _source = (SOURCE) + _end_loop->first_elem_disp;
|
||||
char* _destination = (DESTINATION) + _end_loop->first_elem_disp;
|
||||
size_t _copy_loops = (COUNT);
|
||||
uint32_t _i;
|
||||
|
||||
if( _loop->extent == (long)_end_loop->size ) { /* the loop is contiguous */
|
||||
_copy_loops *= _end_loop->size;
|
||||
OMPI_DDT_SAFEGUARD_POINTER( _source, _copy_loops, (SOURCE_BASE),
|
||||
(DATATYPE), (TOTAL_COUNT) );
|
||||
MEMCPY( _destination, _source, _copy_loops );
|
||||
} else {
|
||||
for( _i = 0; _i < _copy_loops; _i++ ) {
|
||||
OMPI_DDT_SAFEGUARD_POINTER( _source, _end_loop->size, (SOURCE_BASE),
|
||||
(DATATYPE), (TOTAL_COUNT) );
|
||||
DO_DEBUG( opal_output( 0, "copy 3. memcpy( %p, %p, %ld ) => space %ld\n",
|
||||
_destination, _source, _end_loop->size, *(SPACE) - _i * _end_loop->size ); );
|
||||
MEMCPY( _destination, _source, _end_loop->size );
|
||||
_source += _loop->extent;
|
||||
_destination += _loop->extent;
|
||||
}
|
||||
_copy_loops *= _end_loop->size;
|
||||
}
|
||||
*(SPACE) -= _copy_loops;
|
||||
}
|
||||
|
||||
/* Forward to copy_predefined_data(); SPACE must be an lvalue because its
 * address is what gets passed on. */
#define COPY_PREDEFINED_DATATYPE( ELEM, DATATYPE, SOURCE_BASE, TOTAL_COUNT, COUNT, SOURCE, DESTINATION, SPACE ) \
    copy_predefined_data( (ELEM),                                           \
                          (DATATYPE),                                       \
                          (SOURCE_BASE),                                    \
                          (TOTAL_COUNT),                                    \
                          (COUNT),                                          \
                          (SOURCE),                                         \
                          (DESTINATION),                                    \
                          &(SPACE) )

/* Forward to copy_contiguous_loop(); SPACE must be an lvalue because its
 * address is what gets passed on. */
#define COPY_CONTIGUOUS_LOOP( ELEM, DATATYPE, SOURCE_BASE, TOTAL_COUNT, COUNT, SOURCE, DESTINATION, SPACE ) \
    copy_contiguous_loop( (ELEM),                                           \
                          (DATATYPE),                                       \
                          (SOURCE_BASE),                                    \
                          (TOTAL_COUNT),                                    \
                          (COUNT),                                          \
                          (SOURCE),                                         \
                          (DESTINATION),                                    \
                          &(SPACE) )

/* Point ELEMENT at entry POSITION of DESCRIPTION and load that entry's
 * elem.count into COUNTER. */
#define UPDATE_INTERNAL_COUNTERS( DESCRIPTION, POSITION, ELEMENT, COUNTER ) \
    do {                                                                    \
        (ELEMENT) = (DESCRIPTION) + (POSITION);                             \
        (COUNTER) = (ELEMENT)->elem.count;                                  \
    } while (0)
|
||||
|
||||
int32_t ompi_ddt_copy_content_same_ddt( const ompi_datatype_t* datatype, int32_t count,
|
||||
char* destination_base, char* source_base )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int32_t stack_pos; /* index of the stack level */
|
||||
uint32_t pos_desc; /* actual position in the description of the derived datatype */
|
||||
uint32_t count_desc; /* the number of items already done in the actual pos_desc */
|
||||
dt_elem_desc_t* description;
|
||||
dt_elem_desc_t* pElem;
|
||||
uint32_t iov_len_local;
|
||||
char *source = source_base, *destination = destination_base;
|
||||
|
||||
DO_DEBUG( opal_output( 0, "ompi_ddt_copy_content_same_ddt( %p, %d, dst %p, src %p )\n",
|
||||
datatype, count, destination_base, source_base ); );
|
||||
/* empty data ? then do nothing. This should normally be trapped
|
||||
* at a higher level.
|
||||
*/
|
||||
if( 0 == count ) return 1;
|
||||
|
||||
iov_len_local = count * datatype->size;
|
||||
|
||||
/* If we have to copy a contiguous datatype then simply
|
||||
* do a memcpy.
|
||||
*/
|
||||
if( (datatype->flags & DT_FLAG_CONTIGUOUS) == DT_FLAG_CONTIGUOUS ) {
|
||||
long extent = (datatype->ub - datatype->lb);
|
||||
/* Now that we know the datatype is contiguous, we should move the 2 pointers
|
||||
* source and destination to the correct displacement.
|
||||
*/
|
||||
destination += datatype->lb;
|
||||
source += datatype->lb;
|
||||
if( (long)datatype->size == extent ) { /* all contiguous == no gaps around */
|
||||
size_t total_length = datatype->size * count;
|
||||
size_t memcpy_chunk = ompi_datatype_memcpy_block_size;
|
||||
while( total_length > 0 ) {
|
||||
if( memcpy_chunk > total_length ) memcpy_chunk = total_length;
|
||||
OMPI_DDT_SAFEGUARD_POINTER( destination, memcpy_chunk,
|
||||
destination, datatype, count );
|
||||
DO_DEBUG( opal_output( 0, "copy c1. memcpy( %p, %p, %ld ) => space %d\n",
|
||||
destination, source, memcpy_chunk, iov_len_local ); );
|
||||
MEMCPY( destination, source, memcpy_chunk );
|
||||
destination += memcpy_chunk;
|
||||
source += memcpy_chunk;
|
||||
total_length -= memcpy_chunk;
|
||||
iov_len_local -= memcpy_chunk;
|
||||
}
|
||||
} else {
|
||||
for( pos_desc = 0; (int32_t)pos_desc < count; pos_desc++ ) {
|
||||
OMPI_DDT_SAFEGUARD_POINTER( destination, datatype->size,
|
||||
destination, datatype, count );
|
||||
DO_DEBUG( opal_output( 0, "copy c2. memcpy( %p, %p, %ld ) => space %d\n",
|
||||
destination, source, datatype->size,
|
||||
iov_len_local - (pos_desc * datatype->size) ); );
|
||||
MEMCPY( destination, source, datatype->size );
|
||||
destination += extent;
|
||||
source += extent;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
pStack = alloca( sizeof(dt_stack_t) * (datatype->btypes[DT_LOOP] + 1) );
|
||||
pStack->count = count;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
pos_desc = 0;
|
||||
stack_pos = 0;
|
||||
|
||||
if( datatype->opt_desc.desc != NULL ) {
|
||||
description = datatype->opt_desc.desc;
|
||||
pStack->end_loop = datatype->opt_desc.used;
|
||||
} else {
|
||||
description = datatype->desc.desc;
|
||||
pStack->end_loop = datatype->desc.used;
|
||||
}
|
||||
|
||||
if( description[0].elem.common.type == DT_LOOP )
|
||||
count_desc = description[0].loop.loops;
|
||||
else
|
||||
count_desc = description[0].elem.count;
|
||||
pElem = &(description[pos_desc]);
|
||||
|
||||
while( 1 ) {
|
||||
if( DT_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
|
||||
DO_DEBUG( opal_output( 0, "copy end_loop count %d stack_pos %d pos_desc %d disp %ld space %d\n",
|
||||
pStack->count, stack_pos, pos_desc, pStack->disp, iov_len_local ); );
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
if( stack_pos == 0 ) {
|
||||
assert( iov_len_local == 0 );
|
||||
return 0; /* completed */
|
||||
}
|
||||
stack_pos--;
|
||||
pStack--;
|
||||
pos_desc++;
|
||||
} else {
|
||||
pos_desc = pStack->index + 1;
|
||||
if( pStack->index == -1 ) {
|
||||
pStack->disp += (datatype->ub - datatype->lb);
|
||||
} else {
|
||||
assert( DT_LOOP == description[pStack->index].loop.common.type );
|
||||
pStack->disp += description[pStack->index].loop.extent;
|
||||
}
|
||||
}
|
||||
source = source_base + pStack->disp;
|
||||
destination = destination_base + pStack->disp;
|
||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||
DO_DEBUG( opal_output( 0, "copy new_loop count %d stack_pos %d pos_desc %d disp %ld space %d\n",
|
||||
pStack->count, stack_pos, pos_desc, pStack->disp, iov_len_local ); );
|
||||
}
|
||||
if( DT_LOOP == pElem->elem.common.type ) {
|
||||
long local_disp = (long)source;
|
||||
if( pElem->loop.common.flags & DT_FLAG_CONTIGUOUS ) {
|
||||
COPY_CONTIGUOUS_LOOP( pElem, datatype, source_base, count, count_desc,
|
||||
source, destination, iov_len_local );
|
||||
assert( 0 == count_desc );
|
||||
pos_desc += pElem->loop.items + 1;
|
||||
goto update_loop_description;
|
||||
}
|
||||
local_disp = (long)source - local_disp;
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, DT_LOOP, count_desc,
|
||||
pStack->disp + local_disp, pos_desc + pElem->elem.disp + 1);
|
||||
pos_desc++;
|
||||
update_loop_description: /* update the current state */
|
||||
source = source_base + pStack->disp;
|
||||
destination = destination_base + pStack->disp;
|
||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||
DDT_DUMP_STACK( pStack, stack_pos, pElem, "advance loop" );
|
||||
continue;
|
||||
}
|
||||
while( pElem->elem.common.flags & DT_FLAG_DATA ) {
|
||||
/* now here we have a basic datatype */
|
||||
COPY_PREDEFINED_DATATYPE( pElem, datatype, source_base, count, count_desc,
|
||||
source, destination, iov_len_local );
|
||||
pos_desc++; /* advance to the next data */
|
||||
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
|
||||
}
|
||||
}
|
||||
}
|
Загрузка…
x
Ссылка в новой задаче
Block a user