1
1

Add the new dt_copy.c to the Makefile.am

Remove the old function from dt_unpack.c and activate the new one from dt_copy.c.
Add an MCA param ompi_copy_debug to get messages about the local memory copies performed by the new function.
Slightly change the prototype of the function to keep the compilers happy on some platforms.

This commit was SVN r8142.
Этот коммит содержится в:
George Bosilca 2005-11-13 23:00:50 +00:00
родитель 08a29da9a9
Коммит 9359be2190
4 изменённых файлов: 7 добавлений и 138 удалений

Просмотреть файл

@ -42,6 +42,7 @@ libmpi_la_SOURCES += \
datatype/fake_stack.c \
datatype/dt_args.c \
datatype/dt_arch.c \
datatype/dt_copy.c \
datatype/dt_external32.c \
datatype/dt_match_size.c \
datatype/convertor.c \

Просмотреть файл

@ -192,7 +192,7 @@ static inline int32_t ompi_ddt_is_contiguous_memory_layout( ompi_datatype_t* dat
OMPI_DECLSPEC int32_t ompi_ddt_get_element_count( const ompi_datatype_t* pData, int32_t iSize );
OMPI_DECLSPEC int32_t ompi_ddt_copy_content_same_ddt( const ompi_datatype_t* pData, int32_t count,
char* pDestBuf, const char* pSrcBuf );
char* pDestBuf, char* pSrcBuf );
OMPI_DECLSPEC const ompi_datatype_t* ompi_ddt_match_size( int size, uint16_t datakind, uint16_t datalang );

Просмотреть файл

@ -25,8 +25,11 @@
#include "mca/base/mca_base_param.h"
extern int32_t ompi_unpack_debug;
extern int32_t ompi_pack_debug;
extern int32_t ompi_copy_debug;
#endif /* OMPI_ENABLE_DEBUG */
extern size_t ompi_datatype_memcpy_block_size;
/* by default the debugging is turned off */
int ompi_ddt_dfd = -1;
@ -539,6 +542,8 @@ int32_t ompi_ddt_init( void )
false, false, 0, &ompi_unpack_debug );
mca_base_param_reg_int_name( "datatype", "pack_debug", "Non zero lead to output generated by the pack functions",
false, false, 0, &ompi_pack_debug );
mca_base_param_reg_int_name( "datatype", "copy_debug", "Non zero lead to output generated by the local copy functions",
false, false, 0, &ompi_copy_debug );
#endif /* OMPI_ENABLE_DEBUG */
ompi_ddt_default_convertors_init();

Просмотреть файл

@ -766,140 +766,3 @@ int32_t ompi_ddt_get_element_count( const ompi_datatype_t* datatype, int32_t iSi
}
}
}
/**
 * Copy `count` elements laid out according to `datatype` from pSrcBuf to
 * pDestBuf. Source and destination use the same datatype, so every piece
 * of data is copied to the same displacement it has in the source.
 *
 * Two paths exist:
 *  - contiguous datatypes (DT_FLAG_CONTIGUOUS set) are copied directly,
 *    either as one chunked stream or element by element when size != extent;
 *  - all other datatypes are copied by walking the element description
 *    (the optimized one when available) with an explicit loop stack.
 *
 * @param datatype  description of the data layout; must carry DT_FLAG_COMMITED
 * @param count     number of elements of `datatype` to copy
 * @param pDestBuf  destination buffer
 * @param pSrcBuf   source buffer
 *
 * @return OMPI_ERROR when the datatype is not committed; OMPI_SUCCESS when
 *         count is 0 or after the description-walking path; the literal 0
 *         after the contiguous path.
 *         NOTE(review): presumably OMPI_SUCCESS == 0, confirm and unify.
 */
int32_t ompi_ddt_copy_content_same_ddt( const ompi_datatype_t* datatype, int32_t count,
char* pDestBuf, const char* pSrcBuf )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int stack_pos = 0, i;
/* [lastDisp, lastDisp + lastLength) is the pending region: bytes already
 * identified as contiguous but not yet copied.
 */
long lastDisp = 0, lastLength = 0;
dt_elem_desc_t* pElems;
if( !(datatype->flags & DT_FLAG_COMMITED) ) { /* datatype not committed */
return OMPI_ERROR;
}
/* empty data ? then do nothing. This should normally be trapped
* at a higher level.
*/
if( count == 0 ) return OMPI_SUCCESS;
/* If we have to copy a contiguous datatype then simply
* do a memcpy.
*/
if( (datatype->flags & DT_FLAG_CONTIGUOUS) == DT_FLAG_CONTIGUOUS ) {
long extent = (datatype->ub - datatype->lb);
/* Now that we know the datatype is contiguous, we should move the 2 pointers
* source and destination to the correct displacement.
*/
pDestBuf += datatype->lb;
pSrcBuf += datatype->lb;
if( (long)datatype->size == extent ) { /* all contiguous == no gaps around */
/* NOTE(review): the product is stored in an int; a large size * count
 * could exceed its range -- confirm the expected limits.
 */
int total_length = datatype->size * count;
/* copy in chunks of at most 128KB; the last chunk is clamped to what is left */
lastLength = 128 * 1024;
if( lastLength > total_length ) lastLength = total_length;
while( total_length > 0 ) {
/* presumably a debug-time bounds check of the destination range -- macro not visible here */
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf, lastLength,
pDestBuf, datatype, count );
MEMCPY( pDestBuf, pSrcBuf, lastLength );
pDestBuf += lastLength;
pSrcBuf += lastLength;
total_length -= lastLength;
if( lastLength > total_length ) lastLength = total_length;
}
} else {
/* size != extent: copy only `size` bytes of each element and step both
 * pointers by the extent. pos_desc is reused here as a plain element counter.
 */
for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf, datatype->size,
pDestBuf, datatype, count );
MEMCPY( pDestBuf, pSrcBuf, datatype->size );
pDestBuf += extent;
pSrcBuf += extent;
}
}
return 0;
}
/* Generic path. The stack gets one entry per DT_LOOP recorded in the datatype
 * plus one for the implicit outermost loop over `count`. It is obtained with
 * alloca, so it lives until this function returns.
 */
pStack = alloca( sizeof(dt_stack_t) * (datatype->btypes[DT_LOOP] + 1) );
/* bottom entry: index -1 marks the implicit loop over the `count` elements */
pStack->count = count;
pStack->index = -1;
pStack->disp = 0;
pos_desc = 0;
/* use the optimized description when one exists, otherwise the regular one */
if( datatype->opt_desc.desc != NULL ) {
pElems = datatype->opt_desc.desc;
pStack->end_loop = datatype->opt_desc.used;
} else {
pElems = datatype->desc.desc;
pStack->end_loop = datatype->desc.used;
}
/* Walk the description until the bottom stack entry is popped (goto end_loop). */
while( 1 ) {
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
/* all iterations done: pop this loop; popping the bottom entry ends the copy */
pStack--;
if( --stack_pos == -1 ) {
goto end_loop;
}
DDT_DUMP_STACK( pStack, stack_pos, pElems, "loop finish" );
pos_desc++;
} else {
DDT_DUMP_STACK( pStack, stack_pos, pElems, "decrease loop count" );
/* more iterations remain: shift the loop displacement by one iteration
 * (the datatype extent for the implicit outer loop, the loop extent
 * otherwise) and jump back to the first item of the loop body.
 */
if( pStack->index == -1 ) {
pStack->disp += (datatype->ub - datatype->lb);
} else {
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
pStack->disp += pElems[pStack->index].loop.extent;
}
pos_desc = pStack->index + 1;
}
}
if( DT_LOOP == pElems[pos_desc].elem.common.type ) {
/* enter every directly nested loop; PUSH_STACK presumably advances pStack
 * and stack_pos -- macro not visible here, confirm.
 */
do {
PUSH_STACK( pStack, stack_pos, pos_desc, DT_LOOP, pElems[pos_desc].loop.loops,
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
pos_desc++;
} while( DT_LOOP == pElems[pos_desc].elem.common.type ); /* let's start another loop */
DDT_DUMP_STACK( pStack, stack_pos, pElems, "advance loops" );
}
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
if( (lastDisp + lastLength) != (pStack->disp + pElems[pos_desc].elem.disp) ) {
/* If not contiguous with the previous piece of data then first save
* the previous one... (on the very first mismatch lastLength may still
* be 0, in which case this is a zero-length copy)
*/
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf + lastDisp, lastLength,
pDestBuf, datatype, count );
MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
/* ... and start a new, empty pending region at this element's displacement */
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
lastLength = 0;
}
if( pElems[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS ) {
/* a contiguous piece of memory. Just add it to the actual one. Notice that if this
* datatype is not contiguous with the previous one, then the old one is already
* copied. Thus we just have to increase the amount ...
*/
lastLength += pElems[pos_desc].elem.count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
} else {
/* basic datatype but with an extent different that the size. Try to add the first
* one to the previous piece of memory ...
*/
lastLength += BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
/* ... then flush once per remaining item; the last item is left pending so it
 * can still be merged with whatever follows.
 */
for( i = 0; i < ((int)pElems[pos_desc].elem.count - 1); i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf + lastDisp, lastLength,
pDestBuf, datatype, count );
MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
/* NOTE(review): the region start advances by the element's displacement
 * (elem.disp), not by a per-item extent -- confirm this is intended.
 */
lastDisp += pElems[pos_desc].elem.disp;
lastLength = BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
}
}
pos_desc++; /* advance to the next data */
}
}
end_loop:
/* flush whatever is still pending once the whole description has been walked */
if( lastLength != 0 ) {
OMPI_DDT_SAFEGUARD_POINTER( pDestBuf + lastDisp, lastLength,
pDestBuf, datatype, count );
MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
}
/* cleanup the stack: it came from alloca, so there is nothing to release explicitly */
return OMPI_SUCCESS;
}