1
1

Correct several bugs in the case when we are in a homogeneous environment. Now the ddt engine handles

the contiguous datatype correctly and fast. However, in "developer mode" (OMPI_ENABLE_DEBUG set to 1)
there are additional tests that are performed. If you plan to measure performance, please compile OpenMPI
in normal mode.
Changes:
- correctly generate the stack when we have an initial position
- same thing when we start from ZERO.
- in contiguous mode, handle the partial copy of data (using the stack)
- in the case when not all the data is packed/unpacked, leave the stack in a correct state
- take care of data with size ZERO (who really wants to create such things?!)
- general pack/unpack functions handle the new stack representation
- make sure that we never go outside the user-provided buffer

This commit was SVN r3581.
Этот коммит содержится в:
George Bosilca 2004-11-16 21:27:57 +00:00
родитель 1c579699b1
Коммит 45e39e033b
5 изменённых файлов: 69 добавлений и 39 удалений

Просмотреть файл

@ -190,12 +190,12 @@ do { \
/*printf( "memcpy dest = %p src = %p length = %d\n", (void*)(DST), (void*)(SRC), (int)(BLENGTH) );*/ \
memcpy( (DST), (SRC), (BLENGTH) ); }
#if defined(DO_INTENSIVE_DEBUGGING)
#if OMPI_ENABLE_DEBUG
#define OMPI_DDT_SAFEGUARD_POINTER( ACTPTR, LENGTH, INITPTR, PDATA, COUNT ) \
ompi_ddt_safeguard_pointer( (ACTPTR), (LENGTH), (INITPTR), (PDATA), (COUNT) )
#else
#define OMPI_DDT_SAFEGUARD_POINTER( ACTPTR, LENGTH, INITPTR, PDATA, COUNT )
#endif /* DO_INTENSIVE_DEBUGGING */
#endif /* OMPI_ENABLE_DEBUG */
static inline void ompi_ddt_safeguard_pointer( void* actual_ptr, int length,
void* initial_ptr,
@ -287,7 +287,11 @@ int ompi_convertor_create_stack_with_pos_contig( ompi_convertor_t* pConvertor,
}
/* Special case for contiguous datatypes */
if( pData->size == 0 ) { /* special case for empty datatypes */
count = pConvertor->count;
} else {
count = starting_point / pData->size;
}
extent = pData->ub - pData->lb;
pStack[0].disp = count * extent;
@ -312,32 +316,48 @@ static inline
int ompi_convertor_create_stack_at_begining( ompi_convertor_t* pConvertor, int* sizes )
{
ompi_datatype_t* pData = pConvertor->pDesc;
dt_stack_t* pStack;
dt_elem_desc_t* pElems;
int index;
int index = 0;
pConvertor->stack_pos = 1;
pConvertor->stack_pos = 0;
pStack = pConvertor->pStack;
/* Fill the first position on the stack. This one correspond to the
* last fake DT_END_LOOP that we add to the data representation and
* allow us to move quickly inside the datatype when we have a count.
*/
pConvertor->pStack[0].index = -1;
pConvertor->pStack[0].count = pConvertor->count;
pConvertor->pStack[0].disp = 0;
/* first here we should select which data representation will be used for
* this operation: normal one or the optimized version ? */
pElems = pData->desc.desc;
pConvertor->pStack[0].end_loop = pData->desc.used;
pStack[0].end_loop = pData->desc.used;
if( pConvertor->flags & CONVERTOR_HOMOGENEOUS ) {
if( pData->opt_desc.used > 0 ) {
pElems = pData->opt_desc.desc;
pConvertor->pStack[0].end_loop = pData->opt_desc.used;
}
}
index = GET_FIRST_NON_LOOP(pData->desc.desc);
pConvertor->pStack[0].disp = pElems[index].disp;
pConvertor->pStack[1].index = 0;
pConvertor->pStack[1].count = pElems->count;
pConvertor->pStack[1].disp = pConvertor->pStack[0].disp;
pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
/* In the case where the datatype start with loops, we should push them on the stack.
* Otherwise when we reach the end_loop field we will pop too many entries and finish
* by overriding other places in memory. Now the big question is when to stop creating
* the entries on the stack ? Should I stop when I reach the first data element or
* should I stop on the first contiguous loop ?
*/
while( pElems[index].type == DT_LOOP ) {
dt_loop_desc_t* loop = (dt_loop_desc_t*)&(pElems[index]);
PUSH_STACK( pStack, pConvertor->stack_pos, index,
loop->loops, 0, loop->items );
index++;
}
if( pElems[index].flags & DT_FLAG_DATA ) { /* let's stop here */
PUSH_STACK( pStack, pConvertor->stack_pos, index,
pElems[index].count, pElems[index].disp, 0 );
} else {
ompi_output( 0, "Here we should have a data in the datatype description\n" );
}
/* And set the correct status */
pConvertor->converted = 0;
pConvertor->bConverted = 0;

Просмотреть файл

@ -64,13 +64,13 @@ int ompi_ddt_optimize_short( dt_desc_t* pData, int count,
lastDisp += lastLength;
lastLength = 0;
}
pStartLoop = (dt_loop_desc_t*)&(pTypeDesc->desc[pStack->index - 1]);
SAVE_ELEM( pElemDesc, DT_END_LOOP, pData->desc.desc[pos_desc].flags,
nbElems - pStack->index + 1, /* # of elems in this loop */
pData->desc.desc[pos_desc].disp,
pData->desc.desc[pos_desc].extent );
pStack--; /* go down one position on the stack */
if( --stack_pos >= 0 ) { /* still something to do ? */
pStartLoop = (dt_loop_desc_t*)&(pTypeDesc->desc[pStack->index - 1]);
pStartLoop->loops = (pElemDesc - 1)->count;
totalDisp = pStack->disp; /* update the displacement position */
}
@ -79,7 +79,7 @@ int ompi_ddt_optimize_short( dt_desc_t* pData, int count,
}
if( pData->desc.desc[pos_desc].type == DT_LOOP ) {
dt_loop_desc_t* loop = (dt_loop_desc_t*)&(pData->desc.desc[pos_desc]);
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items + 1]);
dt_endloop_desc_t* end_loop = (dt_endloop_desc_t*)&(pData->desc.desc[pos_desc + loop->items]);
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
long loop_disp = pData->desc.desc[pos_desc + index].disp;
@ -95,11 +95,12 @@ int ompi_ddt_optimize_short( dt_desc_t* pData, int count,
lastLength += loop->loops * end_loop->size;
} else {
int counter = loop->loops;
/* if the previous data is contiguous with this piece and it has a length not ZERO */
if( lastLength != 0 ) {
if( (lastDisp + lastLength) == (totalDisp + loop_disp) ) {
lastLength += end_loop->size;
counter--;
}
if( lastLength != 0 ) {
SAVE_DESC( pElemDesc, lastDisp, lastLength );
lastDisp += lastLength;
lastLength = 0;

Просмотреть файл

@ -166,12 +166,11 @@ int ompi_convertor_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
}
pStack = pConv->pStack + pConv->stack_pos;
pStack--; /* first entry never used on this case */
pos_desc = pStack->index;
lastDisp = pStack->disp;
last_count = pStack->count;
pStack--;
pConv->stack_pos -= 2;
pConv->stack_pos--;
next_loop:
while( pos_desc >= 0 ) {
@ -310,14 +309,14 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
pStack = pConv->pStack + pConv->stack_pos;
/* retrieve the context of the last call */
saveLength = pStack->end_loop;
savePos = (char*)pStack->disp;
pStack--;
pos_desc = pStack->index;
last_count = pStack->count;
lastDisp = pStack->disp;
savePos = (char*)pConv->pBaseBuf + pStack->disp;
/*saveLength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;*/
saveLength = 0;
pStack--;
pConv->stack_pos -= 2;
pConv->stack_pos--;
*freeAfter = 0;
space_on_iovec = iov[0].iov_len;
@ -382,6 +381,8 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
pStack->disp += pElems[pos_desc].extent;
}
pos_desc++; /* go to the next element */
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
goto next_loop;
}
while( pElems[pos_desc].type == DT_LOOP ) {
@ -431,12 +432,11 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
}
/* now here we have a basic datatype */
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
/* do we have enough space in the buffer ? */
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
@ -526,6 +526,8 @@ int ompi_convertor_pack_no_conversion( ompi_convertor_t* pConv,
if( iov_pos == (*out_size) ) goto end_loop;
pos_desc++; /* advance to the next data */
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
}
}
last_count = 0; /* complete the data */
@ -755,7 +757,7 @@ int ompi_convertor_init_for_send( ompi_convertor_t* pConv,
pConv->stack_pos = 0; /* just to be sure */
}
pConv->flags = CONVERTOR_SEND;
pConv->flags = CONVERTOR_SEND | CONVERTOR_HOMOGENEOUS; /* by default set to homogeneous */
pConv->pBaseBuf = pUserBuf;
pConv->available_space = count * (dt->ub - dt->lb);
pConv->count = count;
@ -767,7 +769,7 @@ int ompi_convertor_init_for_send( ompi_convertor_t* pConv,
pConv->fAdvance = ompi_convertor_pack_general;
pConv->fAdvance = ompi_convertor_pack_homogeneous_with_memcpy;
if( dt->flags & DT_FLAG_CONTIGUOUS ) {
pConv->flags |= DT_FLAG_CONTIGUOUS | CONVERTOR_HOMOGENEOUS;
pConv->flags |= DT_FLAG_CONTIGUOUS;
if( (pConv->pDesc->ub - pConv->pDesc->lb) == (long)pConv->pDesc->size )
pConv->fAdvance = ompi_convertor_pack_no_conv_contig;
else

Просмотреть файл

@ -174,13 +174,11 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
pElems = pData->desc.desc;
}
pStack = pConv->pStack + pConv->stack_pos;
pStack--;
DUMP_STACK( pStack, pConv->stack_pos, pElems, "starting" );
pos_desc = pStack->index;
lastDisp = pStack->disp;
last_count = pStack->count;
pStack--;
pConv->stack_pos -= 2;
pConv->stack_pos--;
next_loop:
while( pos_desc >= 0 ) {
@ -201,6 +199,8 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
pStack->disp += pElems[pStack->index].extent;
pos_desc = pStack->index + 1;
}
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
goto next_loop;
}
while( pElems[pos_desc].type == DT_LOOP ) {
@ -233,12 +233,11 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
}
/* now here we have a basic datatype */
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
/* do we have enough space in the buffer ? */
last_blength = last_count * ompi_ddt_basicDatatypes[pElems[pos_desc].type]->size;
if( space < last_blength ) {
@ -255,6 +254,8 @@ static int ompi_convertor_unpack_homogeneous( ompi_convertor_t* pConv,
space -= last_blength;
pSrcBuf += last_blength;
pos_desc++; /* advance to the next data */
lastDisp = pStack->disp + pElems[pos_desc].disp;
last_count = pElems[pos_desc].count;
}
}
last_count = 0; /* complete the data */
@ -607,19 +608,20 @@ int ompi_convertor_init_for_recv( ompi_convertor_t* pConv, uint32_t flags,
pConv->stack_pos = 0;
}
pConv->flags = CONVERTOR_RECV | CONVERTOR_HOMOGENEOUS;
pConv->pBaseBuf = pUserBuf;
pConv->available_space = count * (pData->ub - pData->lb);
pConv->count = count;
pConv->pFunctions = ompi_ddt_copy_functions;
pConv->converted = 0;
pConv->bConverted = 0;
pConv->fAdvance = ompi_convertor_unpack_homogeneous; /* default behaviour */
pConv->fAdvance = ompi_convertor_unpack_general; /* TODO: just stop complaining */
pConv->fAdvance = ompi_convertor_unpack_homogeneous; /* default behaviour */
pConv->memAlloc_fn = allocfn;
/* TODO: work only on homogeneous architectures */
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
pConv->flags |= DT_FLAG_CONTIGUOUS | CONVERTOR_HOMOGENEOUS;
pConv->flags |= DT_FLAG_CONTIGUOUS;
pConv->fAdvance = ompi_convertor_unpack_homogeneous_contig;
}
if( starting_point != 0 )

Просмотреть файл

@ -59,8 +59,13 @@ int ompi_convertor_create_stack_with_pos_general( ompi_convertor_t* pConvertor,
* last fake DT_END_LOOP that we add to the data representation and
* allow us to move quickly inside the datatype when we have a count.
*/
if( pData->opt_desc.desc != NULL ) {
pElems = pData->opt_desc.desc;
pStack->end_loop = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
pStack->end_loop = pData->desc.used;
}
pStack->count = pConvertor->count;
pStack->index = -1;
if( pConvertor->flags & CONVERTOR_HOMOGENEOUS ) {