1
1

A faster implementation (still temporary: I want to experiment with the internal chunk size before the final commit). And yes, the chunk size could become an MCA parameter :)

This commit was SVN r1220.
Этот коммит содержится в:
George Bosilca 2004-06-10 19:02:11 +00:00
родитель bcb7960080
Коммит 630c2f0c8b

Просмотреть файл

@ -569,7 +569,31 @@ int ompi_ddt_copy_content_same_ddt( dt_desc_t* pData, int count,
if( (pData->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) { if( (pData->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) {
/* basic datatype with count */ /* basic datatype with count */
pSrcBuf += pData->true_lb;
pDestBuf += pData->true_lb;
if( (pData->true_ub - pData->true_lb) == pData->size ) {
/* all the data is contiguous in the memory */
if( pData->size * count < (512*1024) ) {
MEMCPY( pDestBuf, pSrcBuf, pData->size * count ); MEMCPY( pDestBuf, pSrcBuf, pData->size * count );
} else {
type = 512 * 1024;
lastLength = count * pData->size;
while( lastLength > 0 ) {
if( type > lastLength ) type = lastLength;
MEMCPY( pDestBuf, pSrcBuf, type );
pDestBuf += type;
pSrcBuf += type;
lastLength -= type;
}
}
} else {
/* there are gaps between elements */
for( type = 0; type < count; type++ ) {
MEMCPY( pDestBuf, pSrcBuf, pData->size );
pDestBuf += pData->size;
pSrcBuf += (pData->ub - pData->lb);
}
}
return 0; return 0;
} }
@ -593,18 +617,19 @@ int ompi_ddt_copy_content_same_ddt( dt_desc_t* pData, int count,
pStack->index, pStack->count ); pStack->index, pStack->count );
next_loop: next_loop:
while( pos_desc < pStack->end_loop ) { while( pos_desc >= 0 ) {
if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */ if( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */ if( --(pStack->count) == 0 ) { /* end of loop */
pStack--; pStack--;
if( --stack_pos == -1 ) break; if( --stack_pos == -1 ) break;
} else } else {
pos_desc = pStack->index; pos_desc = pStack->index;
if( pos_desc == -1 ) if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb); pStack->disp += (pData->ub - pData->lb);
else else
pStack->disp += pElems[pos_desc].extent; pStack->disp += pElems[pos_desc].extent;
pos_desc++; pos_desc++;
}
goto next_loop; goto next_loop;
} }
if( pElems[pos_desc].type == DT_LOOP ) { if( pElems[pos_desc].type == DT_LOOP ) {
@ -628,6 +653,7 @@ int ompi_ddt_copy_content_same_ddt( dt_desc_t* pData, int count,
pos_desc++; /* advance to the next data */ pos_desc++; /* advance to the next data */
} }
if( lastLength != 0 )
MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength ); MEMCPY( pDestBuf + lastDisp, pSrcBuf + lastDisp, lastLength );
/* cleanup the stack */ /* cleanup the stack */
return 0; return 0;