1
1

Add some macros at the beginning of the file. They are not used right now, but they will be in a few days.

Do not ignore the type and extent of the last optimized basic type in some special cases.

Update the last fake END_LOOP with the correct value for the first_elem_disp field.

This commit was SVN r8023.
Этот коммит содержится в:
George Bosilca 2005-11-07 23:17:00 +00:00
родитель 53cb3c2bee
Коммит f7359e24d6

Просмотреть файл

@ -27,17 +27,53 @@
#endif
#include <stdlib.h>
/*
 * Reset the element descriptor pointed to by ELEM: flags become
 * DT_FLAG_BASIC, the type becomes DT_LOOP, and the count, displacement
 * and extent are all set to zero.  ELEM is evaluated exactly once, and
 * the do/while(0) wrapper lets the macro be used as a single statement.
 */
#define SET_EMPTY_ELEMENT( ELEM )                              \
    do {                                                       \
        ddt_elem_desc_t* const empty_target_ = (ELEM);         \
        empty_target_->count        = 0;                       \
        empty_target_->disp         = 0;                       \
        empty_target_->extent       = 0;                       \
        empty_target_->common.type  = DT_LOOP;                 \
        empty_target_->common.flags = DT_FLAG_BASIC;           \
    } while (0)
/*
 * Flush a pending optimized element into the description.
 *
 * When opt_elem holds something (its count is non-zero) it is copied into
 * pElemDesc->elem and opt_elem is then reset with SET_EMPTY_ELEMENT.  When
 * opt_elem is empty nothing is written.
 *
 * pElemDesc  destination slot in the description being built
 * opt_elem   pending element; reset on return if it was copied
 *
 * Always returns 0.
 */
static inline int SAVE_OPTIMIZED_ELEMENT( dt_elem_desc_t* pElemDesc,
                                          ddt_elem_desc_t* opt_elem )
{
    /* Nothing pending: leave both arguments untouched. */
    if( 0 == opt_elem->count ) {
        return 0;
    }

    pElemDesc->elem = *opt_elem;
    SET_EMPTY_ELEMENT( opt_elem );
    return 0;
}
/*
 * Try to store a basic element in the pending optimized slot.
 *
 * If opt_elem is empty (count == 0) it is filled with the given type,
 * count, displacement and extent, and flagged DT_FLAG_BASIC.  If it is
 * already occupied it is left unchanged.
 *
 * pElemDesc  not used by the current implementation
 * opt_elem   pending element slot to fill
 * type       basic datatype identifier to record
 * count      number of elements to record
 * disp       displacement to record
 * extent     extent to record
 *
 * Returns 0 when the element was stored, 1 when opt_elem was already in use.
 */
static inline int ADD_ELEMENT( dt_elem_desc_t* pElemDesc,
                               ddt_elem_desc_t* opt_elem,
                               uint16_t type, uint32_t count, long disp, int32_t extent )
{
    /* Slot already holds an element: report it to the caller. */
    if( 0 != opt_elem->count ) {
        return 1;
    }

    opt_elem->common.type  = type;
    opt_elem->common.flags = DT_FLAG_BASIC;
    opt_elem->extent       = extent;
    opt_elem->disp         = disp;
    opt_elem->count        = count;
    return 0;
}
static int32_t
ompi_ddt_optimize_short( ompi_datatype_t* pData,
int32_t count,
dt_type_desc_t* pTypeDesc )
{
dt_elem_desc_t* pElemDesc;
ddt_elem_desc_t opt_elem;
long last_disp = 0;
dt_stack_t* pStack; /* pointer to the position on the stack */
int32_t pos_desc = 0; /* actual position in the description of the derived datatype */
int32_t stack_pos = 0, last_type = DT_BYTE;
int32_t type = DT_BYTE, last_length = 0, nbElems = 0, changes = 0, last_extent = 1;
int32_t type = DT_LOOP, last_length = 0, nbElems = 0, changes = 0, last_extent = 1;
uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */
long total_disp = 0;
int32_t optimized = 0;
@ -49,6 +85,10 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
pTypeDesc->desc = pElemDesc = (dt_elem_desc_t*)malloc( sizeof(dt_elem_desc_t) * pTypeDesc->length );
pTypeDesc->used = 0;
SET_EMPTY_ELEMENT( &opt_elem );
assert( DT_END_LOOP == pData->desc.desc[pData->desc.used].elem.common.type );
opt_elem.disp = pData->desc.desc[pData->desc.used].end_loop.first_elem_disp;
while( stack_pos >= 0 ) {
if( DT_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */
ddt_endloop_desc_t* end_loop = &(pData->desc.desc[pos_desc].end_loop);
@ -59,7 +99,7 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
last_length = 0;
}
CREATE_LOOP_END( pElemDesc, nbElems - pStack->index + 1, /* # of elems in this loop */
end_loop->total_extent, end_loop->size, end_loop->common.flags );
end_loop->first_elem_disp, end_loop->size, end_loop->common.flags );
pElemDesc++; nbElems++;
if( --stack_pos >= 0 ) { /* still something to do ? */
ddt_loop_desc_t* pStartLoop = &(pTypeDesc->desc[pStack->index - 1].loop);
@ -80,7 +120,8 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
/* the loop is contiguous or composed by contiguous elements with a gap */
if( loop->extent == (long)end_loop->size ) {
/* the whole loop is contiguous */
if( (last_disp + last_length) != (total_disp + loop_disp) ) {
if( (last_disp + last_length * (long)ompi_ddt_basicDatatypes[last_type]->size)
!= (total_disp + loop_disp) ) {
if( 0 != last_length ) {
CREATE_ELEM( pElemDesc, last_type, DT_FLAG_BASIC, last_length, last_disp, last_extent );
pElemDesc++; nbElems++;
@ -88,7 +129,10 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
}
last_disp = total_disp + loop_disp;
}
last_length += loop->loops * end_loop->size;
last_length = last_length * ompi_ddt_basicDatatypes[last_type]->size
+ loop->loops * end_loop->size;
last_type = DT_BYTE;
last_extent = 1;
optimized++;
} else {
int counter = loop->loops;
@ -97,34 +141,37 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
if( (last_disp + last_length) == (total_disp + loop_disp) ) {
last_length *= ompi_ddt_basicDatatypes[last_type]->size;
last_length += end_loop->size;
last_type = DT_BYTE;
last_type = DT_BYTE;
last_extent = 1;
counter--;
}
CREATE_ELEM( pElemDesc, last_type, DT_FLAG_BASIC, last_length, last_disp, last_extent );
pElemDesc++; nbElems++;
last_disp += last_length;
last_length = 0;
last_type = DT_LOOP;
}
/* we have a gap at the beginning or the end of the loop but the whole
* loop can be merged in just one memcpy.
*/
CREATE_LOOP_START( pElemDesc, counter, (long)2, loop->extent, loop->common.flags );
pElemDesc++; nbElems++;
CREATE_ELEM( pElemDesc, last_type, DT_FLAG_BASIC, end_loop->size, loop_disp, last_extent );
CREATE_ELEM( pElemDesc, DT_BYTE, DT_FLAG_BASIC, end_loop->size, loop_disp, 1);
pElemDesc++; nbElems++;
CREATE_LOOP_END( pElemDesc, 2, end_loop->total_extent, end_loop->size,
CREATE_LOOP_END( pElemDesc, 2, end_loop->first_elem_disp, end_loop->size,
end_loop->common.flags );
pElemDesc++; nbElems++;
if( loop->items > 2 ) optimized++;
}
pos_desc += pData->desc.desc[pos_desc].loop.items + 1;
pos_desc += loop->items + 1;
changes++;
} else {
if( last_length != 0 ) {
CREATE_ELEM( pElemDesc, last_type, DT_FLAG_BASIC, last_length, last_disp, last_extent );
pElemDesc++; nbElems++;
last_disp += last_length;
last_disp += last_length;
last_length = 0;
last_type = DT_LOOP;
}
CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags );
pElemDesc++; nbElems++;
@ -174,7 +221,7 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
}
if( last_length != 0 ) {
CREATE_ELEM( pElemDesc, DT_BYTE, DT_FLAG_BASIC, last_length, last_disp, last_extent );
CREATE_ELEM( pElemDesc, last_type, DT_FLAG_BASIC, last_length, last_disp, last_extent );
pElemDesc++; nbElems++;
}
/* cleanup the stack */
@ -186,18 +233,31 @@ int32_t ompi_ddt_commit( ompi_datatype_t** data )
{
ompi_datatype_t* pData = *data;
ddt_endloop_desc_t* pLast = &(pData->desc.desc[pData->desc.used].end_loop);
long first_elem_disp = 0;
if( pData->flags & DT_FLAG_COMMITED ) return OMPI_SUCCESS;
pData->flags |= DT_FLAG_COMMITED;
/* We have to compute the displacement of the first non loop item in the
* description.
*/
if( 0 != pData->size ) {
int index;
dt_elem_desc_t* pElem = pData->desc.desc;
index = GET_FIRST_NON_LOOP( pElem );
assert( pData->desc.desc[index].elem.common.flags & DT_FLAG_DATA );
first_elem_disp = pData->desc.desc[index].elem.disp;
}
/* let's add a fake element at the end just to avoid useless comparisons
* in pack/unpack functions.
*/
pLast->common.type = DT_END_LOOP;
pLast->common.flags = 0;
pLast->items = pData->desc.used;
pLast->total_extent = pData->ub - pData->lb;
pLast->size = pData->size;
pLast->common.type = DT_END_LOOP;
pLast->common.flags = 0;
pLast->items = pData->desc.used;
pLast->first_elem_disp = first_elem_disp;
pLast->size = pData->size;
/* If there is no datatype description how can we have an optimized description ? */
if( 0 == pData->desc.used ) {
@ -216,11 +276,11 @@ int32_t ompi_ddt_commit( ompi_datatype_t** data )
* in pack/unpack functions.
*/
pLast = &(pData->opt_desc.desc[pData->opt_desc.used].end_loop);
pLast->common.type = DT_END_LOOP;
pLast->common.flags = 0;
pLast->items = pData->opt_desc.used;
pLast->total_extent = pData->ub - pData->lb;
pLast->size = pData->size;
pLast->common.type = DT_END_LOOP;
pLast->common.flags = 0;
pLast->items = pData->opt_desc.used;
pLast->first_elem_disp = first_elem_disp;
pLast->size = pData->size;
}
return OMPI_SUCCESS;
}