1
1

Fix an issue identified by Thomas Jahns and his colleague when the data

representation is not correctly optimized (it is off by the extend).

During the data representation process, if the opportunity to merge several
items appear, we replace them with the new merged element. However, if one
of the components of this merged element was comming from a "loop representation"
then the new first element of this loop must have a displacement moved by the
extent of the loop.

This commit was SVN r28319.
Этот коммит содержится в:
George Bosilca 2013-04-09 23:01:54 +00:00
родитель dc47473e6d
Коммит 43e4d3654e

Просмотреть файл

@ -73,15 +73,12 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
{
dt_elem_desc_t* pElemDesc;
ddt_elem_desc_t opt_elem;
OPAL_PTRDIFF_TYPE last_disp = 0;
dt_stack_t* pStack; /* pointer to the position on the stack */
int32_t pos_desc = 0; /* actual position in the description of the derived datatype */
int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1;
int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, changes = 0;
int32_t optimized = 0, continuity;
int32_t stack_pos = 0, last_type = OPAL_DATATYPE_UINT1, last_length = 0;
int32_t type = OPAL_DATATYPE_LOOP, nbElems = 0, continuity;
OPAL_PTRDIFF_TYPE total_disp = 0, last_extent = 1, last_disp = 0;
uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */
OPAL_PTRDIFF_TYPE total_disp = 0, last_extent = 1;
int32_t last_length = 0;
uint32_t i;
pStack = (dt_stack_t*)alloca( sizeof(dt_stack_t) * (pData->btypes[OPAL_DATATYPE_LOOP]+2) );
@ -134,7 +131,8 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
/* the whole loop is contiguous */
if( !continuity ) {
if( 0 != last_length ) {
CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC,
last_length, last_disp, last_extent );
pElemDesc++; nbElems++;
last_length = 0;
}
@ -144,9 +142,9 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
+ loop->loops * end_loop->size);
last_type = OPAL_DATATYPE_UINT1;
last_extent = 1;
optimized++;
} else {
int counter = loop->loops;
OPAL_PTRDIFF_TYPE merged_disp = 0;
/* if the previous data is contiguous with this piece and it has a length not ZERO */
if( last_length != 0 ) {
if( continuity ) {
@ -155,27 +153,42 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
last_type = OPAL_DATATYPE_UINT1;
last_extent = 1;
counter--;
merged_disp = loop->extent; /* merged loop, update the disp of the remaining elems */
}
CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC, last_length, last_disp, last_extent );
CREATE_ELEM( pElemDesc, last_type, OPAL_DATATYPE_FLAG_BASIC,
last_length, last_disp, last_extent );
pElemDesc++; nbElems++;
last_disp += last_length;
last_length = 0;
last_type = OPAL_DATATYPE_LOOP;
}
/* we have a gap in the begining or the end of the loop but the whole
* loop can be merged in just one memcpy.
/**
* The content of the loop is contiguous (maybe with a gap before or after).
*
* If any of the loops have been merged with the previous element, then the
* displacement of the first element (or the displacement of all elements if the
* loop will be removed) must be updated accordingly.
*/
CREATE_LOOP_START( pElemDesc, counter, 2, loop->extent, loop->common.flags );
pElemDesc++; nbElems++;
CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC, end_loop->size, loop_disp, 1);
pElemDesc++; nbElems++;
CREATE_LOOP_END( pElemDesc, 2, end_loop->first_elem_disp, end_loop->size,
end_loop->common.flags );
pElemDesc++; nbElems++;
if( loop->items > 2 ) optimized++;
if( counter <= 2 ) {
merged_disp += end_loop->first_elem_disp;
while( counter > 0 ) {
CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC,
end_loop->size, merged_disp, 1);
pElemDesc++; nbElems++; counter--;
merged_disp += loop->extent;
}
} else {
CREATE_LOOP_START( pElemDesc, counter, 2, loop->extent, loop->common.flags );
pElemDesc++; nbElems++;
CREATE_ELEM( pElemDesc, OPAL_DATATYPE_UINT1, OPAL_DATATYPE_FLAG_BASIC,
end_loop->size, loop_disp, 1);
pElemDesc++; nbElems++;
CREATE_LOOP_END( pElemDesc, 2, end_loop->first_elem_disp + merged_disp,
end_loop->size, end_loop->common.flags );
pElemDesc++; nbElems++;
}
}
pos_desc += loop->items + 1;
changes++;
} else {
ddt_elem_desc_t* elem = (ddt_elem_desc_t*)&(pData->desc.desc[pos_desc+1]);
if( last_length != 0 ) {
@ -192,7 +205,6 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
loop->loops, elem->disp, loop->extent );
pElemDesc++; nbElems++;
pos_desc += loop->items + 1;
changes++; optimized++;
goto complete_loop;
} else if( loop->loops < 3 ) {
OPAL_PTRDIFF_TYPE elem_displ = elem->disp;
@ -203,7 +215,6 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
pElemDesc++; nbElems++;
}
pos_desc += loop->items + 1;
changes += loop->loops; optimized += loop->loops;
goto complete_loop;
}
}
@ -238,7 +249,6 @@ opal_datatype_optimize_short( opal_datatype_t* pData,
pData->desc.desc[pos_desc].elem.count * opal_datatype_basicDatatypes[type]->size;
last_type = OPAL_DATATYPE_UINT1;
last_extent = 1;
optimized++;
}
}
last_flags &= pData->desc.desc[pos_desc].elem.common.flags;