More optimizations of the data-type description are now possible. Some corner cases have been corrected. As a result, we detect the contiguous parts of the data memory layout more accurately.
This commit was SVN r8051.
Этот коммит содержится в:
родитель
78522ed454
Коммит
025a8a04c5
@ -545,7 +545,6 @@ int32_t ompi_ddt_init( void )
|
|||||||
false, false, 0, &ompi_unpack_debug );
|
false, false, 0, &ompi_unpack_debug );
|
||||||
mca_base_param_reg_int_name( "datatype", "pack_debug", "Non zero lead to output generated by the pack functions",
|
mca_base_param_reg_int_name( "datatype", "pack_debug", "Non zero lead to output generated by the pack functions",
|
||||||
false, false, 0, &ompi_pack_debug );
|
false, false, 0, &ompi_pack_debug );
|
||||||
|
|
||||||
#endif /* OMPI_ENABLE_DEBUG */
|
#endif /* OMPI_ENABLE_DEBUG */
|
||||||
|
|
||||||
ompi_ddt_default_convertors_init();
|
ompi_ddt_default_convertors_init();
|
||||||
|
@ -76,7 +76,7 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
|
|||||||
int32_t type = DT_LOOP, last_length = 0, nbElems = 0, changes = 0, last_extent = 1;
|
int32_t type = DT_LOOP, last_length = 0, nbElems = 0, changes = 0, last_extent = 1;
|
||||||
uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */
|
uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */
|
||||||
long total_disp = 0;
|
long total_disp = 0;
|
||||||
int32_t optimized = 0;
|
int32_t optimized = 0, continuity;
|
||||||
|
|
||||||
pStack = alloca( sizeof(dt_stack_t) * (pData->btypes[DT_LOOP]+2) );
|
pStack = alloca( sizeof(dt_stack_t) * (pData->btypes[DT_LOOP]+2) );
|
||||||
SAVE_STACK( pStack, -1, 0, count, 0, pData->desc.used );
|
SAVE_STACK( pStack, -1, 0, count, 0, pData->desc.used );
|
||||||
@ -87,7 +87,11 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
|
|||||||
|
|
||||||
SET_EMPTY_ELEMENT( &opt_elem );
|
SET_EMPTY_ELEMENT( &opt_elem );
|
||||||
assert( DT_END_LOOP == pData->desc.desc[pData->desc.used].elem.common.type );
|
assert( DT_END_LOOP == pData->desc.desc[pData->desc.used].elem.common.type );
|
||||||
|
opt_elem.common.type = DT_LOOP;
|
||||||
|
opt_elem.common.flags = 0xFFFF; /* keep all for the first datatype */
|
||||||
|
opt_elem.count = 0;
|
||||||
opt_elem.disp = pData->desc.desc[pData->desc.used].end_loop.first_elem_disp;
|
opt_elem.disp = pData->desc.desc[pData->desc.used].end_loop.first_elem_disp;
|
||||||
|
opt_elem.extent = 0;
|
||||||
|
|
||||||
while( stack_pos >= 0 ) {
|
while( stack_pos >= 0 ) {
|
||||||
if( DT_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */
|
if( DT_END_LOOP == pData->desc.desc[pos_desc].elem.common.type ) { /* end of the current loop */
|
||||||
@ -116,12 +120,13 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
|
|||||||
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
|
int index = GET_FIRST_NON_LOOP( &(pData->desc.desc[pos_desc]) );
|
||||||
long loop_disp = pData->desc.desc[pos_desc + index].elem.disp;
|
long loop_disp = pData->desc.desc[pos_desc + index].elem.disp;
|
||||||
|
|
||||||
|
continuity = ((last_disp + last_length * (long)ompi_ddt_basicDatatypes[last_type]->size)
|
||||||
|
== (total_disp + loop_disp));
|
||||||
if( loop->common.flags & DT_FLAG_CONTIGUOUS ) {
|
if( loop->common.flags & DT_FLAG_CONTIGUOUS ) {
|
||||||
/* the loop is contiguous or composed by contiguous elements with a gap */
|
/* the loop is contiguous or composed by contiguous elements with a gap */
|
||||||
if( loop->extent == (long)end_loop->size ) {
|
if( loop->extent == (long)end_loop->size ) {
|
||||||
/* the whole loop is contiguous */
|
/* the whole loop is contiguous */
|
||||||
if( (last_disp + last_length * (long)ompi_ddt_basicDatatypes[last_type]->size)
|
if( !continuity ) {
|
||||||
!= (total_disp + loop_disp) ) {
|
|
||||||
if( 0 != last_length ) {
|
if( 0 != last_length ) {
|
||||||
CREATE_ELEM( pElemDesc, last_type, DT_FLAG_BASIC, last_length, last_disp, last_extent );
|
CREATE_ELEM( pElemDesc, last_type, DT_FLAG_BASIC, last_length, last_disp, last_extent );
|
||||||
pElemDesc++; nbElems++;
|
pElemDesc++; nbElems++;
|
||||||
@ -129,8 +134,8 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
|
|||||||
}
|
}
|
||||||
last_disp = total_disp + loop_disp;
|
last_disp = total_disp + loop_disp;
|
||||||
}
|
}
|
||||||
last_length = last_length * ompi_ddt_basicDatatypes[last_type]->size
|
last_length = (last_length * ompi_ddt_basicDatatypes[last_type]->size
|
||||||
+ loop->loops * end_loop->size;
|
+ loop->loops * end_loop->size);
|
||||||
last_type = DT_BYTE;
|
last_type = DT_BYTE;
|
||||||
last_extent = 1;
|
last_extent = 1;
|
||||||
optimized++;
|
optimized++;
|
||||||
@ -138,7 +143,7 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
|
|||||||
int counter = loop->loops;
|
int counter = loop->loops;
|
||||||
/* if the previous data is contiguous with this piece and it has a length not ZERO */
|
/* if the previous data is contiguous with this piece and it has a length not ZERO */
|
||||||
if( last_length != 0 ) {
|
if( last_length != 0 ) {
|
||||||
if( (last_disp + last_length) == (total_disp + loop_disp) ) {
|
if( continuity ) {
|
||||||
last_length *= ompi_ddt_basicDatatypes[last_type]->size;
|
last_length *= ompi_ddt_basicDatatypes[last_type]->size;
|
||||||
last_length += end_loop->size;
|
last_length += end_loop->size;
|
||||||
last_type = DT_BYTE;
|
last_type = DT_BYTE;
|
||||||
@ -185,9 +190,10 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData,
|
|||||||
while( pData->desc.desc[pos_desc].elem.common.flags & DT_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */
|
while( pData->desc.desc[pos_desc].elem.common.flags & DT_FLAG_DATA ) { /* keep doing it until we reach a non datatype element */
|
||||||
/* now here we have a basic datatype */
|
/* now here we have a basic datatype */
|
||||||
type = pData->desc.desc[pos_desc].elem.common.type;
|
type = pData->desc.desc[pos_desc].elem.common.type;
|
||||||
|
continuity = ((last_disp + last_length * (long)ompi_ddt_basicDatatypes[last_type]->size)
|
||||||
|
== (total_disp + pData->desc.desc[pos_desc].elem.disp));
|
||||||
|
|
||||||
if( (pData->desc.desc[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS) &&
|
if( (pData->desc.desc[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS) && continuity &&
|
||||||
(last_disp + last_length) == (total_disp + pData->desc.desc[pos_desc].elem.disp) &&
|
|
||||||
(pData->desc.desc[pos_desc].elem.extent == (int32_t)ompi_ddt_basicDatatypes[type]->size) ) {
|
(pData->desc.desc[pos_desc].elem.extent == (int32_t)ompi_ddt_basicDatatypes[type]->size) ) {
|
||||||
if( type == last_type ) {
|
if( type == last_type ) {
|
||||||
last_length += pData->desc.desc[pos_desc].elem.count;
|
last_length += pData->desc.desc[pos_desc].elem.count;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user