From 3219b917b97ca112ec39e79a168eae755ffdcf7b Mon Sep 17 00:00:00 2001 From: George Bosilca Date: Fri, 16 Jun 2006 03:00:20 +0000 Subject: [PATCH] Generate more optimal internal data representations. This commit was SVN r10382. --- ompi/datatype/dt_optimize.c | 46 ++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/ompi/datatype/dt_optimize.c b/ompi/datatype/dt_optimize.c index 3805e77425..6bef07c8cc 100644 --- a/ompi/datatype/dt_optimize.c +++ b/ompi/datatype/dt_optimize.c @@ -77,6 +77,7 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData, uint16_t last_flags = 0xFFFF; /* keep all for the first datatype */ long total_disp = 0; int32_t optimized = 0, continuity; + uint32_t i; pStack = alloca( sizeof(dt_stack_t) * (pData->btypes[DT_LOOP]+2) ); SAVE_STACK( pStack, -1, 0, count, 0, pData->desc.used ); @@ -179,21 +180,40 @@ ompi_ddt_optimize_short( ompi_datatype_t* pData, last_length = 0; last_type = DT_LOOP; } - if( (2 == loop->items) && (1 == elem->count) - && (elem->extent == (long)ompi_ddt_basicDatatypes[elem->common.type]->size) ) { - CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags & ~DT_FLAG_CONTIGUOUS, - loop->loops, elem->disp, loop->extent ); - pElemDesc++; nbElems++; - pos_desc += loop->items + 1; - changes++; optimized++; - } else { - CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags ); - pElemDesc++; nbElems++; - PUSH_STACK( pStack, stack_pos, nbElems, DT_LOOP, loop->loops, total_disp, pos_desc + loop->extent ); - pos_desc++; - DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" ); + if( 2 == loop->items ) { /* small loop */ + if( (1 == elem->count) + && (elem->extent == (long)ompi_ddt_basicDatatypes[elem->common.type]->size) ) { + CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags & ~DT_FLAG_CONTIGUOUS, + loop->loops, elem->disp, loop->extent ); + pElemDesc++; nbElems++; + pos_desc += loop->items + 1; + changes++; optimized++; + goto complete_loop; + } else if( (elem->extent * (int32_t)elem->count) == loop->extent ) { + CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags, + loop->loops * elem->count, elem->disp, loop->extent ); + pElemDesc++; nbElems++; + pos_desc += loop->items + 1; + changes++; optimized++; + goto complete_loop; + } else if( loop->loops < 3 ) { + for( i = 0; i < loop->loops; i++ ) { + CREATE_ELEM( pElemDesc, elem->common.type, elem->common.flags, + elem->count, elem->disp, loop->extent ); + pElemDesc++; nbElems++; + } + pos_desc += loop->items + 1; + changes += loop->loops; optimized += loop->loops; + goto complete_loop; + } } + CREATE_LOOP_START( pElemDesc, loop->loops, loop->items, loop->extent, loop->common.flags ); + pElemDesc++; nbElems++; + PUSH_STACK( pStack, stack_pos, nbElems, DT_LOOP, loop->loops, total_disp, pos_desc + loop->extent ); + pos_desc++; + DDT_DUMP_STACK( pStack, stack_pos, pData->desc.desc, "advance loops" ); } + complete_loop: total_disp = pStack->disp; /* update the displacement */ continue; }