diff --git a/src/datatype/datatype_internal.h b/src/datatype/datatype_internal.h
index 9eb0f01a80..72e560d4d7 100644
--- a/src/datatype/datatype_internal.h
+++ b/src/datatype/datatype_internal.h
@@ -91,6 +91,25 @@ struct __dt_stack {
     long disp;
 };
 
+/* These 2 typedefs are the same as the dt_elem_desc_t except
+ * for the name of the fields.
+ */
+typedef struct __dt_loop_desc {
+    u_int16_t flags;   /**< flags for the record */
+    u_int16_t type;    /**< the basic data type id */
+    u_int32_t count;   /**< number of elements */
+    long disp;         /**< displacement of the first element */
+    u_int32_t extent;  /**< extent of each element */
+} dt_loop_desc_t;
+
+typedef struct __dt_endloop_desc {
+    u_int16_t flags;   /**< flags for the record */
+    u_int16_t type;    /**< the basic data type id */
+    u_int32_t count;   /**< number of elements */
+    long disp;         /**< displacement of the first element */
+    u_int32_t extent;  /**< extent of each element */
+} dt_endloop_desc_t;
+
 /* keep the last 16 bits free for data flags */
 #define CONVERTOR_USELESS 0x00010000
 #define CONVERTOR_RECV 0x00020000
@@ -104,12 +123,12 @@ struct __dt_stack {
 typedef struct {
     float r;
     float i;
-} complex_float_t;
+} lam_complex_float_t;
 
 typedef struct {
     double r;
     double i;
-} complex_double_t;
+} lam_complex_double_t;
 
 extern dt_desc_t basicDatatypes[DT_MAX_PREDEFINED];
 
diff --git a/src/datatype/dt_create.c b/src/datatype/dt_create.c
index ba6a86a031..f463788cf0 100644
--- a/src/datatype/dt_create.c
+++ b/src/datatype/dt_create.c
@@ -168,8 +168,8 @@ DUMP_TYPE( long, long )
 DUMP_TYPE( double, double )
 DUMP_TYPE( long_long, long long )
 DUMP_TYPE( long_double, long double )
-DUMP_TYPE( complex_float, complex_float_t )
-DUMP_TYPE( complex_double, complex_double_t )
+DUMP_TYPE( complex_float, lam_complex_float_t )
+DUMP_TYPE( complex_double, lam_complex_double_t )
 
 static lam_convertor_t* pDumpConv = NULL;
 
diff --git a/src/datatype/dt_module.c b/src/datatype/dt_module.c
index 67d9add9a2..dbf8b94174 100644
--- a/src/datatype/dt_module.c
+++ b/src/datatype/dt_module.c
@@ -36,8 +36,8 @@ dt_desc_t basicDatatypes[DT_MAX_PREDEFINED] = {
     INIT_BASIC_DATA( float, LAM_ALIGNMENT_FLOAT, FLOAT ),
     INIT_BASIC_DATA( double, LAM_ALIGNMENT_DOUBLE, DOUBLE ),
     INIT_BASIC_DATA( long double, LAM_ALIGNMENT_LONG_DOUBLE, LONG_DOUBLE ),
-    INIT_BASIC_DATA( complex_float_t, LAM_ALIGNMENT_FLOAT, COMPLEX_FLOAT ),
-    INIT_BASIC_DATA( complex_double_t, LAM_ALIGNMENT_DOUBLE, COMPLEX_DOUBLE ),
+    INIT_BASIC_DATA( lam_complex_float_t, LAM_ALIGNMENT_FLOAT, COMPLEX_FLOAT ),
+    INIT_BASIC_DATA( lam_complex_double_t, LAM_ALIGNMENT_DOUBLE, COMPLEX_DOUBLE ),
     INIT_BASIC_DATA( char, LAM_ALIGNMENT_CHAR, PACKED ),
     INIT_BASIC_DATA( int, LAM_ALIGNMENT_INT, LOGIC ),
     INIT_BASIC_TYPE( DT_FLOAT_INT, FLOAT_INT ),
diff --git a/src/datatype/dt_unpack.c b/src/datatype/dt_unpack.c
index bc3ffb79cb..11d6ec821d 100644
--- a/src/datatype/dt_unpack.c
+++ b/src/datatype/dt_unpack.c
@@ -390,8 +390,8 @@ COPY_TYPE( long, long )
 /*COPY_TYPE( double, double );*/
 COPY_TYPE( long_long, long long )
 COPY_TYPE( long_double, long double )
-COPY_TYPE( complex_float, complex_float_t )
-COPY_TYPE( complex_double, complex_double_t )
+COPY_TYPE( complex_float, lam_complex_float_t )
+COPY_TYPE( complex_double, lam_complex_double_t )
 
 static int copy_double( unsigned int count,
                         char* from, unsigned int from_len, long from_extent,
diff --git a/src/datatype/fake_stack.c b/src/datatype/fake_stack.c
index b26acc7cb5..e0911e5652 100644
--- a/src/datatype/fake_stack.c
+++ b/src/datatype/fake_stack.c
@@ -17,22 +17,34 @@ static inline long GET_LOOP_DISP( dt_elem_desc_t* _pElem )
 }
 
 int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
-                               int starting_point,
-                               int* sizes )
+                               int starting_point, int* sizes );
+int lam_create_stack_with_pos_general( lam_convertor_t* pConvertor,
+                                       int starting_point, int* sizes );
+
+/**
+ * Position the convertor stack on the element that contains the offset
+ * starting_point, walking the datatype description loop by loop.
+ *
+ * @param pConvertor     convertor whose pStack, stack_pos and bConverted
+ *                       fields are rewritten
+ * @param starting_point offset to reach, in the unit of basicDatatypes[].size
+ * @param sizes          not referenced by the code visible in this patch
+ * @return 0 on every path visible in this patch
+ */
+int lam_create_stack_with_pos_general( lam_convertor_t* pConvertor,
+                                       int starting_point, int* sizes )
 {
-    long lastDisp = 0;
     dt_stack_t* pStack;   /* pointer to the position on the stack */
     int pos_desc;         /* actual position in the description of the derived datatype */
-    int end_loop;         /* last element in the actual loop */
-    int stack_pos = 0;
-    int type, lastLength = 0, nbElems = 0, changes = 0;
+    int type, lastLength = 0;
     long totalDisp;
     lam_datatype_t* pData = pConvertor->pDesc;
     int* remoteLength;
     int loop_length;
+    int resting_place = starting_point;
+    dt_elem_desc_t* pElems;
 
     if( starting_point == 0 ) {
-        dt_elem_desc_t* pElems;
 
         pConvertor->stack_pos = 1;
         pConvertor->pStack[0].index = 0;
@@ -56,7 +68,9 @@ int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
     /* if the convertor continue from the last position
      * there is nothing to do.
      */
-    if( pConvertor->bConverted != starting_point ) return 0;
+    if( pConvertor->bConverted == starting_point ) return 0;
 
-    remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
+    /* slot 0 is always written and every DT_LOOP writes slot stack_pos + 1,
+     * so reserve one element more than btypes[DT_LOOP] */
+    remoteLength = (int*)alloca( sizeof(int) * (pConvertor->pDesc->btypes[DT_LOOP] + 1) );
     pStack = pConvertor->pStack;
@@ -65,88 +79,217 @@ int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
     pStack->end_loop = pData->desc.used - 1;
     pStack->disp = 0;
     pos_desc = 0;
-    
+    remoteLength[0] = 0;  /* initial value set to ZERO */
+    pConvertor->stack_pos = 0;
+    pElems = &(pData->desc.desc[pos_desc]);
+
  next_loop:
-    end_loop = pStack->end_loop;
     totalDisp = pStack->disp;
-    loop_length = remoteLength[stack_pos];
-    while( pos_desc <= end_loop ) {
-        if( pData->desc.desc[pos_desc].type == DT_END_LOOP ) {  /* end of the current loop */
-            /* now we know the length of the loop. We can compute if the the starting_position
-             * will happend in this loop.
+    loop_length = remoteLength[pConvertor->stack_pos];
+    while( pConvertor->stack_pos >= 0 ) {
+        if( pElems->type == DT_END_LOOP ) {  /* end of the current loop */
+            /* now we know the length of the loop. We can compute
+             * if the starting position will happen in one of the
+             * iterations of this loop.
              */
-            remoteLength[stack_pos] = loop_length;
-            stack_pos--;
+            remoteLength[pConvertor->stack_pos] = loop_length;
+            if( (loop_length * pStack->count) > resting_place ) {
+                /* OK here we stop in this loop. First save the loop
+                 * on the stack, then save the position of the last
+                 * data */
+                int cnt = resting_place / loop_length;
+                pStack->count -= cnt;
+                resting_place -= cnt * loop_length;
+                pStack->disp += cnt * pElems->extent;
+                pConvertor->bConverted += (cnt * loop_length);
+                /* NOTE(review): pos_desc/pElems still designate this END_LOOP
+                 * after the jump, so cnt is 0 on the next pass; confirm that
+                 * rewinding to the start of the loop body is not missing. */
+                goto next_loop;
+            }
+            /* Not in this loop. Cleanup the stack and advance to the
+             * next data description.
+             */
+            pConvertor->stack_pos--;
             pStack--;
+            /* the outermost level is closed: do not read below the stack */
+            if( pConvertor->stack_pos < 0 ) return 0;
             pos_desc++;
+            pElems++;
             goto next_loop;
         }
-        if( pData->desc.desc[pos_desc].type == DT_LOOP ) {
-            dt_elem_desc_t* pEndLoop = &(pData->desc.desc[pos_desc + pData->desc.desc[pos_desc].disp]);
-            long loop_disp = GET_LOOP_DISP( &(pData->desc.desc[pos_desc]) );
-            remoteLength [stack_pos] = 0;
-            if( pData->desc.desc[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
-                /* the loop is contiguous or composed by contiguous elements with a gap */
-                if( pData->desc.desc[pos_desc].extent == pEndLoop->extent ) {
-                    /* the whole loop is contiguous */
-                    if( (lastDisp + lastLength) != (totalDisp + loop_disp) ) {
-/*                         SAVE_DESC( pElemDesc, lastDisp, lastLength ); */
-                        lastLength = 0;
-                        lastDisp = totalDisp + loop_disp;
-                    }
-                    lastLength += pData->desc.desc[pos_desc].count * pEndLoop->extent;
-                } else {
-                    int counter = pData->desc.desc[pos_desc].count;
-                    if( (lastDisp + lastLength) == (totalDisp + loop_disp) ) {
-                        lastLength += pEndLoop->extent;
-                        counter--;
-                    }
-                    if( lastLength != 0 ) {
-/*                         SAVE_DESC( pElemDesc, lastDisp, lastLength ); */
-                        lastDisp += lastLength;
-                        lastLength = 0;
-                    }
-                    /* we have a gap in the begining or the end of the loop but the whole
-                     * loop can be merged in just one memcpy.
-                     */
-/*                     SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags, */
-/*                                counter, (long)2, pData->desc.desc[pos_desc].extent ); */
-/*                     SAVE_DESC( pElemDesc, loop_disp, pEndLoop->extent ); */
-/*                     SAVE_ELEM( pElemDesc, DT_END_LOOP, pEndLoop->flags, */
-/*                                2, pEndLoop->disp, pEndLoop->extent ); */
-                }
-                pos_desc += pData->desc.desc[pos_desc].disp + 1;
-                changes++;
-            } else {
-                if( lastLength != 0 ) {
-/*                     SAVE_DESC( pElemDesc, lastDisp, lastLength ); */
-                    lastDisp += lastLength;
-                    lastLength = 0;
-                }
-/*                 SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags, */
-/*                            pData->desc.desc[pos_desc].count, (long)nbElems, */
-/*                            pData->desc.desc[pos_desc].extent ); */
-                nbElems = 1;
-                PUSH_STACK( pStack, stack_pos, pos_desc, pData->desc.desc[pos_desc].count,
-                            totalDisp, pos_desc + pData->desc.desc[pos_desc].disp );
-                pos_desc++;
-            }
+        if( pElems->type == DT_LOOP ) {
+            remoteLength[pConvertor->stack_pos + 1] = 0;
+            /* NOTE(review): disp is also used below as the distance to the
+             * END_LOOP entry; confirm it is meant as a displacement here. */
+            totalDisp = pElems->disp;
+            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
+                        pData->desc.desc[pos_desc].count,
+                        totalDisp, pos_desc + pElems->disp );
+            pos_desc++;
+            pElems++;
+            loop_length = 0;  /* starting a new loop */
             goto next_loop;
         }
         /* now here we have a basic datatype */
-        type = pData->desc.desc[pos_desc].type;
-        if( (lastDisp + lastLength) == (totalDisp + pData->desc.desc[pos_desc].disp) ) {
-            lastLength += pData->desc.desc[pos_desc].count * basicDatatypes[type].size;
+        type = pElems->type;
+        lastLength = pElems->count * basicDatatypes[type].size;
+        if( resting_place > lastLength ) {
+            resting_place -= lastLength;
+            loop_length += lastLength;
         } else {
-/*             if( lastLength != 0 ) */
-/*                 SAVE_DESC( pElemDesc, lastDisp, lastLength ); */
-            lastDisp = totalDisp + pData->desc.desc[pos_desc].disp;
-            lastLength = pData->desc.desc[pos_desc].count * basicDatatypes[type].size;
+            int cnt = resting_place / basicDatatypes[type].size;
+            resting_place -= cnt * basicDatatypes[type].size;
+            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
+                        pElems->count - cnt,
+                        totalDisp + pElems->disp + cnt * pElems->extent,
+                        pos_desc );
+            pConvertor->bConverted += (starting_point - resting_place);
+            return 0;
         }
         pos_desc++;  /* advance to the next data */
+        pElems++;
+    }
+
+    return 0;
+}
+
+/* This function works for homogeneous architectures. As we keep
+ * trace of the size inside the loop in the END_LOOP element
+ * we can easily jump directly where we need. It works only
+ * because we can split a basic data in the middle if we
+ * have an optimized representation.
+ *
+ * For starting_point == 0 the stack is reset to the first element of
+ * the optimized description when one exists, else of the normal one.
+ * Otherwise the normal description is walked until the element that
+ * contains starting_point is found and pushed on the stack.
+ *
+ * pConvertor      convertor whose pStack, stack_pos and bConverted
+ *                 fields are rewritten
+ * starting_point  offset to reach, in the unit of basicDatatypes[].size
+ * sizes           not referenced by this function
+ * Returns 0 on every path.
+ */
+int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
+                               int starting_point, int* sizes )
+{
+    dt_stack_t* pStack;   /* pointer to the position on the stack */
+    int pos_desc;         /* actual position in the description of the derived datatype */
+    int type, lastLength = 0;
+    long totalDisp;
+    lam_datatype_t* pData = pConvertor->pDesc;
+    int* remoteLength;
+    int loop_length;
+    int resting_place = starting_point;
+    dt_elem_desc_t* pElems;
+
+    if( starting_point == 0 ) {
+
+        pConvertor->stack_pos = 1;
+        pConvertor->pStack[0].index = 0;
+        pConvertor->pStack[0].count = pConvertor->count;
+        pConvertor->pStack[0].disp = 0;
+        /* first here we should select which data representation will be used for
+         * this operation: normal one or the optimized version ? */
+        if( pData->opt_desc.used > 0 ) {
+            pElems = pData->opt_desc.desc;
+            pConvertor->pStack[0].end_loop = pData->opt_desc.used;
+        } else {
+            pElems = pData->desc.desc;
+            pConvertor->pStack[0].end_loop = pData->desc.used;
+        }
+        pConvertor->pStack[1].index = 0;
+        pConvertor->pStack[1].count = pElems->count;
+        pConvertor->pStack[1].disp = pElems->disp;
+        pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
+        return 0;
+    }
+    /* if the convertor continue from the last position
+     * there is nothing to do.
+     */
+    if( pConvertor->bConverted == starting_point ) return 0;
+
+    /* slot 0 is always written and every DT_LOOP writes slot stack_pos + 1,
+     * so reserve one element more than btypes[DT_LOOP] */
+    remoteLength = (int*)alloca( sizeof(int) * (pConvertor->pDesc->btypes[DT_LOOP] + 1) );
+    pStack = pConvertor->pStack;
+    pStack->count = pConvertor->count;
+    pStack->index = -1;
+    pStack->end_loop = pData->desc.used - 1;
+    pStack->disp = 0;
+    pos_desc = 0;
+    remoteLength[0] = 0;  /* initial value set to ZERO */
+    pConvertor->stack_pos = 0;
+    pElems = &(pData->desc.desc[pos_desc]);
+
+ next_loop:
+    totalDisp = pStack->disp;
+    loop_length = remoteLength[pConvertor->stack_pos];
+    while( pConvertor->stack_pos >= 0 ) {
+        if( pElems->type == DT_END_LOOP ) {  /* end of the current loop */
+            /* now we know the length of the loop. We can compute
+             * if the starting position will happen in one of the
+             * iterations of this loop.
+             */
+            remoteLength[pConvertor->stack_pos] = loop_length;
+            if( (loop_length * pStack->count) > resting_place ) {
+                /* OK here we stop in this loop. First save the loop
+                 * on the stack, then save the position of the last
+                 * data */
+                int cnt = resting_place / loop_length;
+                pStack->count -= cnt;
+                resting_place -= cnt * loop_length;
+                pStack->disp += cnt * pElems->extent;
+                pConvertor->bConverted += (cnt * loop_length);
+                /* NOTE(review): pos_desc/pElems still designate this END_LOOP
+                 * after the jump, so cnt is 0 on the next pass; confirm that
+                 * rewinding to the start of the loop body is not missing. */
+                goto next_loop;
+            }
+            /* Not in this loop. Cleanup the stack and advance to the
+             * next data description.
+             */
+            pConvertor->stack_pos--;
+            pStack--;
+            /* the outermost level is closed: do not read below the stack */
+            if( pConvertor->stack_pos < 0 ) return 0;
+            pos_desc++;
+            pElems++;
+            goto next_loop;
+        }
+        if( pElems->type == DT_LOOP ) {
+            remoteLength[pConvertor->stack_pos + 1] = 0;
+            /* NOTE(review): disp is also used below as the distance to the
+             * END_LOOP entry; confirm it is meant as a displacement here. */
+            totalDisp = pElems->disp;
+            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
+                        pData->desc.desc[pos_desc].count,
+                        totalDisp, pos_desc + pElems->disp );
+            pos_desc++;
+            pElems++;
+            loop_length = 0;  /* starting a new loop */
+            goto next_loop;
+        }
+        /* now here we have a basic datatype */
+        type = pElems->type;
+        lastLength = pElems->count * basicDatatypes[type].size;
+        if( resting_place > lastLength ) {
+            resting_place -= lastLength;
+            loop_length += lastLength;
+        } else {
+            int cnt = resting_place / basicDatatypes[type].size;
+            resting_place -= cnt * basicDatatypes[type].size;
+            PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
+                        pElems->count - cnt,
+                        totalDisp + pElems->disp + cnt * pElems->extent,
+                        pos_desc );
+            pConvertor->bConverted += (starting_point - resting_place);
+            return 0;
+        }
+        pos_desc++;  /* advance to the next data */
+        pElems++;
     }
-/*     if( lastLength != 0 ) */
-/*         SAVE_DESC( pElemDesc, lastDisp, lastLength ); */
 
     return 0;
 }