Prefix complex_float_t and complex_double_t with lam_
Add 2 new structures for the element description, dt_loop_desc_t and dt_endloop_desc_t. It's just an easy way to describe those 2 particular elements. Add 2 functions to create a fake stack (for homogeneous and heterogeneous environments). They are used to start packing/unpacking the data from an arbitrary offset. This commit was SVN r1083.
Этот коммит содержится в:
родитель
673eb325f1
Коммит
d355978053
@ -91,6 +91,25 @@ struct __dt_stack {
|
||||
long disp;
|
||||
};
|
||||
|
||||
/* These 2 typedefs are the same as the dt_elem_desc_t except
|
||||
* for the name of the fields.
|
||||
*/
|
||||
typedef struct __dt_loop_desc {
|
||||
u_int16_t flags; /**< flags for the record */
|
||||
u_int16_t type; /**< the basic data type id */
|
||||
u_int32_t count; /**< number of elements */
|
||||
long disp; /**< displacement of the first element */
|
||||
u_int32_t extent; /**< extent of each element */
|
||||
} dt_loop_desc_t;
|
||||
|
||||
typedef struct __dt_endloop_desc {
|
||||
u_int16_t flags; /**< flags for the record */
|
||||
u_int16_t type; /**< the basic data type id */
|
||||
u_int32_t count; /**< number of elements */
|
||||
long disp; /**< displacement of the first element */
|
||||
u_int32_t extent; /**< extent of each element */
|
||||
} dt_endloop_desc_t;
|
||||
|
||||
/* keep the last 16 bits free for data flags */
|
||||
#define CONVERTOR_USELESS 0x00010000
|
||||
#define CONVERTOR_RECV 0x00020000
|
||||
@ -104,12 +123,12 @@ struct __dt_stack {
|
||||
typedef struct {
|
||||
float r;
|
||||
float i;
|
||||
} complex_float_t;
|
||||
} lam_complex_float_t;
|
||||
|
||||
typedef struct {
|
||||
double r;
|
||||
double i;
|
||||
} complex_double_t;
|
||||
} lam_complex_double_t;
|
||||
|
||||
extern dt_desc_t basicDatatypes[DT_MAX_PREDEFINED];
|
||||
|
||||
|
@ -168,8 +168,8 @@ DUMP_TYPE( long, long )
|
||||
DUMP_TYPE( double, double )
|
||||
DUMP_TYPE( long_long, long long )
|
||||
DUMP_TYPE( long_double, long double )
|
||||
DUMP_TYPE( complex_float, complex_float_t )
|
||||
DUMP_TYPE( complex_double, complex_double_t )
|
||||
DUMP_TYPE( complex_float, lam_complex_float_t )
|
||||
DUMP_TYPE( complex_double, lam_complex_double_t )
|
||||
|
||||
static lam_convertor_t* pDumpConv = NULL;
|
||||
|
||||
|
@ -36,8 +36,8 @@ dt_desc_t basicDatatypes[DT_MAX_PREDEFINED] = {
|
||||
INIT_BASIC_DATA( float, LAM_ALIGNMENT_FLOAT, FLOAT ),
|
||||
INIT_BASIC_DATA( double, LAM_ALIGNMENT_DOUBLE, DOUBLE ),
|
||||
INIT_BASIC_DATA( long double, LAM_ALIGNMENT_LONG_DOUBLE, LONG_DOUBLE ),
|
||||
INIT_BASIC_DATA( complex_float_t, LAM_ALIGNMENT_FLOAT, COMPLEX_FLOAT ),
|
||||
INIT_BASIC_DATA( complex_double_t, LAM_ALIGNMENT_DOUBLE, COMPLEX_DOUBLE ),
|
||||
INIT_BASIC_DATA( lam_complex_float_t, LAM_ALIGNMENT_FLOAT, COMPLEX_FLOAT ),
|
||||
INIT_BASIC_DATA( lam_complex_double_t, LAM_ALIGNMENT_DOUBLE, COMPLEX_DOUBLE ),
|
||||
INIT_BASIC_DATA( char, LAM_ALIGNMENT_CHAR, PACKED ),
|
||||
INIT_BASIC_DATA( int, LAM_ALIGNMENT_INT, LOGIC ),
|
||||
INIT_BASIC_TYPE( DT_FLOAT_INT, FLOAT_INT ),
|
||||
|
@ -390,8 +390,8 @@ COPY_TYPE( long, long )
|
||||
/*COPY_TYPE( double, double );*/
|
||||
COPY_TYPE( long_long, long long )
|
||||
COPY_TYPE( long_double, long double )
|
||||
COPY_TYPE( complex_float, complex_float_t )
|
||||
COPY_TYPE( complex_double, complex_double_t )
|
||||
COPY_TYPE( complex_float, lam_complex_float_t )
|
||||
COPY_TYPE( complex_double, lam_complex_double_t )
|
||||
|
||||
static int copy_double( unsigned int count,
|
||||
char* from, unsigned int from_len, long from_extent,
|
||||
|
@ -17,22 +17,24 @@ static inline long GET_LOOP_DISP( dt_elem_desc_t* _pElem )
|
||||
}
|
||||
|
||||
int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
|
||||
int starting_point,
|
||||
int* sizes )
|
||||
int starting_point, int* sizes );
|
||||
int lam_create_stack_with_pos_general( lam_convertor_t* pConvertor,
|
||||
int starting_point, int* sizes );
|
||||
|
||||
int lam_create_stack_with_pos_general( lam_convertor_t* pConvertor,
|
||||
int starting_point, int* sizes )
|
||||
{
|
||||
long lastDisp = 0;
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int end_loop; /* last element in the actual loop */
|
||||
int stack_pos = 0;
|
||||
int type, lastLength = 0, nbElems = 0, changes = 0;
|
||||
int type, lastLength = 0;
|
||||
long totalDisp;
|
||||
lam_datatype_t* pData = pConvertor->pDesc;
|
||||
int* remoteLength;
|
||||
int loop_length;
|
||||
int resting_place = starting_point;
|
||||
dt_elem_desc_t* pElems;
|
||||
|
||||
if( starting_point == 0 ) {
|
||||
dt_elem_desc_t* pElems;
|
||||
|
||||
pConvertor->stack_pos = 1;
|
||||
pConvertor->pStack[0].index = 0;
|
||||
@ -56,7 +58,7 @@ int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
|
||||
/* if the convertor continues from the last position
|
||||
* there is nothing to do.
|
||||
*/
|
||||
if( pConvertor->bConverted != starting_point ) return 0;
|
||||
if( pConvertor->bConverted == starting_point ) return 0;
|
||||
|
||||
remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
|
||||
pStack = pConvertor->pStack;
|
||||
@ -65,88 +67,190 @@ int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
|
||||
pStack->end_loop = pData->desc.used - 1;
|
||||
pStack->disp = 0;
|
||||
pos_desc = 0;
|
||||
|
||||
remoteLength[0] = 0; /* initial value set to ZERO */
|
||||
pConvertor->stack_pos = 0;
|
||||
pElems = &(pData->desc.desc[pos_desc]);
|
||||
|
||||
next_loop:
|
||||
end_loop = pStack->end_loop;
|
||||
totalDisp = pStack->disp;
|
||||
loop_length = remoteLength[stack_pos];
|
||||
while( pos_desc <= end_loop ) {
|
||||
if( pData->desc.desc[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
/* now we know the length of the loop. We can compute if the the starting_position
|
||||
* will happend in this loop.
|
||||
loop_length = remoteLength[pConvertor->stack_pos];
|
||||
while( pConvertor->stack_pos >= 0 ) {
|
||||
if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
|
||||
/* now we know the length of the loop. We can compute
|
||||
* if the starting_position will happen in one of the
|
||||
* iterations of this loop.
|
||||
*/
|
||||
remoteLength[stack_pos] = loop_length;
|
||||
stack_pos--;
|
||||
remoteLength[pConvertor->stack_pos] = loop_length;
|
||||
if( (loop_length * pStack->count) > resting_place ) {
|
||||
/* OK here we stop in this loop. First save the loop
|
||||
* on the stack, then save the position of the last
|
||||
* data */
|
||||
int cnt = resting_place / loop_length;
|
||||
pStack->count -= cnt;
|
||||
resting_place -= cnt * loop_length;
|
||||
pStack->disp += cnt * pElems->extent;
|
||||
pConvertor->bConverted += (cnt * loop_length);
|
||||
goto next_loop;
|
||||
}
|
||||
/* Not in this loop. Cleanup the stack and advance to the
|
||||
* next data description.
|
||||
*/
|
||||
pConvertor->stack_pos--;
|
||||
pStack--;
|
||||
pos_desc++;
|
||||
pElems++;
|
||||
goto next_loop;
|
||||
}
|
||||
if( pData->desc.desc[pos_desc].type == DT_LOOP ) {
|
||||
dt_elem_desc_t* pEndLoop = &(pData->desc.desc[pos_desc + pData->desc.desc[pos_desc].disp]);
|
||||
long loop_disp = GET_LOOP_DISP( &(pData->desc.desc[pos_desc]) );
|
||||
remoteLength [stack_pos] = 0;
|
||||
if( pData->desc.desc[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
/* the loop is contiguous or composed by contiguous elements with a gap */
|
||||
if( pData->desc.desc[pos_desc].extent == pEndLoop->extent ) {
|
||||
/* the whole loop is contiguous */
|
||||
if( (lastDisp + lastLength) != (totalDisp + loop_disp) ) {
|
||||
/* SAVE_DESC( pElemDesc, lastDisp, lastLength ); */
|
||||
lastLength = 0;
|
||||
lastDisp = totalDisp + loop_disp;
|
||||
}
|
||||
lastLength += pData->desc.desc[pos_desc].count * pEndLoop->extent;
|
||||
} else {
|
||||
int counter = pData->desc.desc[pos_desc].count;
|
||||
if( (lastDisp + lastLength) == (totalDisp + loop_disp) ) {
|
||||
lastLength += pEndLoop->extent;
|
||||
counter--;
|
||||
}
|
||||
if( lastLength != 0 ) {
|
||||
/* SAVE_DESC( pElemDesc, lastDisp, lastLength ); */
|
||||
lastDisp += lastLength;
|
||||
lastLength = 0;
|
||||
}
|
||||
/* we have a gap in the begining or the end of the loop but the whole
|
||||
* loop can be merged in just one memcpy.
|
||||
*/
|
||||
/* SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags, */
|
||||
/* counter, (long)2, pData->desc.desc[pos_desc].extent ); */
|
||||
/* SAVE_DESC( pElemDesc, loop_disp, pEndLoop->extent ); */
|
||||
/* SAVE_ELEM( pElemDesc, DT_END_LOOP, pEndLoop->flags, */
|
||||
/* 2, pEndLoop->disp, pEndLoop->extent ); */
|
||||
}
|
||||
pos_desc += pData->desc.desc[pos_desc].disp + 1;
|
||||
changes++;
|
||||
} else {
|
||||
if( lastLength != 0 ) {
|
||||
/* SAVE_DESC( pElemDesc, lastDisp, lastLength ); */
|
||||
lastDisp += lastLength;
|
||||
lastLength = 0;
|
||||
}
|
||||
/* SAVE_ELEM( pElemDesc, DT_LOOP, pData->desc.desc[pos_desc].flags, */
|
||||
/* pData->desc.desc[pos_desc].count, (long)nbElems, */
|
||||
/* pData->desc.desc[pos_desc].extent ); */
|
||||
nbElems = 1;
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, pData->desc.desc[pos_desc].count,
|
||||
totalDisp, pos_desc + pData->desc.desc[pos_desc].disp );
|
||||
pos_desc++;
|
||||
}
|
||||
if( pElems->type == DT_LOOP ) {
|
||||
remoteLength[pConvertor->stack_pos + 1] = 0;
|
||||
totalDisp = pElems->disp;
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||
pData->desc.desc[pos_desc].count,
|
||||
totalDisp, pos_desc + pElems->disp );
|
||||
pos_desc++;
|
||||
pElems++;
|
||||
loop_length = 0; /* starting a new loop */
|
||||
goto next_loop;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pData->desc.desc[pos_desc].type;
|
||||
if( (lastDisp + lastLength) == (totalDisp + pData->desc.desc[pos_desc].disp) ) {
|
||||
lastLength += pData->desc.desc[pos_desc].count * basicDatatypes[type].size;
|
||||
type = pElems->type;
|
||||
lastLength = pElems->count * basicDatatypes[type].size;
|
||||
if( resting_place > lastLength ) {
|
||||
resting_place -= lastLength;
|
||||
loop_length += lastLength;
|
||||
} else {
|
||||
/* if( lastLength != 0 ) */
|
||||
/* SAVE_DESC( pElemDesc, lastDisp, lastLength ); */
|
||||
lastDisp = totalDisp + pData->desc.desc[pos_desc].disp;
|
||||
lastLength = pData->desc.desc[pos_desc].count * basicDatatypes[type].size;
|
||||
int cnt = resting_place / basicDatatypes[type].size;
|
||||
resting_place -= cnt * basicDatatypes[type].size;
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||
pElems->count - cnt,
|
||||
totalDisp + pElems->disp + cnt * pElems->extent,
|
||||
pos_desc );
|
||||
pConvertor->bConverted += (starting_point - resting_place);
|
||||
return 0;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
pElems++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This function works for homogeneous architectures. As we keep
|
||||
* track of the size inside the loop in the END_LOOP element
|
||||
* we can easily jump directly where we need. It works only
|
||||
* because we can split a basic data in the middle if we
|
||||
* have an optimized representation.
|
||||
*/
|
||||
int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
|
||||
int starting_point, int* sizes )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type, lastLength = 0;
|
||||
long totalDisp;
|
||||
lam_datatype_t* pData = pConvertor->pDesc;
|
||||
int* remoteLength;
|
||||
int loop_length;
|
||||
int resting_place = starting_point;
|
||||
dt_elem_desc_t* pElems;
|
||||
|
||||
if( starting_point == 0 ) {
|
||||
|
||||
pConvertor->stack_pos = 1;
|
||||
pConvertor->pStack[0].index = 0;
|
||||
pConvertor->pStack[0].count = pConvertor->count;
|
||||
pConvertor->pStack[0].disp = 0;
|
||||
/* first here we should select which data representation will be used for
|
||||
* this operation: normal one or the optimized version ? */
|
||||
if( pData->opt_desc.used > 0 ) {
|
||||
pElems = pData->opt_desc.desc;
|
||||
pConvertor->pStack[0].end_loop = pData->opt_desc.used;
|
||||
} else {
|
||||
pElems = pData->desc.desc;
|
||||
pConvertor->pStack[0].end_loop = pData->desc.used;
|
||||
}
|
||||
pConvertor->pStack[1].index = 0;
|
||||
pConvertor->pStack[1].count = pElems->count;
|
||||
pConvertor->pStack[1].disp = pElems->disp;
|
||||
pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
|
||||
return 0;
|
||||
}
|
||||
/* if the convertor continues from the last position
|
||||
* there is nothing to do.
|
||||
*/
|
||||
if( pConvertor->bConverted == starting_point ) return 0;
|
||||
|
||||
remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
|
||||
pStack = pConvertor->pStack;
|
||||
pStack->count = pConvertor->count;
|
||||
pStack->index = -1;
|
||||
pStack->end_loop = pData->desc.used - 1;
|
||||
pStack->disp = 0;
|
||||
pos_desc = 0;
|
||||
remoteLength[0] = 0; /* initial value set to ZERO */
|
||||
pConvertor->stack_pos = 0;
|
||||
pElems = &(pData->desc.desc[pos_desc]);
|
||||
|
||||
next_loop:
|
||||
totalDisp = pStack->disp;
|
||||
loop_length = remoteLength[pConvertor->stack_pos];
|
||||
while( pConvertor->stack_pos >= 0 ) {
|
||||
if( pElems->type == DT_END_LOOP ) { /* end of the current loop */
|
||||
/* now we know the length of the loop. We can compute
|
||||
* if the starting_position will happen in one of the
|
||||
* iterations of this loop.
|
||||
*/
|
||||
remoteLength[pConvertor->stack_pos] = loop_length;
|
||||
if( (loop_length * pStack->count) > resting_place ) {
|
||||
/* OK here we stop in this loop. First save the loop
|
||||
* on the stack, then save the position of the last
|
||||
* data */
|
||||
int cnt = resting_place / loop_length;
|
||||
pStack->count -= cnt;
|
||||
resting_place -= cnt * loop_length;
|
||||
pStack->disp += cnt * pElems->extent;
|
||||
pConvertor->bConverted += (cnt * loop_length);
|
||||
goto next_loop;
|
||||
}
|
||||
/* Not in this loop. Cleanup the stack and advance to the
|
||||
* next data description.
|
||||
*/
|
||||
pConvertor->stack_pos--;
|
||||
pStack--;
|
||||
pos_desc++;
|
||||
pElems++;
|
||||
goto next_loop;
|
||||
}
|
||||
if( pElems->type == DT_LOOP ) {
|
||||
remoteLength[pConvertor->stack_pos + 1] = 0;
|
||||
totalDisp = pElems->disp;
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||
pData->desc.desc[pos_desc].count,
|
||||
totalDisp, pos_desc + pElems->disp );
|
||||
pos_desc++;
|
||||
pElems++;
|
||||
loop_length = 0; /* starting a new loop */
|
||||
goto next_loop;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems->type;
|
||||
lastLength = pElems->count * basicDatatypes[type].size;
|
||||
if( resting_place > lastLength ) {
|
||||
resting_place -= lastLength;
|
||||
loop_length += lastLength;
|
||||
} else {
|
||||
int cnt = resting_place / basicDatatypes[type].size;
|
||||
resting_place -= cnt * basicDatatypes[type].size;
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||
pElems->count - cnt,
|
||||
totalDisp + pElems->disp + cnt * pElems->extent,
|
||||
pos_desc );
|
||||
pConvertor->bConverted += (starting_point - resting_place);
|
||||
return 0;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
pElems++;
|
||||
}
|
||||
|
||||
/* if( lastLength != 0 ) */
|
||||
/* SAVE_DESC( pElemDesc, lastDisp, lastLength ); */
|
||||
return 0;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user