1
1

Move some functions around and declare some as static.

Correct and improve pack/unpack homogeneous functions.
In dt_add.c add an empty (not counted) element at the end of the
   data description. Now we can detect the end of a data description
   without an additional if.

This commit was SVN r995.
Этот коммит содержится в:
George Bosilca 2004-03-28 07:14:18 +00:00
родитель ebb13bc42f
Коммит 43c35c42e7
7 изменённых файлов: 643 добавлений и 571 удалений

Просмотреть файл

@ -6,26 +6,26 @@
#if defined(VERBOSE)
# define DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME ) \
dump_stack( (PSTACK), (STACK_POS), (PDESC), (NAME) )
# if defined(__GNUC__)
# define DUMP(ARGS...) printf(ARGS)
# if defined(ACCEPT_C99)
# define DUMP( ARGS... ) printf(__VA_ARGS__)
# else
# if defined(__GNUC__) && !defined(__STDC__)
# define DUMP(ARGS...) printf(ARGS)
# else
# if defined(ACCEPT_C99)
# define DUMP( ARGS... ) printf(__VA_ARGS__)
# else
# define DUMP printf
# endif /* ACCEPT_C99 */
# endif /* __GNUC__ */
# endif /* __GNUC__ && !__STDC__ */
# endif /* ACCEPT_C99 */
#else
# define DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME )
# if defined(__GNUC__)
# if defined(ACCEPT_C99)
# define DUMP(ARGS...)
# else
# if defined(ACCEPT_C99)
# if defined(__GNUC__) && !defined(__STDC__)
# define DUMP(ARGS...)
# else
static void DUMP() { /* empty hopefully removed by the compiler */ }
# endif /* ACCEPT_C99 */
# endif /* __GNUC__ */
static inline void DUMP() { /* empty hopefully removed by the compiler */ }
# endif /* __GNUC__ && !__STDC__ */
# endif /* ACCEPT_C99 */
#endif /* VERBOSE */
#define DT_LOOP 0x00
@ -65,7 +65,9 @@
#define DT_2INTEGER 0x22
#define DT_LONGDBL_INT 0x23
#define DT_WCHAR 0x24
/* If the number of basic datatype should change update DT_MAX_PREDEFINED in datatype.h */
/* If the number of basic datatypes changes, update
 * DT_MAX_PREDEFINED in datatype.h
 */
/* flags for the datatypes. */
#define DT_FLAG_DESTROYED 0x0001 /**< user destroyed but some other layers still have a reference */
@ -120,8 +122,8 @@ extern dt_desc_t basicDatatypes[DT_MAX_PREDEFINED];
#define SET_CONTIGUOUS_FLAG( INT_VALUE ) SET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
#define UNSET_CONTIGUOUS_FLAG( INT_VALUE ) UNSET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
#if defined(__GNUC__)
#define LMAX(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _b : _a); })
#if defined(__GNUC__) && !defined(__STDC__)
/* Min/max helpers built on GNU C statement expressions: each argument is
 * evaluated exactly once and converted to long (L*) or int (I*).
 * The value of a statement expression is the value of its last expression
 * statement, so the ';' before the closing '})' is mandatory.
 */
#define LMAX(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define LMIN(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _a : _b); })
#define IMAX(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); })
#define IMIN(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _a : _b); })
@ -151,7 +153,9 @@ do { \
(PSTACK) = pTempStack; \
} while(0)
#define MEMCPY( DST, SRC, BLENGTH ) memcpy( (DST), (SRC), (BLENGTH) )
/* Thin wrapper around memcpy(). The body is wrapped in do { } while(0) so
 * that "MEMCPY(...);" expands to exactly one statement and stays valid in an
 * unbraced if/else. Uncomment the printf to trace every copy.
 */
#define MEMCPY( DST, SRC, BLENGTH ) do { \
/*printf( "memcpy dest = %p src = %p length = %d\n", (void*)(DST), (void*)(SRC), (int)(BLENGTH) );*/ \
memcpy( (DST), (SRC), (BLENGTH) ); } while(0)
#ifdef USELESS
#define MEMCPY_LIMIT 1

Просмотреть файл

@ -79,8 +79,8 @@ int lam_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd, unsigned int count, long
pLast->extent = extent;
pdtBase->desc.used++;
pdtBase->btypes[pdtAdd->id] += count;
pLast->flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
if( extent == pdtAdd->size )
pLast->flags = pdtAdd->flags ^ (DT_FLAG_FOREVER | DT_FLAG_COMMITED);
if( extent != pdtAdd->size )
pLast->flags |= DT_FLAG_CONTIGUOUS;
} else {
/* now we add a complex datatype */
@ -131,6 +131,13 @@ int lam_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd, unsigned int count, long
/* should I add some space until the extent of this datatype ? */
}
/* let's add a fake element at the end just to avoid useless comparisons
 * in pack/unpack functions.
 */
pLast++;
pLast->type = 0;
pLast->flags = 0;
pdtBase->size += count * pdtAdd->size;
pdtBase->true_lb = LMIN( pdtBase->true_lb, pdtAdd->true_lb + disp );
pdtBase->true_ub = LMAX( pdtBase->true_ub,

Просмотреть файл

@ -48,18 +48,6 @@ int lam_ddt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t*
return 0;
}
/* Commit the datatype referenced by *data.
 *
 * Returns -1 when the datatype already carries DT_FLAG_COMMITED, 0 otherwise.
 * When the size differs from the true extent (true_ub - true_lb) an optimized
 * description is generated into opt_desc; the result of that call is ignored.
 */
int lam_ddt_commit( dt_desc_t** data )
{
    dt_desc_t* pType = *data;

    if( 0 != (pType->flags & DT_FLAG_COMMITED) ) {
        return -1;
    }
    pType->flags |= DT_FLAG_COMMITED;

    /* Size equal to the true extent: nothing to gain from optimizing. */
    if( pType->size == (pType->true_ub - pType->true_lb) ) {
        return 0;
    }
    (void)lam_ddt_optimize_short( pType, 1, &(pType->opt_desc) );
    return 0;
}
static void _dump_data_flags( unsigned short usflags )
{
char flags[12] = "-----------";

Просмотреть файл

@ -12,7 +12,7 @@
/* printf( "save in %s:%d at %p DT_BYTE disp %ld count %d\n", __FILE__, __LINE__, (PELEM), (DISP), (COUNT) ); \ */
#define SAVE_DESC( PELEM, DISP, COUNT ) \
do { \
(PELEM)->flags = DT_FLAG_BASIC; \
(PELEM)->flags = DT_FLAG_BASIC | DT_FLAG_DATA; \
(PELEM)->type = DT_BYTE; \
(PELEM)->count = (COUNT); \
(PELEM)->disp = (DISP); \
@ -155,3 +155,136 @@ int lam_ddt_optimize_short( dt_desc_t* pData, int count, dt_type_desc_t* pTypeDe
pTypeDesc->used = nbElems;
return 0;
}
/* Debug stand-in for memcpy: prints the copy that would be performed instead
 * of doing it. Relies on two int variables named __index and __sofar being
 * in scope at the point of use: __index numbers the printed lines and
 * __sofar accumulates the total length printed so far.
 * Wrapped in do { } while(0) so "PRINT_MEMCPY(...);" is a single statement;
 * pointers are cast to void* as required by the %p conversion.
 */
#define PRINT_MEMCPY( DST, SRC, LENGTH ) \
do { \
    printf( "%5d: memcpy dst = %p src %p length %ld bytes (so far %d)[%d]\n", \
            __index++, (void*)(DST), (void*)(SRC), (long)(LENGTH), __sofar, __LINE__ ); \
    __sofar += (LENGTH); \
} while(0)
/* Debugging helper: walk the description of pData for <count> repetitions and
 * print, through PRINT_MEMCPY, every memcpy a pack operation would issue.
 * Nothing is copied; the destination starts at NULL and the sources are the
 * raw displacements, so the printed pointers are offsets, not real addresses.
 *
 * pData  datatype to unroll; its optimized description (opt_desc) is used
 *        when present, the plain description (desc) otherwise.
 * count  number of repetitions of the datatype.
 *
 * Returns the result of (bConverted == pData->size * count) on the
 * DT_FLAG_CONTIGUOUS fast path, and 0 after a full traversal of the
 * description.
 */
static int lam_ddt_unroll( dt_desc_t* pData, int count )
{
    dt_stack_t* pStack;      /* pointer to the position on the stack */
    int pos_desc;            /* actual position in the description of the derived datatype */
    int type;                /* type at current position */
    int i;                   /* index for basic elements with extent */
    int stack_pos = 0;       /* position on the stack */
    long lastDisp = 0, lastLength = 0;
    char* pDestBuf = NULL;
    /* __index and __sofar are consumed by the PRINT_MEMCPY macro */
    int bConverted = 0, __index = 0, __sofar = 0;
    dt_elem_desc_t* pElems;

    if( pData->flags & DT_FLAG_CONTIGUOUS ) {
        long extent = pData->ub - pData->lb;
        char* pSrcBuf = (char*)pData->true_lb;

        type = count * pData->size;
        if( pData->size == extent /* true extent at this point */ ) {
            /* we can do it with just one memcpy */
            PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size * count );
            bConverted += (pData->size * count);
        } else {
            /* one copy per repetition, the source advancing by the extent */
            for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
                PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size );
                pSrcBuf += extent;
                pDestBuf += pData->size;
            }
            bConverted += type;
        }
        return (bConverted == (pData->size * count));
    }

    /* Slot 0 always holds the base entry written just below, so the stack
     * must be larger than the number of loops. Use the same "+ 2" sizing as
     * lam_convertor_init_for_send; with the previous sizing a datatype
     * without loops allocated zero bytes and was then written to.
     * NOTE(review): assumes PUSH_STACK consumes one entry per nested loop.
     */
    pStack = alloca( sizeof(dt_stack_t) * (pData->btypes[DT_LOOP] + 2) );
    pStack->count = count;
    pStack->index = -1;
    pStack->disp = 0;
    pos_desc = 0;

    if( pData->opt_desc.desc != NULL ) {
        pElems = pData->opt_desc.desc;
        pStack->end_loop = pData->opt_desc.used;
    } else {
        pElems = pData->desc.desc;
        pStack->end_loop = pData->desc.used;
    }

    DUMP_STACK( pStack, stack_pos, pElems, "starting" );
    DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
    DUMP( "top stack info {index = %d, count = %d}\n",
          pStack->index, pStack->count );

 next_loop:
    while( pos_desc <= pStack->end_loop ) {
        if( pos_desc == pStack->end_loop ) { /* end of the current loop */
            if( --(pStack->count) == 0 ) { /* end of loop */
                pStack--;
                if( --stack_pos == -1 ) break;
            } else {
                /* another iteration: rewind and shift the displacement */
                pos_desc = pStack->index;
                if( pos_desc == -1 )
                    pStack->disp += (pData->ub - pData->lb);
                else
                    pStack->disp += pElems[pos_desc].extent;
            }
            pos_desc++;
            goto next_loop;
        }
        if( pElems[pos_desc].type == DT_LOOP ) {
            if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
                /* the loop body is one contiguous chunk described by the
                 * element found at offset .disp from the loop start */
                dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
                if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
                    /* pending data is adjacent: merge it with the first iteration */
                    PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent );
                    lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent;
                    i = 1;
                } else {
                    PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
                    lastDisp = pStack->disp + pElems[pos_desc + 1].disp;
                    i = 0;
                }
                lastLength = pLast->extent;
                for( ; i < (pElems[pos_desc].count - 1); i++ ) {
                    PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
                    pDestBuf += pLast->extent;
                    lastDisp += pElems[pos_desc].extent;
                }
                pos_desc += pElems[pos_desc].disp + 1;
                goto next_loop;
            } else {
                do {
                    PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
                                pStack->disp, pos_desc + pElems[pos_desc].disp );
                    pos_desc++;
                } while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
            }
        }
        /* now here we have a basic datatype */
        type = pElems[pos_desc].type;
        if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
            /* adjacent to the pending chunk: just extend it */
            lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
        } else {
            /* flush the pending chunk and start a new one */
            PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
            pDestBuf += lastLength;
            bConverted += lastLength;
            lastDisp = pStack->disp + pElems[pos_desc].disp;
            lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
        }
        pos_desc++;  /* advance to the next data */
    }
    /* flush whatever is still pending */
    PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
    return 0;
}
/* Commit the datatype referenced by *data.
 *
 * Returns -1 when the datatype already carries DT_FLAG_COMMITED, 0 otherwise.
 * When the size differs from the true extent (true_ub - true_lb) an optimized
 * description is generated into opt_desc; the result of that call is ignored.
 */
int lam_ddt_commit( dt_desc_t** data )
{
    dt_desc_t* pType = *data;

    if( 0 != (pType->flags & DT_FLAG_COMMITED) ) {
        return -1;
    }
    pType->flags |= DT_FLAG_COMMITED;

    /* Size equal to the true extent: nothing to gain from optimizing. */
    if( pType->size == (pType->true_ub - pType->true_lb) ) {
        return 0;
    }
    (void)lam_ddt_optimize_short( pType, 1, &(pType->opt_desc) );
    return 0;
}

Просмотреть файл

@ -132,241 +132,144 @@ static int convertor_pack_general( lam_convertor_t* pConvertor, struct iovec* ou
return 0;
}
static int convertor_pack_homogeneous( lam_convertor_t* pConv, struct iovec* iov, unsigned int out_size )
/* We suppose here that we work with an already optimized version of the data
*/
static int convertor_pack_homogeneous( lam_convertor_t* pConv,
struct iovec* iov, unsigned int out_size )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int i; /* index for basic elements with extent */
int stack_pos = 0; /* position on the stack */
long lastDisp = 0, lastLength = 0;
char* pDestBuf;
dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int i; /* index for basic elements with extent */
long lastDisp = 0, last_count = 0;
int space = iov[0].iov_len;
char* pDestBuf;
dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
int next_length;
int init_bconvert = pConv->bConverted;
int end_desc;
pDestBuf = iov[0].iov_base;
pDestBuf = iov[0].iov_base;
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
long extent = pData->ub - pData->lb;
char* pSrc = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
long extent = pData->ub - pData->lb;
char* pSrcBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
type = pConv->count * pData->size;
if( pData->size == extent /* true extent at this point */ ) {
/* we can do it with just one memcpy */
MEMCPY( pDestBuf, pSrc, iov[0].iov_len );
pConv->bConverted += iov[0].iov_len;
} else {
char* pSrcBuf = pConv->pBaseBuf + pData->true_lb;
long extent = pData->ub - pData->lb;
for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
MEMCPY( pDestBuf, pSrcBuf, pData->size );
pSrcBuf += extent;
pDestBuf += pData->size;
}
pConv->bConverted += type;
}
return (pConv->bConverted == (pData->size * pConv->count));
}
pStack = pConv->pStack;
pStack->count = pConv->count;
pStack->index = -1;
pStack->disp = 0;
pos_desc = 0;
type = pConv->count * pData->size;
if( pData->size == extent /* true extent at this point */ ) {
/* we can do it with just one memcpy */
MEMCPY( pDestBuf, pSrcBuf, iov[0].iov_len );
space -= iov[0].iov_len;
pConv->bConverted += iov[0].iov_len;
} else {
for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
MEMCPY( pDestBuf, pSrcBuf, pData->size );
space -= pData->size;
pSrcBuf += extent;
pDestBuf += pData->size;
}
pConv->bConverted += type;
}
return (pConv->bConverted == (pData->size * pConv->count));
}
if( pData->opt_desc.desc != NULL ) {
pElems = pData->opt_desc.desc;
pStack->end_loop = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
pStack->end_loop = pData->desc.used;
}
if( pData->opt_desc.desc != NULL ) {
pElems = pData->opt_desc.desc;
end_desc = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
end_desc = pData->desc.used;
}
DUMP_STACK( pStack, stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
next_loop:
while( pos_desc <= pStack->end_loop ) {
if( pos_desc == pStack->end_loop ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --stack_pos == -1 ) break;
} else {
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
}
pos_desc++;
goto next_loop;
}
if( pElems[pos_desc].type == DT_LOOP ) {
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength + pLast->extent );
i = 1;
pStack = pConv->pStack + pConv->stack_pos;
pos_desc = pStack->index;
lastDisp = pStack->disp;
last_count = pStack->count;
if( pElems[pos_desc].flags & DT_FLAG_DATA ) {
pStack--;
pConv->stack_pos--;
}
DUMP_STACK( pStack, pConv->stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", pConv->stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
while( pos_desc < end_desc ) {
while( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --(pConv->stack_pos) == -1 ) break;
} else {
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
i = 0;
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
}
pDestBuf += lastLength;
lastLength = pLast->extent;
for( ; i < (pElems[pos_desc].count - 1); i++ ) {
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
pDestBuf += pLast->extent;
lastDisp += pElems[pos_desc].extent;
pos_desc++;
last_count = pElems[pos_desc].count;
}
while( pElems[pos_desc].type == DT_LOOP ) {
int stop_in_loop = 0;
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
last_count = pElems[pos_desc].count;
if( (pLast->extent * last_count) > space ) {
last_count = space / pLast->extent;
stop_in_loop = 1;
}
for( i = 0; i < last_count; i++ ) {
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, pLast->extent );
pDestBuf += pLast->extent; /* size of the contiguous data */
lastDisp += pElems[pos_desc].extent;
}
space -= (pLast->extent * last_count);
pConv->bConverted += (pLast->extent * last_count);
if( stop_in_loop != 0 ) {
pos_desc += pElems[pos_desc].disp + 1;
last_count = pElems[pos_desc].count;
continue;
}
last_count = space;
next_length = pLast->extent - space;
/* Save the stack with the correct last_count value. */
}
pos_desc += pElems[pos_desc].disp + 1;
goto next_loop;
} else {
do {
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
}
}
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
} else {
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
pDestBuf += lastLength;
pConv->bConverted += lastLength;
lastDisp = pStack->disp + pElems[pos_desc].disp;
lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
}
pos_desc++; /* advance to the next data */
}
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
pConv->bConverted += lastLength;
/* cleanup the stack */
return 0;
}
/* Debug stand-in for memcpy: prints the copy that would be performed instead
 * of doing it. Relies on two int variables named __index and __sofar being
 * in scope at the point of use: __index numbers the printed lines and
 * __sofar accumulates the total length printed so far.
 * Wrapped in do { } while(0) so "PRINT_MEMCPY(...);" is a single statement;
 * pointers are cast to void* as required by the %p conversion.
 */
#define PRINT_MEMCPY( DST, SRC, LENGTH ) \
do { \
    printf( "%5d: memcpy dst = %p src %p length %ld bytes (so far %d)[%d]\n", \
            __index++, (void*)(DST), (void*)(SRC), (long)(LENGTH), __sofar, __LINE__ ); \
    __sofar += (LENGTH); \
} while(0)
int dt_unroll( dt_desc_t* pData, int count )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int i; /* index for basic elements with extent */
int stack_pos = 0; /* position on the stack */
long lastDisp = 0, lastLength = 0;
char* pDestBuf;
int bConverted = 0, __index = 0, __sofar = 0;
dt_elem_desc_t* pElems;
pDestBuf = NULL;
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
long extent = pData->ub - pData->lb;
char* pSrc = (char*)pData->true_lb;
type = count * pData->size;
if( pData->size == extent /* true extent at this point */ ) {
/* we can do it with just one memcpy */
PRINT_MEMCPY( pDestBuf, pSrc, pData->size * count );
bConverted += (pData->size * count);
} else {
char* pSrcBuf = (char*)pData->true_lb;
long extent = pData->ub - pData->lb;
for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size );
pSrcBuf += extent;
pDestBuf += pData->size;
}
bConverted += type;
}
return (bConverted == (pData->size * count));
}
pStack = alloca( sizeof(dt_stack_t) * pData->btypes[DT_LOOP] );
pStack->count = count;
pStack->index = -1;
pStack->disp = 0;
pos_desc = 0;
if( pData->opt_desc.desc != NULL ) {
pElems = pData->opt_desc.desc;
pStack->end_loop = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
pStack->end_loop = pData->desc.used;
}
DUMP_STACK( pStack, stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
next_loop:
while( pos_desc <= pStack->end_loop ) {
if( pos_desc == pStack->end_loop ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --stack_pos == -1 ) break;
} else {
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
}
pos_desc++;
goto next_loop;
}
if( pElems[pos_desc].type == DT_LOOP ) {
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent );
lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent;
i = 1;
} else {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
lastDisp = pStack->disp + pElems[pos_desc + 1].disp;
i = 0;
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
last_count = pElems[pos_desc].count;
}
/* now here we have a basic datatype */
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
/* do we have enough space in the buffer ? */
if( space < last_count ) {
next_length = last_count - space;
last_count = space;
goto end_loop; /* or break whatever but go out of this while */
}
lastLength = pLast->extent;
for( ; i < (pElems[pos_desc].count - 1); i++ ) {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
pDestBuf += pLast->extent;
lastDisp += pElems[pos_desc].extent;
}
pos_desc += pElems[pos_desc].disp + 1;
goto next_loop;
} else {
do {
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
}
}
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
} else {
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
pDestBuf += lastLength;
bConverted += lastLength;
lastDisp = pStack->disp + pElems[pos_desc].disp;
lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
}
pos_desc++; /* advance to the next data */
}
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
return 0;
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, last_count );
pConv->bConverted += last_count;
space -= last_count;
pDestBuf += last_count;
lastDisp = pStack->disp + pElems[pos_desc].disp;
pos_desc++; /* advance to the next data */
last_count = pElems[pos_desc].count;
}
}
last_count = 0; /* complete the data */
end_loop:
if( last_count != 0 ) { /* save the internal state */
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, last_count );
pConv->bConverted += last_count;
lastDisp += last_count;
}
if( pos_desc <= pStack->end_loop ) /* cleanup the stack */
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, next_length,
lastDisp, pos_desc );
iov[0].iov_len = pConv->bConverted - init_bconvert;
return 0;
}
/* The pack routines should do 2 things:
@ -418,44 +321,33 @@ int lam_convertor_init_for_send( lam_convertor_t* pConv, unsigned int flags,
dt_desc_t* dt, int count,
void* pUserBuf, int local_starting_point )
{
OBJ_RETAIN( dt );
if( pConv->pDesc != dt ) {
pConv->pDesc = dt;
pConv->flags = CONVERTOR_SEND;
if( pConv->pStack != NULL ) free( pConv->pStack );
pConv->pStack = NULL;
}
if( pConv->pStack == NULL ) {
pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (dt->btypes[DT_LOOP] + 2) );
pConv->stack_pos = 0; /* just to be sure */
}
if( local_starting_point == 0 ) {
pConv->stack_pos = 0;
pConv->pStack[0].index = -1; /* fake entry for the first step */
pConv->pStack[0].count = count; /* fake entry for the first step */
pConv->pStack[0].disp = 0;
/* first hre we should select which data representation will be used for
* this operation: normal one or the optimized version ? */
pConv->pStack[0].end_loop = dt->desc.used;
} else {
if( pConv->bConverted != local_starting_point ) {
lam_create_stack_with_pos( pConv, local_starting_point, local_sizes );
} /* else we just continue from the previsious point */
}
pConv->pBaseBuf = pUserBuf;
pConv->available_space = count * (dt->ub - dt->lb);
pConv->count = count;
pConv->pFunctions = copy_functions;
pConv->converted = 0;
pConv->bConverted = 0;
if( (dt->flags & DT_FLAG_CONTIGUOUS) && (dt->size == (dt->ub - dt->lb)) )
pConv->flags |= DT_FLAG_CONTIGUOUS;
pConv->fAdvance = convertor_pack_homogeneous;
if( pConv->freebuf != NULL ) {
free( pConv->freebuf );
pConv->freebuf = NULL;
}
return 0;
OBJ_RETAIN( dt );
if( pConv->pDesc != dt ) {
pConv->pDesc = dt;
pConv->flags = CONVERTOR_SEND;
if( pConv->pStack != NULL ) free( pConv->pStack );
pConv->pStack = NULL;
}
if( pConv->pStack == NULL ) {
pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (dt->btypes[DT_LOOP] + 2) );
pConv->stack_pos = 0; /* just to be sure */
}
lam_create_stack_with_pos( pConv, local_starting_point, local_sizes );
pConv->pBaseBuf = pUserBuf;
pConv->available_space = count * (dt->ub - dt->lb);
pConv->count = count;
pConv->pFunctions = copy_functions;
pConv->converted = 0;
pConv->bConverted = 0;
if( (dt->flags & DT_FLAG_CONTIGUOUS) && (dt->size == (dt->ub - dt->lb)) )
pConv->flags |= DT_FLAG_CONTIGUOUS;
pConv->fAdvance = convertor_pack_homogeneous;
if( pConv->freebuf != NULL ) {
free( pConv->freebuf );
pConv->freebuf = NULL;
}
return 0;
}
lam_convertor_t* lam_convertor_create( int remote_arch, int mode )

Просмотреть файл

@ -16,19 +16,19 @@ static int convertor_unpack_general( lam_convertor_t* pConvertor,
void dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDesc, char* name )
{
printf( "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );
for( ;stack_pos >= 0; stack_pos-- ) {
printf( "%d: pos %d count %d disp %ld end_loop %d ", stack_pos, pStack[stack_pos].index,
pStack[stack_pos].count, pStack[stack_pos].disp, pStack[stack_pos].end_loop );
if( pStack[stack_pos].index != -1 )
printf( "[desc count %d disp %ld extent %d]\n",
pDesc[pStack[stack_pos].index].count,
pDesc[pStack[stack_pos].index].disp,
pDesc[pStack[stack_pos].index].extent );
else
printf( "\n" );
}
printf( "\n" );
printf( "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );
for( ;stack_pos >= 0; stack_pos-- ) {
printf( "%d: pos %d count %d disp %ld end_loop %d ", stack_pos, pStack[stack_pos].index,
pStack[stack_pos].count, pStack[stack_pos].disp, pStack[stack_pos].end_loop );
if( pStack[stack_pos].index != -1 )
printf( "[desc count %d disp %ld extent %d]\n",
pDesc[pStack[stack_pos].index].count,
pDesc[pStack[stack_pos].index].disp,
pDesc[pStack[stack_pos].index].extent );
else
printf( "\n" );
}
printf( "\n" );
}
/*
@ -47,273 +47,295 @@ static int convertor_unpack_general( lam_convertor_t* pConvertor,
struct iovec* pInputv,
unsigned int inputCount )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int count_desc; /* the number of items already done in the actual pos_desc */
int end_loop; /* last element in the actual loop */
int type; /* type at current position */
unsigned int advance; /* number of bytes that we should advance the buffer */
int rc;
long disp_desc = 0; /* compute displacement for truncated data */
long disp; /* displacement at the beging of the last loop */
dt_desc_t *pData = pConvertor->pDesc;
dt_elem_desc_t* pElems;
char* pOutput = pConvertor->pBaseBuf;
int oCount = (pData->ub - pData->lb) * pConvertor->count;
char* pInput = pInputv[0].iov_base;
int iCount = pInputv[0].iov_len;
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int count_desc; /* the number of items already done in the actual pos_desc */
int end_loop; /* last element in the actual loop */
int type; /* type at current position */
unsigned int advance; /* number of bytes that we should advance the buffer */
int rc;
long disp_desc = 0; /* compute displacement for truncated data */
long disp; /* displacement at the beging of the last loop */
dt_desc_t *pData = pConvertor->pDesc;
dt_elem_desc_t* pElems;
char* pOutput = pConvertor->pBaseBuf;
int oCount = (pData->ub - pData->lb) * pConvertor->count;
char* pInput = pInputv[0].iov_base;
int iCount = pInputv[0].iov_len;
if( pData->opt_desc.desc != NULL ) pElems = pData->opt_desc.desc;
else pElems = pData->desc.desc;
if( pData->opt_desc.desc != NULL ) pElems = pData->opt_desc.desc;
else pElems = pData->desc.desc;
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor,
pInputv[0].iov_base, pInputv[0].iov_len, inputCount );
pStack = pConvertor->pStack + pConvertor->stack_pos;
pos_desc = pStack->index;
disp = 0;
if( pos_desc == -1 ) {
pos_desc = 0;
count_desc = pElems[0].count;
disp_desc = pElems[0].disp;
} else {
count_desc = pStack->count;
if( pElems[pos_desc].type != DT_LOOP ) {
pConvertor->stack_pos--;
pStack--;
disp = pStack->disp;
disp_desc = ( pElems[pos_desc].disp +
(pElems[pos_desc].count - count_desc) * pElems[pos_desc].extent);
}
}
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
next_loop:
end_loop = pStack->end_loop;
while( pConvertor->stack_pos >= 0 ) {
if( pos_desc == end_loop ) { /* end of the current loop */
while( --(pStack->count) == 0 ) { /* end of loop */
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor,
pInputv[0].iov_base, pInputv[0].iov_len, inputCount );
pStack = pConvertor->pStack + pConvertor->stack_pos;
pos_desc = pStack->index;
disp = 0;
if( pos_desc == -1 ) {
pos_desc = 0;
count_desc = pElems[0].count;
disp_desc = pElems[0].disp;
} else {
count_desc = pStack->count;
if( pElems[pos_desc].type != DT_LOOP ) {
pConvertor->stack_pos--;
pStack--;
if( pConvertor->stack_pos == -1 )
return 1; /* completed */
}
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
pos_desc++;
disp = pStack->disp;
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
goto next_loop;
}
if( pElems[pos_desc].type == DT_LOOP ) {
do {
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, pElems[pos_desc].count,
disp, pos_desc + pElems[pos_desc].disp + 1 );
disp = pStack->disp;
disp_desc = ( pElems[pos_desc].disp +
(pElems[pos_desc].count - count_desc) * pElems[pos_desc].extent);
}
}
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
next_loop:
end_loop = pStack->end_loop;
while( pConvertor->stack_pos >= 0 ) {
if( pos_desc == end_loop ) { /* end of the current loop */
while( --(pStack->count) == 0 ) { /* end of loop */
pConvertor->stack_pos--;
pStack--;
if( pConvertor->stack_pos == -1 )
return 1; /* completed */
}
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "advance loops" );
/* update the current state */
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
goto next_loop;
}
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
rc = pConvertor->pFunctions[type]( count_desc,
pInput, iCount, pElems[pos_desc].extent,
pOutput + disp + disp_desc, oCount, pElems[pos_desc].extent,
&advance );
if( rc <= 0 ) {
printf( "trash in the input buffer\n" );
return -1;
}
iCount -= advance; /* decrease the available space in the buffer */
pInput += advance; /* increase the pointer to the buffer */
pConvertor->bConverted += advance;
if( rc != count_desc ) {
/* not all data has been converted. Keep the state */
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, count_desc - rc,
disp + rc * pElems[pos_desc].extent, pos_desc );
if( iCount != 0 )
printf( "there is still room in the input buffer %d bytes\n", iCount );
return 0;
}
pConvertor->converted += rc; /* number of elementd converted so far */
pos_desc++; /* advance to the next data */
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
if( iCount == 0 ) break; /* break if there is no more data in the buffer */
}
disp = pStack->disp;
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
goto next_loop;
}
if( pElems[pos_desc].type == DT_LOOP ) {
do {
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, pElems[pos_desc].count,
disp, pos_desc + pElems[pos_desc].disp + 1 );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "advance loops" );
/* update the current state */
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
goto next_loop;
}
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
rc = pConvertor->pFunctions[type]( count_desc,
pInput, iCount, pElems[pos_desc].extent,
pOutput + disp + disp_desc, oCount, pElems[pos_desc].extent,
&advance );
if( rc <= 0 ) {
printf( "trash in the input buffer\n" );
return -1;
}
iCount -= advance; /* decrease the available space in the buffer */
pInput += advance; /* increase the pointer to the buffer */
pConvertor->bConverted += advance;
if( rc != count_desc ) {
/* not all data has been converted. Keep the state */
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, count_desc - rc,
disp + rc * pElems[pos_desc].extent, pos_desc );
if( iCount != 0 )
printf( "there is still room in the input buffer %d bytes\n", iCount );
return 0;
}
pConvertor->converted += rc; /* number of elementd converted so far */
pos_desc++; /* advance to the next data */
count_desc = pElems[pos_desc].count;
disp_desc = pElems[pos_desc].disp;
if( iCount == 0 ) break; /* break if there is no more data in the buffer */
}
/* out of the loop: we have complete the data conversion or no more space
* in the buffer.
*/
if( pConvertor->pStack[0].count < 0 ) return 1; /* data succesfully converted */
/* out of the loop: we have complete the data conversion or no more space
* in the buffer.
*/
if( pConvertor->pStack[0].count < 0 ) return 1; /* data succesfully converted */
/* I complete an element, next step I should go to the next one */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pElems[pos_desc].count, disp, pos_desc );
/* I complete an element, next step I should go to the next one */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
pElems[pos_desc].count, disp, pos_desc );
return 0;
return 0;
}
/**
 * Unpack data from iov[0] into the user buffer attached to the convertor
 * (pConv->pBaseBuf), following the datatype description pConv->pDesc.
 * All data movement visible here goes through the MEMCPY macro.
 *
 * NOTE(review): this span appears to interleave two revisions of the
 * function (the local declarations appear twice, both the lastLength and
 * the last_count variants are used, and there are two trailing returns).
 * Confirm against the real source file before relying on statement order.
 *
 * @param pConv     convertor holding the datatype description, the stack
 *                  and the destination base pointer.
 * @param iov       iov[0].iov_base is read as the packed source buffer and
 *                  iov[0].iov_len as its length in bytes.
 * @param out_size  not referenced anywhere in the visible body.
 * @return for datatypes flagged DT_FLAG_CONTIGUOUS, 1 when
 *         pConv->bConverted equals pData->size * pConv->count and 0
 *         otherwise; the remaining visible paths return 0.
 */
static int convertor_unpack_homogeneous( lam_convertor_t* pConv, struct iovec* iov, unsigned int out_size )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int i; /* counter for basic datatype with extent */
int stack_pos = 0; /* position on the stack */
long lastDisp = 0, lastLength = 0;
char* pSrcBuf;
dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
/* NOTE(review): second copy of the declarations; here last_count replaces
 * lastLength, and space, next_length, init_bconvert and end_desc are added.
 */
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int type; /* type at current position */
int i; /* counter for basic datatype with extent */
int stack_pos = 0; /* position on the stack */
long lastDisp = 0, last_count = 0;
int space = iov[0].iov_len;
char* pSrcBuf;
dt_desc_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
int next_length;
int init_bconvert = pConv->bConverted;
int end_desc;
pSrcBuf = iov[0].iov_base;
pSrcBuf = iov[0].iov_base;
/* contiguous datatype: copy without walking the element description */
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
long extent = pData->ub - pData->lb;
char* pDstBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
long extent = pData->ub - pData->lb;
char* pDstBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
if( pData->size == extent ) {
long length = pConv->count * pData->size;
if( pData->size == extent ) {
long length = pConv->count * pData->size;
if( length > iov[0].iov_len )
length = iov[0].iov_len;
/* contiguous data or basic datatype with count */
MEMCPY( pDstBuf, pSrcBuf, length );
pConv->bConverted += length;
} else {
/* 'type' is reused here as a byte counter that starts at the iov length */
type = iov[0].iov_len;
for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
MEMCPY( pDstBuf, pSrcBuf, pData->size );
pSrcBuf += pData->size;
pDstBuf += extent;
type -= pData->size;
}
/* NOTE(review): this adds the bytes left over in 'type', not the number
 * of bytes copied by the loop above - confirm the intent.
 */
pConv->bConverted += type;
}
return (pConv->bConverted == (pData->size * pConv->count));
}
/* start from the bottom of the stack with the whole count */
pStack = pConv->pStack;
pStack->count = pConv->count;
pStack->index = -1;
pStack->disp = 0;
pos_desc = 0;
/* use the optimized description when one is available */
if( pData->opt_desc.desc != NULL ) {
pElems = pData->opt_desc.desc;
pStack->end_loop = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
pStack->end_loop = pData->desc.used;
}
DUMP_STACK( pStack, stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
next_loop:
while( pos_desc <= pStack->end_loop ) {
if( pos_desc == pStack->end_loop ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --stack_pos == -1 ) break;
} else {
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
}
pos_desc++;
goto next_loop;
}
if( pElems[pos_desc].type == DT_LOOP ) {
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
if( lastLength == 0 ) {
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
pSrcBuf += lastLength;
if( length > iov[0].iov_len )
length = iov[0].iov_len;
/* contiguous data or basic datatype with count */
MEMCPY( pDstBuf, pSrcBuf, length );
pConv->bConverted += length;
} else {
type = iov[0].iov_len;
for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
MEMCPY( pDstBuf, pSrcBuf, pData->size );
pSrcBuf += pData->size;
pDstBuf += extent;
type -= pData->size;
}
lastLength = pLast->extent;
for( i = 0; i < (pElems[pos_desc].count - 1); i++ ) {
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
pSrcBuf += pLast->extent;
lastDisp += pElems[pos_desc].extent;
pConv->bConverted += type;
}
return (pConv->bConverted == (pData->size * pConv->count));
}
/* use the optimized description when one is available */
if( pData->opt_desc.desc != NULL ) {
pElems = pData->opt_desc.desc;
end_desc = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
end_desc = pData->desc.used;
}
/* pick up position, displacement and count from the current top of the stack */
pStack = pConv->pStack + pConv->stack_pos;
pos_desc = pStack->index;
lastDisp = pStack->disp;
last_count = pStack->count;
if( pElems[pos_desc].flags & DT_FLAG_DATA ) {
pStack--;
pConv->stack_pos--;
}
DUMP_STACK( pStack, stack_pos, pElems, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
while( pos_desc < end_desc ) {
while( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
pStack--;
if( --(pConv->stack_pos) == -1 ) break;
} else {
pos_desc = pStack->index;
if( pos_desc == -1 )
pStack->disp += (pData->ub - pData->lb);
else
pStack->disp += pElems[pos_desc].extent;
}
pos_desc += pElems[pos_desc].disp + 1;
goto next_loop;
} else {
do {
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
}
}
/* now here we have a basic datatype */
type = pElems[pos_desc].type;
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
} else {
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
pSrcBuf += lastLength;
printf( "increase by %ld bytes\n", lastLength );
pConv->bConverted += lastLength;
lastDisp = pStack->disp + pElems[pos_desc].disp;
lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
}
pos_desc++; /* advance to the next data */
}
pos_desc++;
last_count = pElems[pos_desc].count;
}
while( pElems[pos_desc].type == DT_LOOP ) {
int stop_in_loop = 0;
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
last_count = pElems[pos_desc].count;
/* clip the number of iterations to what fits in the remaining input space */
if( (pLast->extent * last_count) > space ) {
last_count = space / pLast->extent;
stop_in_loop = 1;
}
/* NOTE(review): the loop bound uses pElems[pos_desc].count - 1 rather than
 * the clipped last_count - confirm the intent.
 */
for( i = 0; i < (pElems[pos_desc].count - 1); i++ ) {
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, pLast->extent );
pSrcBuf += pLast->extent;
lastDisp += pElems[pos_desc].extent;
}
space -= (pLast->extent * last_count);
pConv->bConverted += (pLast->extent * last_count);
if( stop_in_loop != 0 ) {
pos_desc += pElems[pos_desc].disp + 1;
last_count = pElems[pos_desc].count;
continue;
}
last_count = space;
next_length = pLast->extent - space;
/* Save the stack with the correct last_count value. */
}
PUSH_STACK( pStack, stack_pos, pos_desc, last_count,
pStack->disp, pos_desc + pElems[pos_desc].disp );
pos_desc++;
last_count = pElems[pos_desc].count;
}
/* now here we have a basic datatype */
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
/* do we have enough space in the buffer ? */
if( space < last_count ) {
next_length = last_count - space;
last_count = space;
goto end_loop; /* or break whatever but go out of this while */
}
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, last_count );
pConv->bConverted += last_count;
space -= last_count;
pSrcBuf += last_count;
lastDisp = pStack->disp + pElems[pos_desc].disp;
pos_desc++; /* advance to the next data */
last_count = pElems[pos_desc].count;
}
}
last_count = 0; /* complete the data */
end_loop:
/* copy the partial element that still fits, then record where to resume */
if( last_count != 0 ) { /* save the internal state */
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, last_count );
pConv->bConverted += last_count;
lastDisp += last_count;
}
if( pos_desc <= pStack->end_loop ) /* cleanup the stack */
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, next_length,
lastDisp, pos_desc );
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
pConv->bConverted += lastLength;
/* cleanup the stack */
return 0;
/* NOTE(review): as displayed, the two statements below follow a return and
 * are unreachable; they look like the tail of the other revision, which
 * stores the number of bytes consumed by this call in iov[0].iov_len.
 */
iov[0].iov_len = pConv->bConverted - init_bconvert;
return 0;
}
/**
 * Unpack the content of pInputv[0] into the user buffer attached to the
 * convertor.
 *
 * Shortcuts taken before any data is moved:
 *  - count == 0: nothing to do, returns 1.
 *  - convertor flagged DT_FLAG_CONTIGUOUS and iov_base == NULL: no copy is
 *    made; the iovec is filled with a pointer into the user buffer instead.
 *      - iov_len == 0: the whole buffer (pBaseBuf + true_lb, count * size
 *        bytes) is handed out and 1 is returned; bConverted is left as is.
 *      - iov_len != 0: the next chunk starting at bConverted is handed
 *        out, iov_len is clipped to the bytes remaining, bConverted is
 *        advanced by that amount, and 1 is returned once bConverted
 *        reaches count * size (0 otherwise).
 *  - the input pointer lies inside [pBaseBuf, pBaseBuf + count * (ub - lb)):
 *    returns 1 without copying.
 * Everything else is delegated to lam_convertor_progress().
 *
 * @param pConvertor  convertor describing the destination buffer and datatype.
 * @param pInputv     input vector; only element 0 is inspected here.
 * @param inputCount  forwarded unchanged to lam_convertor_progress().
 * @return 1 for the completed shortcut cases above, 0 for a partial
 *         contiguous chunk, otherwise whatever lam_convertor_progress()
 *         returns.
 */
int lam_convertor_unpack( lam_convertor_t* pConvertor,
                          struct iovec* pInputv,
                          unsigned int inputCount )
{
    dt_desc_t *pData = pConvertor->pDesc;
    char* pOutput = pConvertor->pBaseBuf;
    /* captured before the contiguous branch may overwrite iov_base */
    char* pInput = pInputv[0].iov_base;
    int rc;

    if( pConvertor->count == 0 ) return 1;  /* nothing to do */

    if( pConvertor->flags & DT_FLAG_CONTIGUOUS ) {
        if( pInputv[0].iov_base == NULL ) {
            rc = pConvertor->count * pData->size;
            if( pInputv[0].iov_len == 0 ) {  /* give me the whole buffer */
                pInputv[0].iov_base = pConvertor->pBaseBuf + pData->true_lb;
                pInputv[0].iov_len = rc;
                return 1;
            } else {  /* what about the next chunk ? */
                pInputv[0].iov_base = pConvertor->pBaseBuf + pData->true_lb + pConvertor->bConverted;
                if( pInputv[0].iov_len > (rc - pConvertor->bConverted) )
                    pInputv[0].iov_len = rc - pConvertor->bConverted;
                pConvertor->bConverted += pInputv[0].iov_len;
                return (pConvertor->bConverted == rc);
            }
        }
    }
    /* input already points inside the destination buffer: nothing to copy */
    if( (pInput >= pOutput) && (pInput < (pOutput + pConvertor->count * (pData->ub - pData->lb))) ) {
        return 1;
    }
    return lam_convertor_progress( pConvertor, pInputv, inputCount );
}
/* Return value:
@ -436,30 +458,29 @@ int lam_convertor_init_for_recv( lam_convertor_t* pConv, unsigned int flags,
dt_desc_t* pData, int count,
void* pUserBuf, int starting_point )
{
OBJ_RETAIN( pData );
pConv->pDesc = pData;
pConv->flags = CONVERTOR_RECV;
if( pConv->pStack != NULL ) free( pConv->pStack );
pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (pData->btypes[DT_LOOP] + 2) );
if( starting_point == 0 ) {
pConv->stack_pos = 0;
pConv->pStack[0].index = -1; /* fake entry for the first step */
pConv->pStack[0].count = count; /* fake entry for the first step */
pConv->pStack[0].disp = 0;
/* first we should decide which data representation will be used TODO */
pConv->pStack[0].end_loop = pData->desc.used;
} else {
}
pConv->pBaseBuf = pUserBuf;
pConv->available_space = count * (pData->ub - pData->lb);
pConv->count = count;
pConv->pFunctions = copy_functions;
pConv->converted = 0;
pConv->bConverted = 0;
if( (pData->flags & DT_FLAG_CONTIGUOUS) && (pData->size == (pData->ub - pData->lb)) )
pConv->flags |= DT_FLAG_CONTIGUOUS;
pConv->fAdvance = convertor_unpack_homogeneous;
return 0;
OBJ_RETAIN( pData );
if( pConv->pDesc != pData ) {
pConv->pDesc = pData;
pConv->flags = CONVERTOR_RECV;
if( pConv->pStack != NULL ) free( pConv->pStack );
pConv->pStack = NULL;
}
if( pConv->pStack == NULL ) {
pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (pData->btypes[DT_LOOP] + 2) );
pConv->stack_pos = 0;
}
lam_create_stack_with_pos( pConv, starting_point, local_sizes );
pConv->pBaseBuf = pUserBuf;
pConv->available_space = count * (pData->ub - pData->lb);
pConv->count = count;
pConv->pFunctions = copy_functions;
pConv->converted = 0;
pConv->bConverted = 0;
if( (pData->flags & DT_FLAG_CONTIGUOUS) && (pData->size == (pData->ub - pData->lb)) )
pConv->flags |= DT_FLAG_CONTIGUOUS;
pConv->fAdvance = convertor_unpack_homogeneous;
return 0;
}
/* Get the number of elements from the data associated with this convertor that can be

Просмотреть файл

@ -31,6 +31,33 @@ int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
int* remoteLength;
int loop_length;
if( starting_point == 0 ) {
dt_elem_desc_t* pElems;
pConvertor->stack_pos = 1;
pConvertor->pStack[0].index = 0;
pConvertor->pStack[0].count = pConvertor->count;
pConvertor->pStack[0].disp = 0;
/* first here we should select which data representation will be used for
* this operation: normal one or the optimized version ? */
if( pData->opt_desc.used > 0 ) {
pElems = pData->opt_desc.desc;
pConvertor->pStack[0].end_loop = pData->opt_desc.used;
} else {
pElems = pData->desc.desc;
pConvertor->pStack[0].end_loop = pData->desc.used;
}
pConvertor->pStack[1].index = 0;
pConvertor->pStack[1].count = pElems->count;
pConvertor->pStack[1].disp = pElems->disp;
pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
return 0;
}
/* if the convertor continue from the last position
* there is nothing to do.
*/
if( pConvertor->bConverted != starting_point ) return 0;
remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
pStack = pConvertor->pStack;
pStack->count = pConvertor->count;