Move some functions around and declare some as static.
Correct and improve the pack/unpack homogeneous functions. In dt_add.c, add an empty (not counted) element at the end of the data description. Now we can detect the end of a data description without an additional if. This commit was SVN r995.
Этот коммит содержится в:
родитель
ebb13bc42f
Коммит
43c35c42e7
@ -6,26 +6,26 @@
|
||||
#if defined(VERBOSE)
|
||||
# define DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME ) \
|
||||
dump_stack( (PSTACK), (STACK_POS), (PDESC), (NAME) )
|
||||
# if defined(__GNUC__)
|
||||
# define DUMP(ARGS...) printf(ARGS)
|
||||
# if defined(ACCEPT_C99)
|
||||
# define DUMP( ARGS... ) printf(__VA_ARGS__)
|
||||
# else
|
||||
# if defined(__GNUC__) && !defined(__STDC__)
|
||||
# define DUMP(ARGS...) printf(ARGS)
|
||||
# else
|
||||
# if defined(ACCEPT_C99)
|
||||
# define DUMP( ARGS... ) printf(__VA_ARGS__)
|
||||
# else
|
||||
# define DUMP printf
|
||||
# endif /* ACCEPT_C99 */
|
||||
# endif /* __GNUC__ */
|
||||
# endif /* __GNUC__ && !__STDC__ */
|
||||
# endif /* ACCEPT_C99 */
|
||||
#else
|
||||
# define DUMP_STACK( PSTACK, STACK_POS, PDESC, NAME )
|
||||
# if defined(__GNUC__)
|
||||
# if defined(ACCEPT_C99)
|
||||
# define DUMP(ARGS...)
|
||||
# else
|
||||
# if defined(ACCEPT_C99)
|
||||
# if defined(__GNUC__) && !defined(__STDC__)
|
||||
# define DUMP(ARGS...)
|
||||
# else
|
||||
static void DUMP() { /* empty hopefully removed by the compiler */ }
|
||||
# endif /* ACCEPT_C99 */
|
||||
# endif /* __GNUC__ */
|
||||
static inline void DUMP() { /* empty hopefully removed by the compiler */ }
|
||||
# endif /* __GNUC__ && !__STDC__ */
|
||||
# endif /* ACCEPT_C99 */
|
||||
#endif /* VERBOSE */
|
||||
|
||||
#define DT_LOOP 0x00
|
||||
@ -65,7 +65,9 @@
|
||||
#define DT_2INTEGER 0x22
|
||||
#define DT_LONGDBL_INT 0x23
|
||||
#define DT_WCHAR 0x24
|
||||
/* If the number of basic datatype should change update DT_MAX_PREDEFINED in datatype.h */
|
||||
/* If the number of basic datatype should change update
|
||||
* DT_MAX_PREDEFINED in datatype.h
|
||||
*/
|
||||
|
||||
/* flags for the datatypes. */
|
||||
#define DT_FLAG_DESTROYED 0x0001 /**< user destroyed but some other layers still have a reference */
|
||||
@ -120,8 +122,8 @@ extern dt_desc_t basicDatatypes[DT_MAX_PREDEFINED];
|
||||
#define SET_CONTIGUOUS_FLAG( INT_VALUE ) SET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
|
||||
#define UNSET_CONTIGUOUS_FLAG( INT_VALUE ) UNSET_FLAG(INT_VALUE, DT_FLAG_CONTIGUOUS)
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define LMAX(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _b : _a); })
|
||||
#if defined(__GNUC__) && !defined(__STDC__)
|
||||
#define LMAX(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _b : _a) })
|
||||
#define LMIN(A,B) ({ long _a = (A), _b = (B); (_a < _b ? _a : _b); })
|
||||
#define IMAX(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _b : _a); })
|
||||
#define IMIN(A,B) ({ int _a = (A), _b = (B); (_a < _b ? _a : _b); })
|
||||
@ -151,7 +153,9 @@ do { \
|
||||
(PSTACK) = pTempStack; \
|
||||
} while(0)
|
||||
|
||||
#define MEMCPY( DST, SRC, BLENGTH ) memcpy( (DST), (SRC), (BLENGTH) )
|
||||
#define MEMCPY( DST, SRC, BLENGTH ) { \
|
||||
/*printf( "memcpy dest = %p src = %p length = %d\n", (void*)(DST), (void*)(SRC), (int)(BLENGTH) );*/ \
|
||||
memcpy( (DST), (SRC), (BLENGTH) ); }
|
||||
|
||||
#ifdef USELESS
|
||||
#define MEMCPY_LIMIT 1
|
||||
|
@ -79,8 +79,8 @@ int lam_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd, unsigned int count, long
|
||||
pLast->extent = extent;
|
||||
pdtBase->desc.used++;
|
||||
pdtBase->btypes[pdtAdd->id] += count;
|
||||
pLast->flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
|
||||
if( extent == pdtAdd->size )
|
||||
pLast->flags = pdtAdd->flags ^ (DT_FLAG_FOREVER | DT_FLAG_COMMITED);
|
||||
if( extent != pdtAdd->size )
|
||||
pLast->flags |= DT_FLAG_CONTIGUOUS;
|
||||
} else {
|
||||
/* now we add a complex datatype */
|
||||
@ -131,6 +131,13 @@ int lam_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd, unsigned int count, long
|
||||
/* should I add some space until the extent of this datatype ? */
|
||||
}
|
||||
|
||||
/* let's add a fake element at the end just to avoid useless comparaisons
|
||||
* in pack/unpack functions.
|
||||
*/
|
||||
pLast++;
|
||||
pLast->type = 0;
|
||||
pLast->flags = 0;
|
||||
|
||||
pdtBase->size += count * pdtAdd->size;
|
||||
pdtBase->true_lb = LMIN( pdtBase->true_lb, pdtAdd->true_lb + disp );
|
||||
pdtBase->true_ub = LMAX( pdtBase->true_ub,
|
||||
|
@ -48,18 +48,6 @@ int lam_ddt_create_resized( dt_desc_t* oldType, long lb, long extent, dt_desc_t*
|
||||
return 0;
|
||||
}
|
||||
|
||||
int lam_ddt_commit( dt_desc_t** data )
|
||||
{
|
||||
dt_desc_t* pData = (dt_desc_t*)*data;
|
||||
|
||||
if( pData->flags & DT_FLAG_COMMITED ) return -1;
|
||||
pData->flags |= DT_FLAG_COMMITED;
|
||||
/* If the data is contiguous is useless to generate an optimized version. */
|
||||
if( pData->size != (pData->true_ub - pData->true_lb) )
|
||||
(void)lam_ddt_optimize_short( pData, 1, &(pData->opt_desc) );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void _dump_data_flags( unsigned short usflags )
|
||||
{
|
||||
char flags[12] = "-----------";
|
||||
|
@ -12,7 +12,7 @@
|
||||
/* printf( "save in %s:%d at %p DT_BYTE disp %ld count %d\n", __FILE__, __LINE__, (PELEM), (DISP), (COUNT) ); \ */
|
||||
#define SAVE_DESC( PELEM, DISP, COUNT ) \
|
||||
do { \
|
||||
(PELEM)->flags = DT_FLAG_BASIC; \
|
||||
(PELEM)->flags = DT_FLAG_BASIC | DT_FLAG_DATA; \
|
||||
(PELEM)->type = DT_BYTE; \
|
||||
(PELEM)->count = (COUNT); \
|
||||
(PELEM)->disp = (DISP); \
|
||||
@ -155,3 +155,136 @@ int lam_ddt_optimize_short( dt_desc_t* pData, int count, dt_type_desc_t* pTypeDe
|
||||
pTypeDesc->used = nbElems;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define PRINT_MEMCPY( DST, SRC, LENGTH ) \
|
||||
{ \
|
||||
printf( "%5d: memcpy dst = %p src %p length %ld bytes (so far %d)[%d]\n", \
|
||||
__index++, (DST), (SRC), (long)(LENGTH), __sofar, __LINE__ ); \
|
||||
__sofar += (LENGTH); \
|
||||
}
|
||||
|
||||
static int lam_ddt_unroll( dt_desc_t* pData, int count )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type; /* type at current position */
|
||||
int i; /* index for basic elements with extent */
|
||||
int stack_pos = 0; /* position on the stack */
|
||||
long lastDisp = 0, lastLength = 0;
|
||||
char* pDestBuf;
|
||||
int bConverted = 0, __index = 0, __sofar = 0;
|
||||
dt_elem_desc_t* pElems;
|
||||
|
||||
pDestBuf = NULL;
|
||||
|
||||
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
long extent = pData->ub - pData->lb;
|
||||
char* pSrc = (char*)pData->true_lb;
|
||||
|
||||
type = count * pData->size;
|
||||
if( pData->size == extent /* true extent at this point */ ) {
|
||||
/* we can do it with just one memcpy */
|
||||
PRINT_MEMCPY( pDestBuf, pSrc, pData->size * count );
|
||||
bConverted += (pData->size * count);
|
||||
} else {
|
||||
char* pSrcBuf = (char*)pData->true_lb;
|
||||
long extent = pData->ub - pData->lb;
|
||||
for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
|
||||
PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size );
|
||||
pSrcBuf += extent;
|
||||
pDestBuf += pData->size;
|
||||
}
|
||||
bConverted += type;
|
||||
}
|
||||
return (bConverted == (pData->size * count));
|
||||
}
|
||||
pStack = alloca( sizeof(dt_stack_t) * pData->btypes[DT_LOOP] );
|
||||
pStack->count = count;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
pos_desc = 0;
|
||||
|
||||
if( pData->opt_desc.desc != NULL ) {
|
||||
pElems = pData->opt_desc.desc;
|
||||
pStack->end_loop = pData->opt_desc.used;
|
||||
} else {
|
||||
pElems = pData->desc.desc;
|
||||
pStack->end_loop = pData->desc.used;
|
||||
}
|
||||
|
||||
DUMP_STACK( pStack, stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n",
|
||||
pStack->index, pStack->count );
|
||||
next_loop:
|
||||
while( pos_desc <= pStack->end_loop ) {
|
||||
if( pos_desc == pStack->end_loop ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pStack--;
|
||||
if( --stack_pos == -1 ) break;
|
||||
} else {
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 )
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
}
|
||||
pos_desc++;
|
||||
goto next_loop;
|
||||
}
|
||||
if( pElems[pos_desc].type == DT_LOOP ) {
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent );
|
||||
lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent;
|
||||
i = 1;
|
||||
} else {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
lastDisp = pStack->disp + pElems[pos_desc + 1].disp;
|
||||
i = 0;
|
||||
}
|
||||
lastLength = pLast->extent;
|
||||
for( ; i < (pElems[pos_desc].count - 1); i++ ) {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
pDestBuf += pLast->extent;
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
}
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
goto next_loop;
|
||||
} else {
|
||||
do {
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
}
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems[pos_desc].type;
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
|
||||
lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
|
||||
} else {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
pDestBuf += lastLength;
|
||||
bConverted += lastLength;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
}
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int lam_ddt_commit( dt_desc_t** data )
|
||||
{
|
||||
dt_desc_t* pData = (dt_desc_t*)*data;
|
||||
|
||||
if( pData->flags & DT_FLAG_COMMITED ) return -1;
|
||||
pData->flags |= DT_FLAG_COMMITED;
|
||||
/* If the data is contiguous is useless to generate an optimized version. */
|
||||
if( pData->size != (pData->true_ub - pData->true_lb) )
|
||||
(void)lam_ddt_optimize_short( pData, 1, &(pData->opt_desc) );
|
||||
return 0;
|
||||
}
|
||||
|
@ -132,241 +132,144 @@ static int convertor_pack_general( lam_convertor_t* pConvertor, struct iovec* ou
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int convertor_pack_homogeneous( lam_convertor_t* pConv, struct iovec* iov, unsigned int out_size )
|
||||
/* We suppose here that we work with an already optimized version of the data
|
||||
*/
|
||||
static int convertor_pack_homogeneous( lam_convertor_t* pConv,
|
||||
struct iovec* iov, unsigned int out_size )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type; /* type at current position */
|
||||
int i; /* index for basic elements with extent */
|
||||
int stack_pos = 0; /* position on the stack */
|
||||
long lastDisp = 0, lastLength = 0;
|
||||
char* pDestBuf;
|
||||
dt_desc_t* pData = pConv->pDesc;
|
||||
dt_elem_desc_t* pElems;
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type; /* type at current position */
|
||||
int i; /* index for basic elements with extent */
|
||||
long lastDisp = 0, last_count = 0;
|
||||
int space = iov[0].iov_len;
|
||||
char* pDestBuf;
|
||||
dt_desc_t* pData = pConv->pDesc;
|
||||
dt_elem_desc_t* pElems;
|
||||
int next_length;
|
||||
int init_bconvert = pConv->bConverted;
|
||||
int end_desc;
|
||||
|
||||
pDestBuf = iov[0].iov_base;
|
||||
pDestBuf = iov[0].iov_base;
|
||||
|
||||
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
long extent = pData->ub - pData->lb;
|
||||
char* pSrc = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
|
||||
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
long extent = pData->ub - pData->lb;
|
||||
char* pSrcBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
|
||||
|
||||
type = pConv->count * pData->size;
|
||||
if( pData->size == extent /* true extent at this point */ ) {
|
||||
/* we can do it with just one memcpy */
|
||||
MEMCPY( pDestBuf, pSrc, iov[0].iov_len );
|
||||
pConv->bConverted += iov[0].iov_len;
|
||||
} else {
|
||||
char* pSrcBuf = pConv->pBaseBuf + pData->true_lb;
|
||||
long extent = pData->ub - pData->lb;
|
||||
for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
|
||||
MEMCPY( pDestBuf, pSrcBuf, pData->size );
|
||||
pSrcBuf += extent;
|
||||
pDestBuf += pData->size;
|
||||
}
|
||||
pConv->bConverted += type;
|
||||
}
|
||||
return (pConv->bConverted == (pData->size * pConv->count));
|
||||
}
|
||||
pStack = pConv->pStack;
|
||||
pStack->count = pConv->count;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
pos_desc = 0;
|
||||
type = pConv->count * pData->size;
|
||||
if( pData->size == extent /* true extent at this point */ ) {
|
||||
/* we can do it with just one memcpy */
|
||||
MEMCPY( pDestBuf, pSrcBuf, iov[0].iov_len );
|
||||
space -= iov[0].iov_len;
|
||||
pConv->bConverted += iov[0].iov_len;
|
||||
} else {
|
||||
for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
|
||||
MEMCPY( pDestBuf, pSrcBuf, pData->size );
|
||||
space -= pData->size;
|
||||
pSrcBuf += extent;
|
||||
pDestBuf += pData->size;
|
||||
}
|
||||
pConv->bConverted += type;
|
||||
}
|
||||
return (pConv->bConverted == (pData->size * pConv->count));
|
||||
}
|
||||
|
||||
if( pData->opt_desc.desc != NULL ) {
|
||||
pElems = pData->opt_desc.desc;
|
||||
pStack->end_loop = pData->opt_desc.used;
|
||||
} else {
|
||||
pElems = pData->desc.desc;
|
||||
pStack->end_loop = pData->desc.used;
|
||||
}
|
||||
if( pData->opt_desc.desc != NULL ) {
|
||||
pElems = pData->opt_desc.desc;
|
||||
end_desc = pData->opt_desc.used;
|
||||
} else {
|
||||
pElems = pData->desc.desc;
|
||||
end_desc = pData->desc.used;
|
||||
}
|
||||
|
||||
DUMP_STACK( pStack, stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n",
|
||||
pStack->index, pStack->count );
|
||||
next_loop:
|
||||
while( pos_desc <= pStack->end_loop ) {
|
||||
if( pos_desc == pStack->end_loop ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pStack--;
|
||||
if( --stack_pos == -1 ) break;
|
||||
} else {
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 )
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
}
|
||||
pos_desc++;
|
||||
goto next_loop;
|
||||
}
|
||||
if( pElems[pos_desc].type == DT_LOOP ) {
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength + pLast->extent );
|
||||
i = 1;
|
||||
pStack = pConv->pStack + pConv->stack_pos;
|
||||
pos_desc = pStack->index;
|
||||
lastDisp = pStack->disp;
|
||||
last_count = pStack->count;
|
||||
if( pElems[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
pStack--;
|
||||
pConv->stack_pos--;
|
||||
}
|
||||
DUMP_STACK( pStack, pConv->stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", pConv->stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n",
|
||||
pStack->index, pStack->count );
|
||||
|
||||
while( pos_desc < end_desc ) {
|
||||
while( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pStack--;
|
||||
if( --(pConv->stack_pos) == -1 ) break;
|
||||
} else {
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
|
||||
i = 0;
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 )
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
}
|
||||
pDestBuf += lastLength;
|
||||
lastLength = pLast->extent;
|
||||
for( ; i < (pElems[pos_desc].count - 1); i++ ) {
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
|
||||
pDestBuf += pLast->extent;
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
pos_desc++;
|
||||
last_count = pElems[pos_desc].count;
|
||||
}
|
||||
while( pElems[pos_desc].type == DT_LOOP ) {
|
||||
int stop_in_loop = 0;
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
last_count = pElems[pos_desc].count;
|
||||
if( (pLast->extent * last_count) > space ) {
|
||||
last_count = space / pLast->extent;
|
||||
stop_in_loop = 1;
|
||||
}
|
||||
for( i = 0; i < last_count; i++ ) {
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, pLast->extent );
|
||||
pDestBuf += pLast->extent; /* size of the contiguous data */
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
}
|
||||
space -= (pLast->extent * last_count);
|
||||
pConv->bConverted += (pLast->extent * last_count);
|
||||
if( stop_in_loop != 0 ) {
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
last_count = pElems[pos_desc].count;
|
||||
continue;
|
||||
}
|
||||
last_count = space;
|
||||
next_length = pLast->extent - space;
|
||||
/* Save the stack with the correct last_count value. */
|
||||
}
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
goto next_loop;
|
||||
} else {
|
||||
do {
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
}
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems[pos_desc].type;
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
|
||||
lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
|
||||
} else {
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
|
||||
pDestBuf += lastLength;
|
||||
pConv->bConverted += lastLength;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
}
|
||||
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, lastLength );
|
||||
pConv->bConverted += lastLength;
|
||||
/* cleanup the stack */
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define PRINT_MEMCPY( DST, SRC, LENGTH ) \
|
||||
{ \
|
||||
printf( "%5d: memcpy dst = %p src %p length %ld bytes (so far %d)[%d]\n", \
|
||||
__index++, (DST), (SRC), (long)(LENGTH), __sofar, __LINE__ ); \
|
||||
__sofar += (LENGTH); \
|
||||
}
|
||||
|
||||
int dt_unroll( dt_desc_t* pData, int count )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type; /* type at current position */
|
||||
int i; /* index for basic elements with extent */
|
||||
int stack_pos = 0; /* position on the stack */
|
||||
long lastDisp = 0, lastLength = 0;
|
||||
char* pDestBuf;
|
||||
int bConverted = 0, __index = 0, __sofar = 0;
|
||||
dt_elem_desc_t* pElems;
|
||||
|
||||
pDestBuf = NULL;
|
||||
|
||||
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
long extent = pData->ub - pData->lb;
|
||||
char* pSrc = (char*)pData->true_lb;
|
||||
|
||||
type = count * pData->size;
|
||||
if( pData->size == extent /* true extent at this point */ ) {
|
||||
/* we can do it with just one memcpy */
|
||||
PRINT_MEMCPY( pDestBuf, pSrc, pData->size * count );
|
||||
bConverted += (pData->size * count);
|
||||
} else {
|
||||
char* pSrcBuf = (char*)pData->true_lb;
|
||||
long extent = pData->ub - pData->lb;
|
||||
for( pos_desc = 0; pos_desc < count; pos_desc++ ) {
|
||||
PRINT_MEMCPY( pDestBuf, pSrcBuf, pData->size );
|
||||
pSrcBuf += extent;
|
||||
pDestBuf += pData->size;
|
||||
}
|
||||
bConverted += type;
|
||||
}
|
||||
return (bConverted == (pData->size * count));
|
||||
}
|
||||
pStack = alloca( sizeof(dt_stack_t) * pData->btypes[DT_LOOP] );
|
||||
pStack->count = count;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
pos_desc = 0;
|
||||
|
||||
if( pData->opt_desc.desc != NULL ) {
|
||||
pElems = pData->opt_desc.desc;
|
||||
pStack->end_loop = pData->opt_desc.used;
|
||||
} else {
|
||||
pElems = pData->desc.desc;
|
||||
pStack->end_loop = pData->desc.used;
|
||||
}
|
||||
|
||||
DUMP_STACK( pStack, stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n",
|
||||
pStack->index, pStack->count );
|
||||
next_loop:
|
||||
while( pos_desc <= pStack->end_loop ) {
|
||||
if( pos_desc == pStack->end_loop ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pStack--;
|
||||
if( --stack_pos == -1 ) break;
|
||||
} else {
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 )
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
}
|
||||
pos_desc++;
|
||||
goto next_loop;
|
||||
}
|
||||
if( pElems[pos_desc].type == DT_LOOP ) {
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc+1].disp) ) {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength + pLast->extent );
|
||||
lastDisp = pStack->disp + pElems[pos_desc+1].disp + pLast->extent;
|
||||
i = 1;
|
||||
} else {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
lastDisp = pStack->disp + pElems[pos_desc + 1].disp;
|
||||
i = 0;
|
||||
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pos_desc++;
|
||||
last_count = pElems[pos_desc].count;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
/* do we have enough space in the buffer ? */
|
||||
if( space < last_count ) {
|
||||
next_length = last_count - space;
|
||||
last_count = space;
|
||||
goto end_loop; /* or break whatever but go out of this while */
|
||||
}
|
||||
lastLength = pLast->extent;
|
||||
for( ; i < (pElems[pos_desc].count - 1); i++ ) {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
pDestBuf += pLast->extent;
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
}
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
goto next_loop;
|
||||
} else {
|
||||
do {
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
}
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems[pos_desc].type;
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
|
||||
lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
|
||||
} else {
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
pDestBuf += lastLength;
|
||||
bConverted += lastLength;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
}
|
||||
PRINT_MEMCPY( pDestBuf, (char*)lastDisp, lastLength );
|
||||
return 0;
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, last_count );
|
||||
pConv->bConverted += last_count;
|
||||
space -= last_count;
|
||||
pDestBuf += last_count;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
pos_desc++; /* advance to the next data */
|
||||
last_count = pElems[pos_desc].count;
|
||||
}
|
||||
}
|
||||
last_count = 0; /* complete the data */
|
||||
end_loop:
|
||||
if( last_count != 0 ) { /* save the internal state */
|
||||
MEMCPY( pDestBuf, pConv->pBaseBuf + lastDisp, last_count );
|
||||
pConv->bConverted += last_count;
|
||||
lastDisp += last_count;
|
||||
}
|
||||
if( pos_desc <= pStack->end_loop ) /* cleanup the stack */
|
||||
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, next_length,
|
||||
lastDisp, pos_desc );
|
||||
|
||||
iov[0].iov_len = pConv->bConverted - init_bconvert;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The pack routines should do 2 things:
|
||||
@ -418,44 +321,33 @@ int lam_convertor_init_for_send( lam_convertor_t* pConv, unsigned int flags,
|
||||
dt_desc_t* dt, int count,
|
||||
void* pUserBuf, int local_starting_point )
|
||||
{
|
||||
OBJ_RETAIN( dt );
|
||||
if( pConv->pDesc != dt ) {
|
||||
pConv->pDesc = dt;
|
||||
pConv->flags = CONVERTOR_SEND;
|
||||
if( pConv->pStack != NULL ) free( pConv->pStack );
|
||||
pConv->pStack = NULL;
|
||||
}
|
||||
if( pConv->pStack == NULL ) {
|
||||
pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (dt->btypes[DT_LOOP] + 2) );
|
||||
pConv->stack_pos = 0; /* just to be sure */
|
||||
}
|
||||
if( local_starting_point == 0 ) {
|
||||
pConv->stack_pos = 0;
|
||||
pConv->pStack[0].index = -1; /* fake entry for the first step */
|
||||
pConv->pStack[0].count = count; /* fake entry for the first step */
|
||||
pConv->pStack[0].disp = 0;
|
||||
/* first hre we should select which data representation will be used for
|
||||
* this operation: normal one or the optimized version ? */
|
||||
pConv->pStack[0].end_loop = dt->desc.used;
|
||||
} else {
|
||||
if( pConv->bConverted != local_starting_point ) {
|
||||
lam_create_stack_with_pos( pConv, local_starting_point, local_sizes );
|
||||
} /* else we just continue from the previsious point */
|
||||
}
|
||||
pConv->pBaseBuf = pUserBuf;
|
||||
pConv->available_space = count * (dt->ub - dt->lb);
|
||||
pConv->count = count;
|
||||
pConv->pFunctions = copy_functions;
|
||||
pConv->converted = 0;
|
||||
pConv->bConverted = 0;
|
||||
if( (dt->flags & DT_FLAG_CONTIGUOUS) && (dt->size == (dt->ub - dt->lb)) )
|
||||
pConv->flags |= DT_FLAG_CONTIGUOUS;
|
||||
pConv->fAdvance = convertor_pack_homogeneous;
|
||||
if( pConv->freebuf != NULL ) {
|
||||
free( pConv->freebuf );
|
||||
pConv->freebuf = NULL;
|
||||
}
|
||||
return 0;
|
||||
OBJ_RETAIN( dt );
|
||||
if( pConv->pDesc != dt ) {
|
||||
pConv->pDesc = dt;
|
||||
pConv->flags = CONVERTOR_SEND;
|
||||
if( pConv->pStack != NULL ) free( pConv->pStack );
|
||||
pConv->pStack = NULL;
|
||||
}
|
||||
if( pConv->pStack == NULL ) {
|
||||
pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (dt->btypes[DT_LOOP] + 2) );
|
||||
pConv->stack_pos = 0; /* just to be sure */
|
||||
}
|
||||
lam_create_stack_with_pos( pConv, local_starting_point, local_sizes );
|
||||
|
||||
pConv->pBaseBuf = pUserBuf;
|
||||
pConv->available_space = count * (dt->ub - dt->lb);
|
||||
pConv->count = count;
|
||||
pConv->pFunctions = copy_functions;
|
||||
pConv->converted = 0;
|
||||
pConv->bConverted = 0;
|
||||
if( (dt->flags & DT_FLAG_CONTIGUOUS) && (dt->size == (dt->ub - dt->lb)) )
|
||||
pConv->flags |= DT_FLAG_CONTIGUOUS;
|
||||
pConv->fAdvance = convertor_pack_homogeneous;
|
||||
if( pConv->freebuf != NULL ) {
|
||||
free( pConv->freebuf );
|
||||
pConv->freebuf = NULL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
lam_convertor_t* lam_convertor_create( int remote_arch, int mode )
|
||||
|
@ -16,19 +16,19 @@ static int convertor_unpack_general( lam_convertor_t* pConvertor,
|
||||
|
||||
void dump_stack( dt_stack_t* pStack, int stack_pos, dt_elem_desc_t* pDesc, char* name )
|
||||
{
|
||||
printf( "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );
|
||||
for( ;stack_pos >= 0; stack_pos-- ) {
|
||||
printf( "%d: pos %d count %d disp %ld end_loop %d ", stack_pos, pStack[stack_pos].index,
|
||||
pStack[stack_pos].count, pStack[stack_pos].disp, pStack[stack_pos].end_loop );
|
||||
if( pStack[stack_pos].index != -1 )
|
||||
printf( "[desc count %d disp %ld extent %d]\n",
|
||||
pDesc[pStack[stack_pos].index].count,
|
||||
pDesc[pStack[stack_pos].index].disp,
|
||||
pDesc[pStack[stack_pos].index].extent );
|
||||
else
|
||||
printf( "\n" );
|
||||
}
|
||||
printf( "\n" );
|
||||
printf( "\nStack %p stack_pos %d name %s\n", (void*)pStack, stack_pos, name );
|
||||
for( ;stack_pos >= 0; stack_pos-- ) {
|
||||
printf( "%d: pos %d count %d disp %ld end_loop %d ", stack_pos, pStack[stack_pos].index,
|
||||
pStack[stack_pos].count, pStack[stack_pos].disp, pStack[stack_pos].end_loop );
|
||||
if( pStack[stack_pos].index != -1 )
|
||||
printf( "[desc count %d disp %ld extent %d]\n",
|
||||
pDesc[pStack[stack_pos].index].count,
|
||||
pDesc[pStack[stack_pos].index].disp,
|
||||
pDesc[pStack[stack_pos].index].extent );
|
||||
else
|
||||
printf( "\n" );
|
||||
}
|
||||
printf( "\n" );
|
||||
}
|
||||
|
||||
/*
|
||||
@ -47,273 +47,295 @@ static int convertor_unpack_general( lam_convertor_t* pConvertor,
|
||||
struct iovec* pInputv,
|
||||
unsigned int inputCount )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int count_desc; /* the number of items already done in the actual pos_desc */
|
||||
int end_loop; /* last element in the actual loop */
|
||||
int type; /* type at current position */
|
||||
unsigned int advance; /* number of bytes that we should advance the buffer */
|
||||
int rc;
|
||||
long disp_desc = 0; /* compute displacement for truncated data */
|
||||
long disp; /* displacement at the beging of the last loop */
|
||||
dt_desc_t *pData = pConvertor->pDesc;
|
||||
dt_elem_desc_t* pElems;
|
||||
char* pOutput = pConvertor->pBaseBuf;
|
||||
int oCount = (pData->ub - pData->lb) * pConvertor->count;
|
||||
char* pInput = pInputv[0].iov_base;
|
||||
int iCount = pInputv[0].iov_len;
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int count_desc; /* the number of items already done in the actual pos_desc */
|
||||
int end_loop; /* last element in the actual loop */
|
||||
int type; /* type at current position */
|
||||
unsigned int advance; /* number of bytes that we should advance the buffer */
|
||||
int rc;
|
||||
long disp_desc = 0; /* compute displacement for truncated data */
|
||||
long disp; /* displacement at the beging of the last loop */
|
||||
dt_desc_t *pData = pConvertor->pDesc;
|
||||
dt_elem_desc_t* pElems;
|
||||
char* pOutput = pConvertor->pBaseBuf;
|
||||
int oCount = (pData->ub - pData->lb) * pConvertor->count;
|
||||
char* pInput = pInputv[0].iov_base;
|
||||
int iCount = pInputv[0].iov_len;
|
||||
|
||||
if( pData->opt_desc.desc != NULL ) pElems = pData->opt_desc.desc;
|
||||
else pElems = pData->desc.desc;
|
||||
if( pData->opt_desc.desc != NULL ) pElems = pData->opt_desc.desc;
|
||||
else pElems = pData->desc.desc;
|
||||
|
||||
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor,
|
||||
pInputv[0].iov_base, pInputv[0].iov_len, inputCount );
|
||||
pStack = pConvertor->pStack + pConvertor->stack_pos;
|
||||
pos_desc = pStack->index;
|
||||
disp = 0;
|
||||
if( pos_desc == -1 ) {
|
||||
pos_desc = 0;
|
||||
count_desc = pElems[0].count;
|
||||
disp_desc = pElems[0].disp;
|
||||
} else {
|
||||
count_desc = pStack->count;
|
||||
if( pElems[pos_desc].type != DT_LOOP ) {
|
||||
pConvertor->stack_pos--;
|
||||
pStack--;
|
||||
disp = pStack->disp;
|
||||
disp_desc = ( pElems[pos_desc].disp +
|
||||
(pElems[pos_desc].count - count_desc) * pElems[pos_desc].extent);
|
||||
}
|
||||
}
|
||||
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n",
|
||||
pStack->index, pStack->count );
|
||||
|
||||
next_loop:
|
||||
end_loop = pStack->end_loop;
|
||||
while( pConvertor->stack_pos >= 0 ) {
|
||||
if( pos_desc == end_loop ) { /* end of the current loop */
|
||||
while( --(pStack->count) == 0 ) { /* end of loop */
|
||||
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", pConvertor,
|
||||
pInputv[0].iov_base, pInputv[0].iov_len, inputCount );
|
||||
pStack = pConvertor->pStack + pConvertor->stack_pos;
|
||||
pos_desc = pStack->index;
|
||||
disp = 0;
|
||||
if( pos_desc == -1 ) {
|
||||
pos_desc = 0;
|
||||
count_desc = pElems[0].count;
|
||||
disp_desc = pElems[0].disp;
|
||||
} else {
|
||||
count_desc = pStack->count;
|
||||
if( pElems[pos_desc].type != DT_LOOP ) {
|
||||
pConvertor->stack_pos--;
|
||||
pStack--;
|
||||
if( pConvertor->stack_pos == -1 )
|
||||
return 1; /* completed */
|
||||
}
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 )
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
pos_desc++;
|
||||
disp = pStack->disp;
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
goto next_loop;
|
||||
}
|
||||
if( pElems[pos_desc].type == DT_LOOP ) {
|
||||
do {
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos,
|
||||
pos_desc, pElems[pos_desc].count,
|
||||
disp, pos_desc + pElems[pos_desc].disp + 1 );
|
||||
disp = pStack->disp;
|
||||
disp_desc = ( pElems[pos_desc].disp +
|
||||
(pElems[pos_desc].count - count_desc) * pElems[pos_desc].extent);
|
||||
}
|
||||
}
|
||||
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n",
|
||||
pStack->index, pStack->count );
|
||||
|
||||
next_loop:
|
||||
end_loop = pStack->end_loop;
|
||||
while( pConvertor->stack_pos >= 0 ) {
|
||||
if( pos_desc == end_loop ) { /* end of the current loop */
|
||||
while( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pConvertor->stack_pos--;
|
||||
pStack--;
|
||||
if( pConvertor->stack_pos == -1 )
|
||||
return 1; /* completed */
|
||||
}
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 )
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "advance loops" );
|
||||
/* update the current state */
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
goto next_loop;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems[pos_desc].type;
|
||||
rc = pConvertor->pFunctions[type]( count_desc,
|
||||
pInput, iCount, pElems[pos_desc].extent,
|
||||
pOutput + disp + disp_desc, oCount, pElems[pos_desc].extent,
|
||||
&advance );
|
||||
if( rc <= 0 ) {
|
||||
printf( "trash in the input buffer\n" );
|
||||
return -1;
|
||||
}
|
||||
iCount -= advance; /* decrease the available space in the buffer */
|
||||
pInput += advance; /* increase the pointer to the buffer */
|
||||
pConvertor->bConverted += advance;
|
||||
if( rc != count_desc ) {
|
||||
/* not all data has been converted. Keep the state */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos,
|
||||
pos_desc, count_desc - rc,
|
||||
disp + rc * pElems[pos_desc].extent, pos_desc );
|
||||
if( iCount != 0 )
|
||||
printf( "there is still room in the input buffer %d bytes\n", iCount );
|
||||
return 0;
|
||||
}
|
||||
pConvertor->converted += rc; /* number of elements converted so far */
|
||||
pos_desc++; /* advance to the next data */
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
if( iCount == 0 ) break; /* break if there is no more data in the buffer */
|
||||
}
|
||||
disp = pStack->disp;
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
goto next_loop;
|
||||
}
|
||||
if( pElems[pos_desc].type == DT_LOOP ) {
|
||||
do {
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos,
|
||||
pos_desc, pElems[pos_desc].count,
|
||||
disp, pos_desc + pElems[pos_desc].disp + 1 );
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElems, "advance loops" );
|
||||
/* update the current state */
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
goto next_loop;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems[pos_desc].type;
|
||||
rc = pConvertor->pFunctions[type]( count_desc,
|
||||
pInput, iCount, pElems[pos_desc].extent,
|
||||
pOutput + disp + disp_desc, oCount, pElems[pos_desc].extent,
|
||||
&advance );
|
||||
if( rc <= 0 ) {
|
||||
printf( "trash in the input buffer\n" );
|
||||
return -1;
|
||||
}
|
||||
iCount -= advance; /* decrease the available space in the buffer */
|
||||
pInput += advance; /* increase the pointer to the buffer */
|
||||
pConvertor->bConverted += advance;
|
||||
if( rc != count_desc ) {
|
||||
/* not all data has been converted. Keep the state */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos,
|
||||
pos_desc, count_desc - rc,
|
||||
disp + rc * pElems[pos_desc].extent, pos_desc );
|
||||
if( iCount != 0 )
|
||||
printf( "there is still room in the input buffer %d bytes\n", iCount );
|
||||
return 0;
|
||||
}
|
||||
pConvertor->converted += rc; /* number of elements converted so far */
|
||||
pos_desc++; /* advance to the next data */
|
||||
count_desc = pElems[pos_desc].count;
|
||||
disp_desc = pElems[pos_desc].disp;
|
||||
if( iCount == 0 ) break; /* break if there is no more data in the buffer */
|
||||
}
|
||||
|
||||
/* out of the loop: we have completed the data conversion or there is no more space
|
||||
* in the buffer.
|
||||
*/
|
||||
if( pConvertor->pStack[0].count < 0 ) return 1; /* data successfully converted */
|
||||
/* out of the loop: we have completed the data conversion or there is no more space
|
||||
* in the buffer.
|
||||
*/
|
||||
if( pConvertor->pStack[0].count < 0 ) return 1; /* data successfully converted */
|
||||
|
||||
/* I completed an element; on the next step I should go to the next one */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||
pElems[pos_desc].count, disp, pos_desc );
|
||||
/* I completed an element; on the next step I should go to the next one */
|
||||
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc,
|
||||
pElems[pos_desc].count, disp, pos_desc );
|
||||
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int convertor_unpack_homogeneous( lam_convertor_t* pConv, struct iovec* iov, unsigned int out_size )
|
||||
{
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type; /* type at current position */
|
||||
int i; /* counter for basic datatype with extent */
|
||||
int stack_pos = 0; /* position on the stack */
|
||||
long lastDisp = 0, lastLength = 0;
|
||||
char* pSrcBuf;
|
||||
dt_desc_t* pData = pConv->pDesc;
|
||||
dt_elem_desc_t* pElems;
|
||||
dt_stack_t* pStack; /* pointer to the position on the stack */
|
||||
int pos_desc; /* actual position in the description of the derived datatype */
|
||||
int type; /* type at current position */
|
||||
int i; /* counter for basic datatype with extent */
|
||||
int stack_pos = 0; /* position on the stack */
|
||||
long lastDisp = 0, last_count = 0;
|
||||
int space = iov[0].iov_len;
|
||||
char* pSrcBuf;
|
||||
dt_desc_t* pData = pConv->pDesc;
|
||||
dt_elem_desc_t* pElems;
|
||||
int next_length;
|
||||
int init_bconvert = pConv->bConverted;
|
||||
int end_desc;
|
||||
|
||||
pSrcBuf = iov[0].iov_base;
|
||||
pSrcBuf = iov[0].iov_base;
|
||||
|
||||
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
long extent = pData->ub - pData->lb;
|
||||
char* pDstBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
|
||||
if( pData->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
long extent = pData->ub - pData->lb;
|
||||
char* pDstBuf = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
|
||||
|
||||
if( pData->size == extent ) {
|
||||
long length = pConv->count * pData->size;
|
||||
if( pData->size == extent ) {
|
||||
long length = pConv->count * pData->size;
|
||||
|
||||
if( length > iov[0].iov_len )
|
||||
length = iov[0].iov_len;
|
||||
/* contiguous data or basic datatype with count */
|
||||
MEMCPY( pDstBuf, pSrcBuf, length );
|
||||
pConv->bConverted += length;
|
||||
} else {
|
||||
type = iov[0].iov_len;
|
||||
for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
|
||||
MEMCPY( pDstBuf, pSrcBuf, pData->size );
|
||||
pSrcBuf += pData->size;
|
||||
pDstBuf += extent;
|
||||
type -= pData->size;
|
||||
}
|
||||
pConv->bConverted += type;
|
||||
}
|
||||
return (pConv->bConverted == (pData->size * pConv->count));
|
||||
}
|
||||
|
||||
pStack = pConv->pStack;
|
||||
pStack->count = pConv->count;
|
||||
pStack->index = -1;
|
||||
pStack->disp = 0;
|
||||
pos_desc = 0;
|
||||
|
||||
if( pData->opt_desc.desc != NULL ) {
|
||||
pElems = pData->opt_desc.desc;
|
||||
pStack->end_loop = pData->opt_desc.used;
|
||||
} else {
|
||||
pElems = pData->desc.desc;
|
||||
pStack->end_loop = pData->desc.used;
|
||||
}
|
||||
|
||||
DUMP_STACK( pStack, stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n",
|
||||
pStack->index, pStack->count );
|
||||
next_loop:
|
||||
while( pos_desc <= pStack->end_loop ) {
|
||||
if( pos_desc == pStack->end_loop ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pStack--;
|
||||
if( --stack_pos == -1 ) break;
|
||||
} else {
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 )
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
}
|
||||
pos_desc++;
|
||||
goto next_loop;
|
||||
}
|
||||
if( pElems[pos_desc].type == DT_LOOP ) {
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
if( lastLength == 0 ) {
|
||||
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
|
||||
pSrcBuf += lastLength;
|
||||
if( length > iov[0].iov_len )
|
||||
length = iov[0].iov_len;
|
||||
/* contiguous data or basic datatype with count */
|
||||
MEMCPY( pDstBuf, pSrcBuf, length );
|
||||
pConv->bConverted += length;
|
||||
} else {
|
||||
type = iov[0].iov_len;
|
||||
for( pos_desc = 0; pos_desc < pConv->count; pos_desc++ ) {
|
||||
MEMCPY( pDstBuf, pSrcBuf, pData->size );
|
||||
pSrcBuf += pData->size;
|
||||
pDstBuf += extent;
|
||||
type -= pData->size;
|
||||
}
|
||||
lastLength = pLast->extent;
|
||||
for( i = 0; i < (pElems[pos_desc].count - 1); i++ ) {
|
||||
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
|
||||
pSrcBuf += pLast->extent;
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
pConv->bConverted += type;
|
||||
}
|
||||
return (pConv->bConverted == (pData->size * pConv->count));
|
||||
}
|
||||
|
||||
if( pData->opt_desc.desc != NULL ) {
|
||||
pElems = pData->opt_desc.desc;
|
||||
end_desc = pData->opt_desc.used;
|
||||
} else {
|
||||
pElems = pData->desc.desc;
|
||||
end_desc = pData->desc.used;
|
||||
}
|
||||
pStack = pConv->pStack + pConv->stack_pos;
|
||||
pos_desc = pStack->index;
|
||||
lastDisp = pStack->disp;
|
||||
last_count = pStack->count;
|
||||
if( pElems[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
pStack--;
|
||||
pConv->stack_pos--;
|
||||
}
|
||||
DUMP_STACK( pStack, stack_pos, pElems, "starting" );
|
||||
DUMP( "remember position on stack %d last_elem at %d\n", stack_pos, pos_desc );
|
||||
DUMP( "top stack info {index = %d, count = %d}\n",
|
||||
pStack->index, pStack->count );
|
||||
|
||||
while( pos_desc < end_desc ) {
|
||||
while( pElems[pos_desc].type == DT_END_LOOP ) { /* end of the current loop */
|
||||
if( --(pStack->count) == 0 ) { /* end of loop */
|
||||
pStack--;
|
||||
if( --(pConv->stack_pos) == -1 ) break;
|
||||
} else {
|
||||
pos_desc = pStack->index;
|
||||
if( pos_desc == -1 )
|
||||
pStack->disp += (pData->ub - pData->lb);
|
||||
else
|
||||
pStack->disp += pElems[pos_desc].extent;
|
||||
}
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
goto next_loop;
|
||||
} else {
|
||||
do {
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, pElems[pos_desc].count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pos_desc++;
|
||||
} while( pElems[pos_desc].type == DT_LOOP ); /* let's start another loop */
|
||||
}
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
type = pElems[pos_desc].type;
|
||||
if( (lastDisp + lastLength) == (pStack->disp + pElems[pos_desc].disp) ) {
|
||||
lastLength += pElems[pos_desc].count * basicDatatypes[type].size;
|
||||
} else {
|
||||
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
|
||||
pSrcBuf += lastLength;
|
||||
printf( "increase by %ld bytes\n", lastLength );
|
||||
pConv->bConverted += lastLength;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
lastLength = pElems[pos_desc].count * basicDatatypes[type].size;
|
||||
}
|
||||
pos_desc++; /* advance to the next data */
|
||||
}
|
||||
pos_desc++;
|
||||
last_count = pElems[pos_desc].count;
|
||||
}
|
||||
while( pElems[pos_desc].type == DT_LOOP ) {
|
||||
int stop_in_loop = 0;
|
||||
if( pElems[pos_desc].flags & DT_FLAG_CONTIGUOUS ) {
|
||||
dt_elem_desc_t* pLast = &( pElems[pos_desc + pElems[pos_desc].disp]);
|
||||
last_count = pElems[pos_desc].count;
|
||||
if( (pLast->extent * last_count) > space ) {
|
||||
last_count = space / pLast->extent;
|
||||
stop_in_loop = 1;
|
||||
}
|
||||
for( i = 0; i < (pElems[pos_desc].count - 1); i++ ) {
|
||||
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, pLast->extent );
|
||||
pSrcBuf += pLast->extent;
|
||||
lastDisp += pElems[pos_desc].extent;
|
||||
}
|
||||
space -= (pLast->extent * last_count);
|
||||
pConv->bConverted += (pLast->extent * last_count);
|
||||
if( stop_in_loop != 0 ) {
|
||||
pos_desc += pElems[pos_desc].disp + 1;
|
||||
last_count = pElems[pos_desc].count;
|
||||
continue;
|
||||
}
|
||||
last_count = space;
|
||||
next_length = pLast->extent - space;
|
||||
/* Save the stack with the correct last_count value. */
|
||||
}
|
||||
PUSH_STACK( pStack, stack_pos, pos_desc, last_count,
|
||||
pStack->disp, pos_desc + pElems[pos_desc].disp );
|
||||
pos_desc++;
|
||||
last_count = pElems[pos_desc].count;
|
||||
}
|
||||
/* now here we have a basic datatype */
|
||||
while( pElems[pos_desc].flags & DT_FLAG_DATA ) {
|
||||
/* do we have enough space in the buffer ? */
|
||||
if( space < last_count ) {
|
||||
next_length = last_count - space;
|
||||
last_count = space;
|
||||
goto end_loop; /* or break whatever but go out of this while */
|
||||
}
|
||||
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, last_count );
|
||||
pConv->bConverted += last_count;
|
||||
space -= last_count;
|
||||
pSrcBuf += last_count;
|
||||
lastDisp = pStack->disp + pElems[pos_desc].disp;
|
||||
pos_desc++; /* advance to the next data */
|
||||
last_count = pElems[pos_desc].count;
|
||||
}
|
||||
}
|
||||
last_count = 0; /* complete the data */
|
||||
end_loop:
|
||||
if( last_count != 0 ) { /* save the internal state */
|
||||
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, last_count );
|
||||
pConv->bConverted += last_count;
|
||||
lastDisp += last_count;
|
||||
}
|
||||
if( pos_desc <= pStack->end_loop ) /* cleanup the stack */
|
||||
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, next_length,
|
||||
lastDisp, pos_desc );
|
||||
|
||||
MEMCPY( pConv->pBaseBuf + lastDisp, pSrcBuf, lastLength );
|
||||
pConv->bConverted += lastLength;
|
||||
|
||||
/* cleanup the stack */
|
||||
return 0;
|
||||
iov[0].iov_len = pConv->bConverted - init_bconvert;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int lam_convertor_unpack( lam_convertor_t* pConvertor,
|
||||
struct iovec* pInputv,
|
||||
unsigned int inputCount )
|
||||
{
|
||||
dt_desc_t *pData = pConvertor->pDesc;
|
||||
char* pOutput = pConvertor->pBaseBuf;
|
||||
char* pInput = pInputv[0].iov_base;
|
||||
int rc;
|
||||
dt_desc_t *pData = pConvertor->pDesc;
|
||||
char* pOutput = pConvertor->pBaseBuf;
|
||||
char* pInput = pInputv[0].iov_base;
|
||||
int rc;
|
||||
|
||||
if( pConvertor->count == 0 ) return 1; /* nothing to do */
|
||||
if( pConvertor->count == 0 ) return 1; /* nothing to do */
|
||||
|
||||
if( pConvertor->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
if( pInputv[0].iov_base == NULL ) {
|
||||
rc = pConvertor->count * pData->size;
|
||||
if( pInputv[0].iov_len == 0 ) { /* give me the whole buffer */
|
||||
pInputv[0].iov_base = pConvertor->pBaseBuf + pData->true_lb;
|
||||
pInputv[0].iov_len = rc;
|
||||
return 1;
|
||||
} else { /* what about the next chunk ? */
|
||||
pInputv[0].iov_base = pConvertor->pBaseBuf + pData->true_lb + pConvertor->bConverted;
|
||||
if( pInputv[0].iov_len > (rc - pConvertor->bConverted) )
|
||||
pInputv[0].iov_len = rc - pConvertor->bConverted;
|
||||
pConvertor->bConverted += pInputv[0].iov_len;
|
||||
return (pConvertor->bConverted == rc);
|
||||
}
|
||||
}
|
||||
}
|
||||
if( (pInput >= pOutput) && (pInput < (pOutput + pConvertor->count * (pData->ub - pData->lb))) ) {
|
||||
return 1;
|
||||
}
|
||||
return lam_convertor_progress( pConvertor, pInputv, inputCount );
|
||||
if( pConvertor->flags & DT_FLAG_CONTIGUOUS ) {
|
||||
if( pInputv[0].iov_base == NULL ) {
|
||||
rc = pConvertor->count * pData->size;
|
||||
if( pInputv[0].iov_len == 0 ) { /* give me the whole buffer */
|
||||
pInputv[0].iov_base = pConvertor->pBaseBuf + pData->true_lb;
|
||||
pInputv[0].iov_len = rc;
|
||||
return 1;
|
||||
} else { /* what about the next chunk ? */
|
||||
pInputv[0].iov_base = pConvertor->pBaseBuf + pData->true_lb + pConvertor->bConverted;
|
||||
if( pInputv[0].iov_len > (rc - pConvertor->bConverted) )
|
||||
pInputv[0].iov_len = rc - pConvertor->bConverted;
|
||||
pConvertor->bConverted += pInputv[0].iov_len;
|
||||
return (pConvertor->bConverted == rc);
|
||||
}
|
||||
}
|
||||
}
|
||||
if( (pInput >= pOutput) && (pInput < (pOutput + pConvertor->count * (pData->ub - pData->lb))) ) {
|
||||
return 1;
|
||||
}
|
||||
return lam_convertor_progress( pConvertor, pInputv, inputCount );
|
||||
}
|
||||
|
||||
/* Return value:
|
||||
@ -436,30 +458,29 @@ int lam_convertor_init_for_recv( lam_convertor_t* pConv, unsigned int flags,
|
||||
dt_desc_t* pData, int count,
|
||||
void* pUserBuf, int starting_point )
|
||||
{
|
||||
OBJ_RETAIN( pData );
|
||||
pConv->pDesc = pData;
|
||||
pConv->flags = CONVERTOR_RECV;
|
||||
if( pConv->pStack != NULL ) free( pConv->pStack );
|
||||
pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (pData->btypes[DT_LOOP] + 2) );
|
||||
if( starting_point == 0 ) {
|
||||
pConv->stack_pos = 0;
|
||||
pConv->pStack[0].index = -1; /* fake entry for the first step */
|
||||
pConv->pStack[0].count = count; /* fake entry for the first step */
|
||||
pConv->pStack[0].disp = 0;
|
||||
/* first we should decide which data representation will be used TODO */
|
||||
pConv->pStack[0].end_loop = pData->desc.used;
|
||||
} else {
|
||||
}
|
||||
pConv->pBaseBuf = pUserBuf;
|
||||
pConv->available_space = count * (pData->ub - pData->lb);
|
||||
pConv->count = count;
|
||||
pConv->pFunctions = copy_functions;
|
||||
pConv->converted = 0;
|
||||
pConv->bConverted = 0;
|
||||
if( (pData->flags & DT_FLAG_CONTIGUOUS) && (pData->size == (pData->ub - pData->lb)) )
|
||||
pConv->flags |= DT_FLAG_CONTIGUOUS;
|
||||
pConv->fAdvance = convertor_unpack_homogeneous;
|
||||
return 0;
|
||||
OBJ_RETAIN( pData );
|
||||
if( pConv->pDesc != pData ) {
|
||||
pConv->pDesc = pData;
|
||||
pConv->flags = CONVERTOR_RECV;
|
||||
if( pConv->pStack != NULL ) free( pConv->pStack );
|
||||
pConv->pStack = NULL;
|
||||
}
|
||||
if( pConv->pStack == NULL ) {
|
||||
pConv->pStack = (dt_stack_t*)malloc(sizeof(dt_stack_t) * (pData->btypes[DT_LOOP] + 2) );
|
||||
pConv->stack_pos = 0;
|
||||
}
|
||||
lam_create_stack_with_pos( pConv, starting_point, local_sizes );
|
||||
|
||||
pConv->pBaseBuf = pUserBuf;
|
||||
pConv->available_space = count * (pData->ub - pData->lb);
|
||||
pConv->count = count;
|
||||
pConv->pFunctions = copy_functions;
|
||||
pConv->converted = 0;
|
||||
pConv->bConverted = 0;
|
||||
if( (pData->flags & DT_FLAG_CONTIGUOUS) && (pData->size == (pData->ub - pData->lb)) )
|
||||
pConv->flags |= DT_FLAG_CONTIGUOUS;
|
||||
pConv->fAdvance = convertor_unpack_homogeneous;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Get the number of elements from the data associated with this convertor that can be
|
||||
|
@ -31,6 +31,33 @@ int lam_create_stack_with_pos( lam_convertor_t* pConvertor,
|
||||
int* remoteLength;
|
||||
int loop_length;
|
||||
|
||||
if( starting_point == 0 ) {
|
||||
dt_elem_desc_t* pElems;
|
||||
|
||||
pConvertor->stack_pos = 1;
|
||||
pConvertor->pStack[0].index = 0;
|
||||
pConvertor->pStack[0].count = pConvertor->count;
|
||||
pConvertor->pStack[0].disp = 0;
|
||||
/* first here we should select which data representation will be used for
|
||||
* this operation: normal one or the optimized version ? */
|
||||
if( pData->opt_desc.used > 0 ) {
|
||||
pElems = pData->opt_desc.desc;
|
||||
pConvertor->pStack[0].end_loop = pData->opt_desc.used;
|
||||
} else {
|
||||
pElems = pData->desc.desc;
|
||||
pConvertor->pStack[0].end_loop = pData->desc.used;
|
||||
}
|
||||
pConvertor->pStack[1].index = 0;
|
||||
pConvertor->pStack[1].count = pElems->count;
|
||||
pConvertor->pStack[1].disp = pElems->disp;
|
||||
pConvertor->pStack[1].end_loop = pConvertor->pStack[0].end_loop;
|
||||
return 0;
|
||||
}
|
||||
/* if the convertor continues from the last position
|
||||
* there is nothing to do.
|
||||
*/
|
||||
if( pConvertor->bConverted != starting_point ) return 0;
|
||||
|
||||
remoteLength = (int*)alloca( sizeof(int) * pConvertor->pDesc->btypes[DT_LOOP] );
|
||||
pStack = pConvertor->pStack;
|
||||
pStack->count = pConvertor->count;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user