1
1
- add more comments on the pack and unpack functions.
- remove all pack/unpack versions that are not used anymore.
- other various cleanups.
- update the safeguard macro (which compute theboundaries of the
  datatype in order to protect us from accessing memory locations
  outside of the data).
- for the contiguous (with or without gaps) pack and unpack correctly
  compute the starting point.

This commit was SVN r10327.
Этот коммит содержится в:
George Bosilca 2006-06-13 07:23:43 +00:00
родитель 3fb5dafdb3
Коммит 88a363fe34
5 изменённых файлов: 42 добавлений и 591 удалений

Просмотреть файл

@ -253,8 +253,9 @@ OMPI_DECLSPEC int ompi_ddt_safeguard_pointer_debug_breakpoint( const void* actua
{ \
char *__lower_bound = (char*)(INITPTR), *__upper_bound; \
assert( ((LENGTH) != 0) && ((COUNT) != 0) ); \
__lower_bound += (PDATA)->true_lb - (PDATA)->lb; \
__upper_bound = (INITPTR) + ((PDATA)->ub - (PDATA)->lb) * ((COUNT) - 1) + (PDATA)->true_ub - (PDATA)->lb; \
__lower_bound += (PDATA)->true_lb; \
__upper_bound = (INITPTR) + (PDATA)->true_ub + \
((PDATA)->ub - (PDATA)->lb) * ((COUNT) - 1); \
if( ((ACTPTR) < __lower_bound) || ((ACTPTR) >= __upper_bound) ) { \
ompi_ddt_safeguard_pointer_debug_breakpoint( (ACTPTR), (LENGTH), (INITPTR), (PDATA), (COUNT) ); \
opal_output( 0, "%s:%d\n\tPointer %p size %d is outside [%p,%p] for\n\tbase ptr %p count %d and data \n", \

Просмотреть файл

@ -33,570 +33,18 @@ extern int ompi_pack_debug;
#include "ompi/datatype/datatype_prototypes.h"
#if defined(CHECKSUM)
#define ompi_pack_general_function ompi_pack_general_checksum
#define ompi_pack_homogeneous_with_memcpy_function ompi_pack_homogeneous_with_memcpy_checksum
#define ompi_pack_no_conversion_function ompi_pack_no_conversion_checksum
#define ompi_pack_homogeneous_contig_function ompi_pack_homogeneous_contig_checksum
#define ompi_pack_homogeneous_contig_with_gaps_function ompi_pack_homogeneous_contig_with_gaps_checksum
#define ompi_generic_simple_pack_function ompi_generic_simple_pack_checksum
#define ompi_generic_simple_pack_function ompi_generic_simple_pack_checksum
#else
#define ompi_pack_general_function ompi_pack_general
#define ompi_pack_homogeneous_with_memcpy_function ompi_pack_homogeneous_with_memcpy
#define ompi_pack_no_conversion_function ompi_pack_no_conversion
#define ompi_pack_homogeneous_contig_function ompi_pack_homogeneous_contig
#define ompi_pack_homogeneous_contig_with_gaps_function ompi_pack_homogeneous_contig_with_gaps
#define ompi_generic_simple_pack_function ompi_generic_simple_pack
#define ompi_generic_simple_pack_function ompi_generic_simple_pack
#endif /* defined(CHECKSUM) */
int32_t
ompi_pack_general_function( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data,
int32_t* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
uint32_t pos_desc; /* actual position in the description of the derived datatype */
int count_desc; /* the number of items already done in the actual pos_desc */
int type = DT_CHAR; /* type at current position */
uint32_t advance; /* number of bytes that we should advance the buffer */
long disp_desc = 0; /* compute displacement for truncated data */
int bConverted = 0; /* number of bytes converted this time */
const ompi_datatype_t *pData = pConvertor->pDesc;
const ompi_convertor_master_t* master = pConvertor->master;
dt_elem_desc_t* pElem;
char* pOutput = pConvertor->pBaseBuf;
char* pInput;
int iCount, rc;
uint32_t iov_count, total_bytes_converted = 0;
DUMP( "convertor_decode( %p, {%p, %d}, %d )\n", (void*)pConvertor,
iov[0].iov_base, iov[0].iov_len, *out_size );
pElem = pData->desc.desc;
pStack = pConvertor->pStack + pConvertor->stack_pos;
pos_desc = pStack->index;
disp_desc = pStack->disp;
count_desc = pStack->count;
pStack--;
pConvertor->stack_pos--;
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "starting" );
DUMP( "remember position on stack %d last_elem at %d\n", pConvertor->stack_pos, pos_desc );
DUMP( "top stack info {index = %d, count = %d}\n",
pStack->index, pStack->count );
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
bConverted = 0;
if( iov[iov_count].iov_base == NULL ) {
size_t length = iov[iov_count].iov_len;
if( length <= 0 )
length = pConvertor->local_size - pConvertor->bConverted - bConverted;
if( (*max_data) < length )
length = *max_data;
iov[iov_count].iov_base = pConvertor->memAlloc_fn( &length, pConvertor->memAlloc_userdata );
iov[iov_count].iov_len = length;
*freeAfter = (*freeAfter) | ( 1 << iov_count);
}
pInput = iov[iov_count].iov_base;
iCount = iov[iov_count].iov_len;
while( 1 ) {
if( DT_END_LOOP == pElem[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConvertor->stack_pos == 0 )
goto complete_loop; /* completed */
pConvertor->stack_pos--;
pStack--;
pos_desc++;
} else {
pos_desc = pStack->index + 1;
if( pStack->index == -1 ) {
pStack->disp += (pData->ub - pData->lb);
} else {
assert( DT_LOOP == pElem[pStack->index].elem.common.type );
pStack->disp += pElem[pStack->index].loop.extent;
}
}
count_desc = pElem[pos_desc].elem.count;
disp_desc = pElem[pos_desc].elem.disp;
}
if( DT_LOOP == pElem[pos_desc].elem.common.type ) {
do {
PUSH_STACK( pStack, pConvertor->stack_pos,
pos_desc, DT_LOOP, pElem[pos_desc].loop.loops,
pStack->disp, pos_desc + pElem[pos_desc].loop.items + 1);
pos_desc++;
} while( DT_LOOP == pElem[pos_desc].elem.common.type ); /* let's start another loop */
DDT_DUMP_STACK( pConvertor->pStack, pConvertor->stack_pos, pElem, "advance loops" );
/* update the current state */
count_desc = pElem[pos_desc].elem.count;
disp_desc = pElem[pos_desc].elem.disp;
continue;
}
while( pElem[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* now here we have a basic datatype */
type = pElem[pos_desc].elem.common.type;
rc = master->pFunctions[type]( pConvertor, count_desc,
pOutput + pStack->disp + disp_desc,
iCount, pElem[pos_desc].elem.extent,
pInput, iCount, BASIC_DDT_FROM_ELEM(pElem[pos_desc])->size, &advance );
iCount -= advance; /* decrease the available space in the buffer */
pInput += advance; /* increase the pointer to the buffer */
bConverted += advance;
if( rc != count_desc ) {
/* not all data has been converted. Keep the state */
count_desc -= rc;
disp_desc += rc * pElem[pos_desc].elem.extent;
if( iCount != 0 )
printf( "pack there is still room in the input buffer %d bytes\n", iCount );
goto complete_loop;
}
pos_desc++; /* advance to the next data */
count_desc = pElem[pos_desc].elem.count;
disp_desc = pElem[pos_desc].elem.disp;
if( iCount == 0 ) goto complete_loop; /* break if there is no more data in the buffer */
}
}
complete_loop:
pConvertor->bConverted += bConverted; /* update the already converted bytes */
iov[iov_count].iov_len = bConverted; /* update the length in the iovec */
total_bytes_converted += bConverted;
}
*max_data = total_bytes_converted;
/* out of the loop: we have complete the data conversion or no more space
* in the buffer.
*/
if( pConvertor->local_size == pConvertor->bConverted ) {
pConvertor->flags |= CONVERTOR_COMPLETED;
return 1;
}
/* I complete an element, next step I should go to the next one */
PUSH_STACK( pStack, pConvertor->stack_pos, pos_desc, type, count_desc,
disp_desc, pos_desc );
return 0;
}
/* We suppose here that we work with an already optimized version of the data
*/
int32_t
ompi_pack_homogeneous_with_memcpy_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t* out_size,
size_t* max_data,
int* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
uint32_t pos_desc; /* actual position in the description of the derived datatype */
int i; /* index for basic elements with extent */
int bConverted = 0; /* number of bytes converted/moved this time */
long lastDisp = 0, last_count = 0;
uint32_t space = iov[0].iov_len, last_blength = 0;
char* pDestBuf;
const ompi_datatype_t* pData = pConv->pDesc;
dt_elem_desc_t* pElems;
pDestBuf = iov[0].iov_base;
pElems = pConv->use_desc->desc;
pStack = pConv->pStack + pConv->stack_pos;
pos_desc = pStack->index;
lastDisp = pStack->disp;
last_count = pStack->count;
pStack--;
pConv->stack_pos--;
while( 1 ) {
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConv->stack_pos == 0 ) { /* finish everything */
last_count = 0;
pos_desc = -1;
goto end_loop;
}
pStack--;
pConv->stack_pos--;
pos_desc++; /* go to the next element */
} else {
if( pStack->index == -1 ) {
pStack->disp += (pData->ub - pData->lb);
pos_desc = 0;
} else {
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
pStack->disp += pElems[pStack->index].loop.extent;
pos_desc = pStack->index + 1;
}
}
last_count = pElems[pos_desc].elem.count;
last_blength = last_count;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
continue;
}
while( DT_LOOP == pElems[pos_desc].elem.common.type ) {
int stop_in_loop = 0;
if( pElems[pos_desc].elem.common.flags & DT_FLAG_CONTIGUOUS ) {
ddt_endloop_desc_t* end_loop = &(pElems[pos_desc + pElems[pos_desc].loop.items].end_loop);
if( (end_loop->size * last_count) > space ) {
stop_in_loop = last_count;
last_count = space / end_loop->size;
}
for( i = 0; i < last_count; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, end_loop->size,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY_CSUM( pDestBuf, pConv->pBaseBuf + lastDisp, end_loop->size, pConv );
pDestBuf += end_loop->size; /* size of the contiguous data */
lastDisp += pElems[pos_desc].loop.extent;
}
space -= (end_loop->size * last_count);
bConverted += (end_loop->size * last_count);
if( stop_in_loop == 0 ) {
pos_desc += pElems[pos_desc].loop.items + 1;
last_count = pElems[pos_desc].elem.count;
continue;
}
/* mark some of the iterations as completed */
last_count = stop_in_loop - last_count;
last_blength = 0;
/* Save the stack with the correct last_count value. */
}
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, DT_LOOP, last_count,
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
pos_desc++;
last_count = pElems[pos_desc].elem.count;
}
/* now here we have a basic datatype */
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* do we have enough space in the buffer ? */
last_blength = last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
if( space < last_blength ) {
last_blength = last_count;
last_count = space / BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
space -= (last_count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size);
last_blength -= last_count;
goto end_loop; /* or break whatever but go out of this while */
}
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, last_count,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY_CSUM( pDestBuf, pConv->pBaseBuf + lastDisp, last_count, pConv );
bConverted += last_blength;
space -= last_blength;
pDestBuf += last_blength;
pos_desc++; /* advance to the next data */
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
last_count = pElems[pos_desc].elem.count;
}
}
end_loop:
if( last_count != 0 ) { /* save the internal state */
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, last_count,
pConv->pBaseBuf, pData, pConv->count );
MEMCPY_CSUM( pDestBuf, pConv->pBaseBuf + lastDisp, last_count, pConv );
bConverted += last_count;
lastDisp += last_count;
}
pConv->bConverted += bConverted; /* update the byte converted field in the convertor */
iov[0].iov_len = bConverted; /* update the length in the iovec */
*max_data = bConverted;
if( pConv->bConverted == pConv->local_size ) {
pConv->flags |= CONVERTOR_COMPLETED;
return 1;
}
/* update the current stack position */
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, last_blength, pElems[pos_desc].elem.common.type,
lastDisp, pos_desc );
return 0;
}
#define IOVEC_MEM_LIMIT 8192
/* The basic idea is to pack or return iovec depending on the datatype shape. If the data
* is scattered in memory using small chuncks then we have to allocate some space (unless the upper
* level provide some) and pack the data inside. If the chunks of data are large enough
* then is useless to allocate additional memory and do the memcpy operation. We can simply
* return the pointer to the contiguous piece of memory to the upper level.
*/
int32_t
ompi_pack_no_conversion_function( ompi_convertor_t* pConv,
struct iovec* iov,
uint32_t *out_size,
size_t* max_data,
int* freeAfter )
{
dt_stack_t* pStack; /* pointer to the position on the stack */
int pos_desc; /* actual position in the description of the derived datatype */
int i; /* index for basic elements with extent */
uint32_t iov_pos = 0; /* index in the iovec where we put data inside */
int bConverted = 0; /* number of bytes converted/moved this time */
uint32_t space_on_iovec; /* amount of free space on the current iovec */
long lastDisp = 0;
uint32_t space = *max_data, last_blength = 0, saveLength;
char *destination, *source;
const ompi_datatype_t* pData = pConv->pDesc;
ddt_elem_desc_t pack_elem;
dt_elem_desc_t* pElems;
pElems = pConv->use_desc->desc;
pStack = pConv->pStack + pConv->stack_pos;
destination = iov[0].iov_base;
source = (char*)pConv->pBaseBuf + pStack->disp;
/* retrieve the context of the last call */
pos_desc = pStack->index;
pack_elem.count = pStack->count;
pack_elem.common.type = pElems[pos_desc].elem.common.type;
last_blength = pack_elem.count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
lastDisp = pStack->disp;
DO_DEBUG( opal_output( 0, "pack_no_conversion stack_pos %d index %d count %d last_blength %ld lastDisp %ld bConverted %d\n",
pConv->stack_pos, pStack->index, pStack->count, last_blength, lastDisp,
pConv->bConverted ); );
saveLength = 0;
pStack--;
pConv->stack_pos--;
*freeAfter = (*freeAfter) & ~((1 << (*out_size)) - 1);
space_on_iovec = iov[0].iov_len;
while( pos_desc >= 0 ) {
if( DT_END_LOOP == pElems[pos_desc].elem.common.type ) { /* end of the current loop */
if( --(pStack->count) == 0 ) { /* end of loop */
if( pConv->stack_pos == 0 ) { /* finish everything */
if( saveLength != 0 ) {
/* there is still a chunk of memory to be handled, but here we dont allocate more
* memory. We just copy what we can in the right place and update the values to be
* saved on the next round.
*/
if( iov_pos < (*out_size) ) { /* still some place in the iovec */
if( iov[iov_pos].iov_base == NULL ) {
/* prepare a new iovec */
iov[iov_pos].iov_base = source;
iov[iov_pos].iov_len = saveLength;
bConverted += saveLength;
saveLength = 0;
iov_pos++;
space_on_iovec = 0;
COMPUTE_CSUM( iov[iov_pos].iov_base, iov[iov_pos].iov_len, pConv );
/* let's go out of here */
} else {
uint32_t copy_length = saveLength;
if( space_on_iovec < saveLength ) {
copy_length = space_on_iovec;
}
OMPI_DDT_SAFEGUARD_POINTER( source, copy_length,
pConv->pBaseBuf, pData, pConv->count );
DO_DEBUG( opal_output( 0, "1. memcpy( %p, %p, %ld ) bConverted %ld space %ld pConv->bConverted %ld\n", destination, source,
copy_length, bConverted, space_on_iovec, pConv->bConverted ); );
MEMCPY_CSUM( destination, source, copy_length, pConv );
source += copy_length;
destination += copy_length;
bConverted += copy_length;
space_on_iovec -= copy_length;
saveLength -= copy_length;
}
}
}
iov[iov_pos].iov_len -= space_on_iovec;
pack_elem.count = 0;
pos_desc = -1;
last_blength = 0;
goto end_loop;
}
pConv->stack_pos--;
pStack--;
} else {
pos_desc = pStack->index; /* DT_LOOP index */
if( pos_desc == -1 ) {
pStack->disp += (pData->ub - pData->lb);
} else {
assert( DT_LOOP == pElems[pos_desc].elem.common.type );
pStack->disp += pElems[pos_desc].loop.extent;
}
}
pos_desc++; /* go to the next element */
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
pack_elem.count = pElems[pos_desc].elem.count;
last_blength = pack_elem.count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
continue; /* next loop */
}
while( DT_LOOP == pElems[pos_desc].elem.common.type ) {
int stop_in_loop = 0;
/* If the loop container is contiguous then we can do some
* optimizations.
*/
if( pElems[pos_desc].loop.common.flags & DT_FLAG_CONTIGUOUS ) {
/* point to the end of loop element */
ddt_endloop_desc_t* end_loop = &(pElems[pos_desc + pElems[pos_desc].loop.items].end_loop);
if( iov[iov_pos].iov_base == NULL ) {
size_t length = iov[iov_pos].iov_len;
iov[iov_pos].iov_base = pConv->memAlloc_fn( &length, pConv->memAlloc_userdata );
iov[iov_pos].iov_len = length;
space_on_iovec = iov[iov_pos].iov_len;
destination = iov[iov_pos].iov_base;
(*freeAfter) |= (1 << iov_pos);
}
/* compute the maximum amount of data to be packed */
if( (end_loop->size * pack_elem.count) > space_on_iovec ) {
stop_in_loop = pack_elem.count;
pack_elem.count = space_on_iovec / end_loop->size;
}
/* Now let's do it */
for( i = 0; i < (int)pack_elem.count; i++ ) {
OMPI_DDT_SAFEGUARD_POINTER( pConv->pBaseBuf + lastDisp, end_loop->size,
pConv->pBaseBuf, pData, pConv->count );
DO_DEBUG (opal_output( 0, "2. memcpy( %p, %p, %ld )\n", destination, pConv->pBaseBuf + lastDisp,
end_loop->size ); );
MEMCPY_CSUM( destination, pConv->pBaseBuf + lastDisp, end_loop->size, pConv );
lastDisp += pElems[pos_desc].loop.extent;
destination += end_loop->size;
}
DO_DEBUG( opal_output( 0, "\t\tbConverted %d space %d pConv->bConverted %d\n",
bConverted, space_on_iovec, pConv->bConverted ); );
i = end_loop->size * pack_elem.count; /* temporary value */
space_on_iovec -= i;
space -= i;
bConverted += i;
if( stop_in_loop == 0 ) { /* did I stop before the end */
/* the pElems point to the LOOP struct in the begining */
pos_desc += pElems[pos_desc].loop.items + 1;
pack_elem.count = pElems[pos_desc].elem.count;
last_blength = pack_elem.count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
continue;
}
/* mark some of the iterations as completed */
pack_elem.count = stop_in_loop - pack_elem.count;
last_blength = 0;
/* Save the stack with the correct count value. */
}
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, DT_LOOP, pack_elem.count,
pStack->disp, pos_desc + pElems[pos_desc].loop.items );
pos_desc++;
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
pack_elem.count = pElems[pos_desc].elem.count;
last_blength = pack_elem.count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
}
/* now here we have a basic datatype */
while( pElems[pos_desc].elem.common.flags & DT_FLAG_DATA ) {
/* first let's see if it's contiguous with the previous chunk of memory and
* we still have enough room in the buffer...
*/
if( ((source + saveLength) == (pConv->pBaseBuf + lastDisp))
&& ((saveLength + last_blength) <= space_on_iovec)
&& (pElems[pos_desc].elem.extent == (long)BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size) ) {
/* ok still contiguous and we still have some space on the buffer */
saveLength += last_blength;
/* nothing else to do, we act the next time */
} else {
/* Now we have 2 piece of non contiguous memory. One start at source
* with a length of saveLength, the other start at
* pConv->pBaseBuf + lastDisp with a length of last_blength bytes.
* First we have to pack the old buffer and then we should decide
* what we do with the new one.
*/
do {
if( iov[iov_pos].iov_base == NULL ) {
size_t length;
if( saveLength > IOVEC_MEM_LIMIT ) {
/* If the user didn't provide any memory, then we are free
* to handle this case as we want.
*/
iov[iov_pos].iov_base = source;
iov[iov_pos].iov_len = saveLength;
source = pConv->pBaseBuf + lastDisp;
/* update the pack counters values */
bConverted += saveLength;
space -= saveLength;
saveLength = last_blength;
last_blength = 0;
if( ++iov_pos == (*out_size) ) goto end_loop;
destination = iov[iov_pos].iov_base;
space_on_iovec = iov[iov_pos].iov_len;
break;
}
length = iov[iov_pos].iov_len;
/* Let's allocate some. */
iov[iov_pos].iov_base = pConv->memAlloc_fn( &length, pConv->memAlloc_userdata );
iov[iov_pos].iov_len = length;
(*freeAfter) |= (1 << iov_pos);
destination = iov[iov_pos].iov_base;
space_on_iovec = iov[iov_pos].iov_len;
}
/* In all the others cases we simply copy as much data as possible */
if( space_on_iovec > saveLength ) {
OMPI_DDT_SAFEGUARD_POINTER( source, saveLength,
pConv->pBaseBuf, pData, pConv->count );
DO_DEBUG( opal_output( 0, "3. memcpy( %p, %p, %ld ) bConverted %ld space %ld pConv->bConverted %ld\n", destination, source,
saveLength, bConverted, space_on_iovec, pConv->bConverted ); );
MEMCPY_CSUM( destination, source, saveLength, pConv );
destination += saveLength;
/* update the pack counters values */
bConverted += saveLength;
space -= saveLength;
space_on_iovec -= saveLength;
source = pConv->pBaseBuf + lastDisp;
saveLength = last_blength;
last_blength = 0;
break;
}
OMPI_DDT_SAFEGUARD_POINTER( source, space_on_iovec,
pConv->pBaseBuf, pData, pConv->count );
DO_DEBUG( opal_output( 0, "4. memcpy( %p, %p, %ld ) bConverted %ld space %ld pConv->bConverted %ld\n", destination, source,
space_on_iovec, bConverted, space_on_iovec, pConv->bConverted ); );
MEMCPY_CSUM( destination, source, space_on_iovec, pConv );
/* let's prepare for the next round. As I keep trace of the amount that I still
* have to pack, the next time when I came here, I'll try to append something.
* If I already fill-up the amount of data required by the upper level, I will
* simply save all informations in the stack, if not I'll take care of allocating
* new memory and packing the data inside.
*/
source += space_on_iovec;
saveLength -= space_on_iovec;
/* update the pack counters values */
bConverted += space_on_iovec;
space -= space_on_iovec;
lastDisp += space_on_iovec;
/* check for the next step */
if( ++iov_pos == (*out_size) ) { /* are there more iovecs to fill ? */
if( saveLength == 0 ) {
lastDisp -= space_on_iovec;
saveLength = last_blength;
last_blength = 0;
}
goto end_loop;
}
destination = iov[iov_pos].iov_base;
space_on_iovec = iov[iov_pos].iov_len;
} while(1); /* continue forever */
}
if( saveLength > space ) /* this will be the last element copied this time */
continue;
pos_desc++; /* advance to the next data */
lastDisp = pStack->disp + pElems[pos_desc].elem.disp;
pack_elem.count = pElems[pos_desc].elem.count;
last_blength = pack_elem.count * BASIC_DDT_FROM_ELEM(pElems[pos_desc])->size;
}
}
end_loop:
assert( last_blength == 0 );
pConv->bConverted += bConverted; /* update the byte converted field in the convertor */
*max_data = bConverted; /* update the length in the iovec */
if( ((*out_size) == iov_pos) || (iov[iov_pos].iov_base == NULL) ) *out_size = iov_pos;
else *out_size = iov_pos + 1;
if( pConv->bConverted == pConv->local_size ) {
pConv->flags |= CONVERTOR_COMPLETED;
return 1;
}
PUSH_STACK( pStack, pConv->stack_pos, pos_desc, pElems[pos_desc].elem.common.type,
saveLength, lastDisp, pos_desc );
return 0;
}
/* the contig versions does not use the stack. They can easily retrieve
* the status with just the informations from pConvertor->bConverted.
@ -612,9 +60,11 @@ ompi_pack_homogeneous_contig_function( ompi_convertor_t* pConv,
char *source_base = NULL;
size_t length = pConv->local_size - pConv->bConverted;
uint32_t iov_count, initial_amount = pConv->bConverted;
ddt_endloop_desc_t* _end_loop = &(pConv->use_desc->desc[pConv->use_desc->used].end_loop);
long initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;
*freeAfter = 0;
source_base = (pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp);
/* There are some optimizations that can be done if the upper level
* does not provide a buffer.
*/
@ -622,8 +72,6 @@ ompi_pack_homogeneous_contig_function( ompi_convertor_t* pConv,
if( 0 == length ) break;
if( (size_t)iov[iov_count].iov_len > length )
iov[iov_count].iov_len = length;
source_base = (pConv->pBaseBuf + _end_loop->first_elem_disp
+ pStack[0].disp + pStack[1].disp);
if( iov[iov_count].iov_base == NULL ) {
iov[iov_count].iov_base = source_base;
COMPUTE_CSUM( iov[iov_count].iov_base, iov[iov_count].iov_len, pConv );
@ -636,6 +84,7 @@ ompi_pack_homogeneous_contig_function( ompi_convertor_t* pConv,
length -= iov[iov_count].iov_len;
pConv->bConverted += iov[iov_count].iov_len;
pStack[0].disp += iov[iov_count].iov_len;
source_base += iov[iov_count].iov_len;
}
/* update the return value */
@ -661,6 +110,7 @@ ompi_pack_homogeneous_contig_with_gaps_function( ompi_convertor_t* pConv,
long extent;
uint32_t max_allowed, i, index;
uint32_t iov_count, total_bytes_converted = 0;
long initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;
extent = pData->ub - pData->lb;
assert( (pData->flags & DT_FLAG_CONTIGUOUS) && ((long)pData->size != extent) );
@ -676,7 +126,7 @@ ompi_pack_homogeneous_contig_with_gaps_function( ompi_convertor_t* pConv,
/* There are some optimizations that can be done if the upper level
* does not provide a buffer.
*/
user_memory = pConv->pBaseBuf + pData->true_lb + pStack[0].disp + pStack[1].disp;
user_memory = pConv->pBaseBuf + initial_displ + pStack[0].disp + pStack[1].disp;
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
if( 0 == max_allowed ) break; /* we're done this time */
if( iov[iov_count].iov_base == NULL ) {
@ -687,12 +137,13 @@ ompi_pack_homogeneous_contig_with_gaps_function( ompi_convertor_t* pConv,
if( (uint32_t)pStack->count < ((*out_size) - iov_count) ) {
pStack[1].count = pData->size - (pConv->bConverted % pData->size);
for( index = iov_count; i < pConv->count; i++, index++ ) {
iov[index].iov_base = user_memory + pStack[0].disp + pStack[1].disp;
iov[index].iov_base = user_memory;
iov[index].iov_len = pStack[1].count;
pStack[0].disp += extent;
total_bytes_converted += pStack[1].count;
pStack[1].disp = 0; /* reset it for the next round */
pStack[1].count = pData->size;
user_memory = pConv->pBaseBuf + initial_displ + pStack[0].disp;
COMPUTE_CSUM( iov[index].iov_base, iov[index].iov_len, pConv );
}
*out_size = iov_count + index;
@ -760,7 +211,7 @@ ompi_pack_homogeneous_contig_with_gaps_function( ompi_convertor_t* pConv,
i++; /* just to compute the correct source pointer */
total_bytes_converted += done;
}
user_memory = pConv->pBaseBuf + pData->true_lb + i * extent;
user_memory = pConv->pBaseBuf + initial_displ + i * extent;
counter = max_allowed / pData->size;
if( counter > pConv->count ) counter = pConv->count;
for( i = 0; i < counter; i++ ) {
@ -777,7 +228,7 @@ ompi_pack_homogeneous_contig_with_gaps_function( ompi_convertor_t* pConv,
* the pStack[0].disp field. BEWARE here we remove the pStack[1].disp as
* it's supposed to be useless from now.
*/
user_memory = pConv->pBaseBuf + pData->true_lb + pStack[0].disp;
user_memory = pConv->pBaseBuf + initial_displ + pStack[0].disp;
}
*max_data = total_bytes_converted;
pConv->bConverted += total_bytes_converted;

Просмотреть файл

@ -15,30 +15,6 @@
#include "ompi_config.h"
OMPI_DECLSPEC int32_t
ompi_pack_general( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data, int32_t* freeAfter );
OMPI_DECLSPEC int32_t
ompi_pack_general_checksum( ompi_convertor_t* pConvertor,
struct iovec* iov, uint32_t* out_size,
size_t* max_data, int32_t* freeAfter );
OMPI_DECLSPEC int32_t
ompi_pack_homogeneous_with_memcpy( ompi_convertor_t* pConv,
struct iovec* iov, uint32_t* out_size,
size_t* max_data, int32_t* freeAfter );
OMPI_DECLSPEC int32_t
ompi_pack_homogeneous_with_memcpy_checksum( ompi_convertor_t* pConv,
struct iovec* iov, uint32_t* out_size,
size_t* max_data, int32_t* freeAfter );
int32_t
ompi_pack_no_conversion( ompi_convertor_t* pConv,
struct iovec* iov, uint32_t *out_size,
size_t* max_data, int32_t* freeAfter );
int32_t
ompi_pack_no_conversion_checksum( ompi_convertor_t* pConv,
struct iovec* iov, uint32_t *out_size,
size_t* max_data, int32_t* freeAfter );
OMPI_DECLSPEC int32_t
ompi_pack_homogeneous_contig( ompi_convertor_t* pConv,
struct iovec* iov, uint32_t* out_size,

Просмотреть файл

@ -70,7 +70,8 @@ ompi_unpack_general_function( ompi_convertor_t* pConvertor,
int bConverted = 0; /* number of bytes converted this time */
const ompi_convertor_master_t* master = pConvertor->master;
dt_elem_desc_t* pElems;
int oCount = (pConvertor->pDesc->ub - pConvertor->pDesc->lb) * pConvertor->count;
long extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb;
int oCount = extent * pConvertor->count;
char* pInput;
int iCount, rc;
uint32_t iov_count, total_bytes_converted = 0;
@ -104,7 +105,7 @@ ompi_unpack_general_function( ompi_convertor_t* pConvertor,
}
if( pStack->index == -1 ) {
pStack->disp += (pConvertor->pDesc->ub - pConvertor->pDesc->lb);
pStack->disp += extent;
} else {
assert( DT_LOOP == pElems[pStack->index].elem.common.type );
pStack->disp += pElems[pStack->index].loop.extent;
@ -171,6 +172,15 @@ ompi_unpack_general_function( ompi_convertor_t* pConvertor,
return 0;
}
/**
* This function will be used to unpack all datatypes that have the contiguous flag set.
* Several types of datatypes match this criterion, not only the contiguous one, but
* the ones that have gaps in the beginning and/or at the end but where the data to
* be unpacked is contiguous. However, this function only work for homogeneous cases
* and the datatype that are contiguous and where the extent is equal to the size are
* taken in account directly in the ompi_convertor_unpack function (in convertor.c) for
* the homogeneous case.
*/
int32_t
ompi_unpack_homogeneous_contig_function( ompi_convertor_t* pConv,
struct iovec* iov,
@ -184,7 +194,7 @@ ompi_unpack_homogeneous_contig_function( ompi_convertor_t* pConv,
long extent = pData->ub - pData->lb;
uint32_t bConverted, length, remaining, i;
dt_stack_t* stack = &(pConv->pStack[1]);
ddt_endloop_desc_t* _end_loop = &(pConv->use_desc->desc[pConv->use_desc->used].end_loop);
long initial_displ = pConv->use_desc->desc[pConv->use_desc->used].end_loop.first_elem_disp;
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
packed_buffer = (char*)iov[iov_count].iov_base;
@ -192,7 +202,7 @@ ompi_unpack_homogeneous_contig_function( ompi_convertor_t* pConv,
if( remaining > (uint32_t)iov[iov_count].iov_len )
remaining = iov[iov_count].iov_len;
bConverted = remaining; /* how much will get unpacked this time */
user_memory = pConv->pBaseBuf + _end_loop->first_elem_disp;
user_memory = pConv->pBaseBuf + initial_displ;
/*opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %d\n",
user_memory, packed_buffer, remaining );*/
@ -254,6 +264,18 @@ ompi_unpack_homogeneous_contig_function( ompi_convertor_t* pConv,
return 0;
}
/**
* This function handle partial types. Depending on the send operation it might happens
* that we receive only a partial type (always predefined type). In fact the outcome is
* that the unpack has to be done in 2 steps. As there is no way to know if the other
* part of the datatype is already received, we need to use a trick to handle this special
* case. The trick is to fill the missing part with some well known value, unpack the data
* as if it was completely received, and then move into the user memory only the bytes
* that don't match th wekk known value. This approach work as long as there is no need
* for more than structural changes. They will not work for cases where we will have to
* change the content of the data (as in all conversions that require changing the size
* of the exponent or mantissa).
*/
static inline uint32_t
ompi_unpack_partial_datatype( ompi_convertor_t* pConvertor, dt_elem_desc_t* pElem,
char* partial_data,

Просмотреть файл

@ -166,6 +166,7 @@ int32_t ompi_ddt_add( ompi_datatype_t* pdtBase, const ompi_datatype_t* pdtAdd,
*/
pdtBase->lb = lb;
pdtBase->ub = ub;
if( 0 == pdtBase->nbElems ) old_true_ub = disp;
else old_true_ub = pdtBase->true_ub;
pdtBase->true_lb = LMIN( true_lb, pdtBase->true_lb );