Several minor & major bugfixes:
- reorder the code to accept LB and UB with ZERO count (it's MPI legal) - improuve the LB & UB computation, It's still not completly clear how to handle them in some specific cases. This commit was SVN r3328.
Этот коммит содержится в:
родитель
2390f30522
Коммит
317c894a26
@ -21,188 +21,187 @@
|
||||
int ompi_ddt_add( dt_desc_t* pdtBase, dt_desc_t* pdtAdd,
|
||||
unsigned int count, long disp, long extent )
|
||||
{
|
||||
u_int32_t newLength, place_needed = 0, i;
|
||||
short localFlags = 0; /* no specific options yet */
|
||||
dt_elem_desc_t *pLast, *pLoop = NULL;
|
||||
long lb, ub;
|
||||
u_int32_t newLength, place_needed = 0, i;
|
||||
short localFlags = 0; /* no specific options yet */
|
||||
dt_elem_desc_t *pLast, *pLoop = NULL;
|
||||
long lb, ub;
|
||||
|
||||
if( count == 0 ) {
|
||||
if( pdtBase->desc.used == 0 ) { /* empty datatype */
|
||||
pdtBase->lb = 0;
|
||||
pdtBase->ub = 0;
|
||||
pdtBase->true_lb = 0;
|
||||
pdtBase->true_ub = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
/* the extent should be always be positive. So a negative
|
||||
* value here have a special meaning ie. default extent as
|
||||
* computed by ub - lb
|
||||
*/
|
||||
if( extent == -1 ) extent = (pdtAdd->ub - pdtAdd->lb);
|
||||
/* the extent should be always be positive. So a negative
|
||||
* value here have a special meaning ie. default extent as
|
||||
* computed by ub - lb
|
||||
*/
|
||||
if( extent == -1 ) extent = (pdtAdd->ub - pdtAdd->lb);
|
||||
|
||||
/* first make sure that we have enought place to
|
||||
* put the new element inside */
|
||||
if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) {
|
||||
place_needed = 1;
|
||||
/* handle special cases for DT_LB and DT_UB */
|
||||
if( pdtAdd == ompi_ddt_basicDatatypes[DT_LB] ) {
|
||||
pdtBase->bdt_used |= (1<< DT_LB);
|
||||
if( pdtBase->flags & DT_FLAG_USER_LB ) {
|
||||
pdtBase->lb = LMIN( pdtBase->lb, disp );
|
||||
} else {
|
||||
pdtBase->lb = disp;
|
||||
pdtBase->flags |= DT_FLAG_USER_LB;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
} else if( pdtAdd == ompi_ddt_basicDatatypes[DT_UB] ) {
|
||||
pdtBase->bdt_used |= (1<< DT_UB);
|
||||
if( pdtBase->flags & DT_FLAG_USER_UB ) {
|
||||
pdtBase->ub = LMAX( pdtBase->ub, disp );
|
||||
} else {
|
||||
pdtBase->ub = disp;
|
||||
pdtBase->flags |= DT_FLAG_USER_UB;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
} else {
|
||||
place_needed = pdtAdd->desc.used;
|
||||
if( count != 1 ) place_needed += 2;
|
||||
}
|
||||
/* first make sure that we have enought place to
|
||||
* put the new element inside */
|
||||
if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) {
|
||||
place_needed = 1;
|
||||
/* handle special cases for DT_LB and DT_UB */
|
||||
if( pdtAdd == ompi_ddt_basicDatatypes[DT_LB] ) {
|
||||
pdtBase->bdt_used |= (1<< DT_LB);
|
||||
if( pdtBase->flags & DT_FLAG_USER_LB ) {
|
||||
pdtBase->lb = LMIN( pdtBase->lb, disp );
|
||||
} else {
|
||||
pdtBase->lb = disp;
|
||||
pdtBase->flags |= DT_FLAG_USER_LB;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
} else if( pdtAdd == ompi_ddt_basicDatatypes[DT_UB] ) {
|
||||
pdtBase->bdt_used |= (1<< DT_UB);
|
||||
if( pdtBase->flags & DT_FLAG_USER_UB ) {
|
||||
pdtBase->ub = LMAX( pdtBase->ub, disp );
|
||||
} else {
|
||||
pdtBase->ub = disp;
|
||||
pdtBase->flags |= DT_FLAG_USER_UB;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
} else {
|
||||
place_needed = pdtAdd->desc.used;
|
||||
if( count != 1 ) place_needed += 2;
|
||||
}
|
||||
|
||||
OBJ_RETAIN( pdtAdd );
|
||||
/* the count == 0 is LEGAL for MPI_UB and MPI_LB */
|
||||
if( count == 0 ) {
|
||||
if( pdtBase->desc.used == 0 ) { /* empty datatype */
|
||||
pdtBase->lb = 0;
|
||||
pdtBase->ub = 0;
|
||||
pdtBase->true_lb = 0;
|
||||
pdtBase->true_ub = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* compute the new memory alignement */
|
||||
pdtBase->align = IMAX( pdtBase->align, pdtAdd->align );
|
||||
OBJ_RETAIN( pdtAdd );
|
||||
|
||||
pdtBase->bdt_used |= pdtAdd->bdt_used;
|
||||
newLength = pdtBase->desc.used + place_needed;
|
||||
if( newLength > pdtBase->desc.length ) {
|
||||
newLength = ((newLength / DT_INCREASE_STACK) + 1 ) * DT_INCREASE_STACK;
|
||||
printf( "increase the size of the data desc array from %d to %d (old ptr = %p ",
|
||||
pdtBase->desc.length, newLength, (void*)pdtBase->desc.desc );
|
||||
pdtBase->desc.desc = (dt_elem_desc_t*)realloc( pdtBase->desc.desc,
|
||||
sizeof(dt_elem_desc_t) * newLength );
|
||||
printf( "new ptr = %p\n", (void*)pdtBase->desc.desc );
|
||||
pdtBase->desc.length = newLength;
|
||||
}
|
||||
pLast = &(pdtBase->desc.desc[pdtBase->desc.used]);
|
||||
if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) { /* add a basic datatype */
|
||||
pLast->type = pdtAdd->id;
|
||||
pLast->count = count;
|
||||
pLast->disp = disp;
|
||||
pLast->extent = extent;
|
||||
pdtBase->desc.used++;
|
||||
pdtBase->btypes[pdtAdd->id] += count;
|
||||
pLast->flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
|
||||
if( extent == (int)pdtAdd->size )
|
||||
pLast->flags |= DT_FLAG_CONTIGUOUS;
|
||||
} else {
|
||||
/* now we add a complex datatype */
|
||||
if( disp != pdtBase->ub ) { /* add the initial gap */
|
||||
if( disp < pdtBase->ub ) pdtBase->flags |= DT_FLAG_OVERLAP;
|
||||
}
|
||||
/* keep trace of the total number of basic datatypes in the datatype definition */
|
||||
pdtBase->btypes[DT_LOOP] |= pdtAdd->btypes[DT_LOOP];
|
||||
pdtBase->btypes[DT_END_LOOP] |= pdtAdd->btypes[DT_END_LOOP];
|
||||
pdtBase->btypes[DT_LB] |= pdtAdd->btypes[DT_LB];
|
||||
pdtBase->btypes[DT_UB] |= pdtAdd->btypes[DT_UB];
|
||||
for( i = 4; i < DT_MAX_PREDEFINED; i++ )
|
||||
if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]);
|
||||
/* compute the new memory alignement */
|
||||
pdtBase->align = IMAX( pdtBase->align, pdtAdd->align );
|
||||
|
||||
/* if the extent of the datatype if the same as the extent of the loop
|
||||
* description of the datatype then we simply have to update the main loop.
|
||||
*/
|
||||
if( count != 1 ) {
|
||||
pLoop = pLast;
|
||||
pLast->type = DT_LOOP;
|
||||
pLast->count = count;
|
||||
pLast->disp = (long)pdtAdd->desc.used + 1;
|
||||
pLast->extent = extent;
|
||||
pLast->flags = (pdtAdd->flags & ~(DT_FLAG_COMMITED | DT_FLAG_FOREVER));
|
||||
localFlags = DT_FLAG_IN_LOOP;
|
||||
pdtBase->btypes[DT_LOOP] += 2;
|
||||
pdtBase->desc.used += 2;
|
||||
pLast++;
|
||||
}
|
||||
pdtBase->bdt_used |= pdtAdd->bdt_used;
|
||||
newLength = pdtBase->desc.used + place_needed;
|
||||
if( newLength > pdtBase->desc.length ) {
|
||||
newLength = ((newLength / DT_INCREASE_STACK) + 1 ) * DT_INCREASE_STACK;
|
||||
printf( "increase the size of the data desc array from %d to %d (old ptr = %p ",
|
||||
pdtBase->desc.length, newLength, (void*)pdtBase->desc.desc );
|
||||
pdtBase->desc.desc = (dt_elem_desc_t*)realloc( pdtBase->desc.desc,
|
||||
sizeof(dt_elem_desc_t) * newLength );
|
||||
printf( "new ptr = %p\n", (void*)pdtBase->desc.desc );
|
||||
pdtBase->desc.length = newLength;
|
||||
}
|
||||
pLast = &(pdtBase->desc.desc[pdtBase->desc.used]);
|
||||
if( (pdtAdd->flags & DT_FLAG_BASIC) == DT_FLAG_BASIC ) { /* add a basic datatype */
|
||||
pLast->type = pdtAdd->id;
|
||||
pLast->count = count;
|
||||
pLast->disp = disp;
|
||||
pLast->extent = extent;
|
||||
pdtBase->desc.used++;
|
||||
pdtBase->btypes[pdtAdd->id] += count;
|
||||
pLast->flags = pdtAdd->flags & ~(DT_FLAG_FOREVER | DT_FLAG_COMMITED | DT_FLAG_CONTIGUOUS);
|
||||
if( extent == (int)pdtAdd->size )
|
||||
pLast->flags |= DT_FLAG_CONTIGUOUS;
|
||||
} else {
|
||||
/* now we add a complex datatype */
|
||||
if( disp != pdtBase->true_ub ) { /* add the initial gap */
|
||||
if( disp < pdtBase->true_ub ) pdtBase->flags |= DT_FLAG_OVERLAP;
|
||||
}
|
||||
/* keep trace of the total number of basic datatypes in the datatype definition */
|
||||
pdtBase->btypes[DT_LOOP] |= pdtAdd->btypes[DT_LOOP];
|
||||
pdtBase->btypes[DT_END_LOOP] |= pdtAdd->btypes[DT_END_LOOP];
|
||||
pdtBase->btypes[DT_LB] |= pdtAdd->btypes[DT_LB];
|
||||
pdtBase->btypes[DT_UB] |= pdtAdd->btypes[DT_UB];
|
||||
for( i = 4; i < DT_MAX_PREDEFINED; i++ )
|
||||
if( pdtAdd->btypes[i] != 0 ) pdtBase->btypes[i] += (count * pdtAdd->btypes[i]);
|
||||
|
||||
for( i = 0; i < pdtAdd->desc.used; i++ ) {
|
||||
pLast->type = pdtAdd->desc.desc[i].type;
|
||||
pLast->flags = pdtAdd->desc.desc[i].flags | localFlags;
|
||||
pLast->count = pdtAdd->desc.desc[i].count;
|
||||
pLast->extent = pdtAdd->desc.desc[i].extent;
|
||||
pLast->disp = pdtAdd->desc.desc[i].disp;
|
||||
if( pdtAdd->desc.desc[i].type != DT_LOOP )
|
||||
pLast->disp += disp /* + pdtAdd->lb */;
|
||||
pLast++;
|
||||
}
|
||||
pdtBase->desc.used += pdtAdd->desc.used;
|
||||
if( pLoop != NULL ) {
|
||||
pLast->type = DT_END_LOOP;
|
||||
pLast->count = pdtAdd->desc.used + 1; /* where the loop start */
|
||||
pLast->disp = disp + (count - 1) * extent
|
||||
+ (pdtAdd->true_ub - pdtAdd->true_lb) ; /* the final extent for the loop */
|
||||
pLast->extent = pdtAdd->size; /* the size of the data inside the loop */
|
||||
pLast->flags = pLoop->flags;
|
||||
}
|
||||
/* should I add some space until the extent of this datatype ? */
|
||||
}
|
||||
/* if the extent of the datatype if the same as the extent of the loop
|
||||
* description of the datatype then we simply have to update the main loop.
|
||||
*/
|
||||
if( count != 1 ) {
|
||||
pLoop = pLast;
|
||||
pLast->type = DT_LOOP;
|
||||
pLast->count = count;
|
||||
pLast->disp = (long)pdtAdd->desc.used + 1;
|
||||
pLast->extent = extent;
|
||||
pLast->flags = (pdtAdd->flags & ~(DT_FLAG_COMMITED | DT_FLAG_FOREVER));
|
||||
localFlags = DT_FLAG_IN_LOOP;
|
||||
pdtBase->btypes[DT_LOOP] += 2;
|
||||
pdtBase->desc.used += 2;
|
||||
pLast++;
|
||||
}
|
||||
|
||||
pdtBase->size += count * pdtAdd->size;
|
||||
pdtBase->true_lb = LMIN( pdtBase->true_lb, pdtAdd->true_lb + disp );
|
||||
pdtBase->true_ub = LMAX( pdtBase->true_ub,
|
||||
disp + pdtAdd->true_lb +
|
||||
(count - 1) * extent + pdtAdd->true_ub );
|
||||
for( i = 0; i < pdtAdd->desc.used; i++ ) {
|
||||
pLast->type = pdtAdd->desc.desc[i].type;
|
||||
pLast->flags = pdtAdd->desc.desc[i].flags | localFlags;
|
||||
pLast->count = pdtAdd->desc.desc[i].count;
|
||||
pLast->extent = pdtAdd->desc.desc[i].extent;
|
||||
pLast->disp = pdtAdd->desc.desc[i].disp;
|
||||
if( pdtAdd->desc.desc[i].type != DT_LOOP )
|
||||
pLast->disp += disp /* + pdtAdd->lb */;
|
||||
pLast++;
|
||||
}
|
||||
pdtBase->desc.used += pdtAdd->desc.used;
|
||||
if( pLoop != NULL ) {
|
||||
pLast->type = DT_END_LOOP;
|
||||
pLast->count = pdtAdd->desc.used + 1; /* where the loop start */
|
||||
pLast->disp = disp + (count - 1) * extent
|
||||
+ (pdtAdd->true_ub - pdtAdd->true_lb) ; /* the final extent for the loop */
|
||||
pLast->extent = pdtAdd->size; /* the size of the data inside the loop */
|
||||
pLast->flags = pLoop->flags;
|
||||
}
|
||||
/* should I add some space until the extent of this datatype ? */
|
||||
}
|
||||
|
||||
/* the lower bound should be inherited from the parents if and only
|
||||
* if the USER has explicitly set it. The result lb is the MIN between
|
||||
* the all lb + disp if and only if all or nobody flags's contain the LB.
|
||||
*/
|
||||
if( (pdtAdd->flags ^ pdtBase->flags) & DT_FLAG_USER_LB ) {
|
||||
pdtBase->flags |= DT_FLAG_USER_LB;
|
||||
if( pdtAdd->flags & DT_FLAG_USER_LB )
|
||||
lb = pdtAdd->lb + disp;
|
||||
else
|
||||
lb = pdtBase->lb;
|
||||
} else {
|
||||
lb = LMIN( pdtBase->lb, pdtAdd->lb + disp );
|
||||
}
|
||||
pdtBase->size += count * pdtAdd->size;
|
||||
pdtBase->true_lb = LMIN( pdtBase->true_lb, pdtAdd->true_lb + disp );
|
||||
pdtBase->true_ub = LMAX( pdtBase->true_ub,
|
||||
disp + pdtAdd->true_lb +
|
||||
(count - 1) * extent + pdtAdd->true_ub );
|
||||
|
||||
/* the same apply for the upper bound except for the case where
|
||||
* either of them has the flag UB, in which case we should
|
||||
* compute the UB including the natural alignement of the data.
|
||||
*/
|
||||
if( (pdtBase->flags ^ pdtAdd->flags) & DT_FLAG_USER_UB ) {
|
||||
if( pdtBase->flags & DT_FLAG_USER_UB )
|
||||
ub = pdtBase->ub;
|
||||
else {
|
||||
pdtBase->flags |= DT_FLAG_USER_UB;
|
||||
ub = disp + pdtAdd->lb + count * extent;
|
||||
}
|
||||
} else {
|
||||
if( pdtBase->flags & DT_FLAG_USER_UB )
|
||||
ub = LMAX( pdtBase->ub, disp + pdtAdd->lb + count * (extent) );
|
||||
else {
|
||||
/* we should compute the extent depending on the alignement */
|
||||
long ubN = (disp + pdtAdd->lb + count * (extent));
|
||||
ub = LMAX( ((pdtBase->ub / pdtBase->align) * pdtBase->align),
|
||||
(((ubN + pdtBase->align - 1)/ pdtBase->align) * pdtBase->align) );
|
||||
}
|
||||
}
|
||||
/* update the extent and size */
|
||||
pdtBase->lb = lb;
|
||||
pdtBase->ub = ub;
|
||||
pdtBase->nbElems += (count * pdtAdd->nbElems);
|
||||
/* the lower bound should be inherited from the parents if and only
|
||||
* if the USER has explicitly set it. The result lb is the MIN between
|
||||
* the all lb + disp if and only if all or nobody flags's contain the LB.
|
||||
*/
|
||||
if( (pdtAdd->flags ^ pdtBase->flags) & DT_FLAG_USER_LB ) {
|
||||
if( pdtAdd->flags & DT_FLAG_USER_LB )
|
||||
lb = pdtAdd->lb + disp;
|
||||
else
|
||||
lb = pdtBase->lb;
|
||||
pdtBase->flags |= DT_FLAG_USER_LB;
|
||||
} else {
|
||||
/* both of them have the LB flag or both of them dont have it */
|
||||
lb = LMIN( pdtBase->lb, pdtAdd->lb + disp );
|
||||
}
|
||||
|
||||
/* Is the data still contiguous ?
|
||||
* The only way for the data to be contiguous is to have the true extent equal to his size.
|
||||
* In other words to avoid having internal gaps between elements.
|
||||
*/
|
||||
if( ((int)pdtBase->size != (pdtBase->true_ub - pdtBase->true_lb)) ||
|
||||
!(pdtBase->flags & DT_FLAG_CONTIGUOUS) || !(pdtAdd->flags & DT_FLAG_CONTIGUOUS) )
|
||||
UNSET_CONTIGUOUS_FLAG(pdtBase->flags);
|
||||
/* the same apply for the upper bound except for the case where
|
||||
* either of them has the flag UB, in which case we should
|
||||
* compute the UB including the natural alignement of the data.
|
||||
*/
|
||||
if( (pdtBase->flags ^ pdtAdd->flags) & DT_FLAG_USER_UB ) {
|
||||
if( pdtAdd->flags & DT_FLAG_USER_UB )
|
||||
ub = disp + pdtAdd->lb + count * extent;
|
||||
else {
|
||||
ub = pdtBase->ub;
|
||||
}
|
||||
pdtBase->flags |= DT_FLAG_USER_UB;
|
||||
} else {
|
||||
/* both of them have the UB flag or both of them dont have it */
|
||||
/* we should compute the extent depending on the alignement */
|
||||
ub = LMAX( pdtBase->ub,
|
||||
(disp + pdtAdd->lb + count * (extent)) );
|
||||
}
|
||||
/* update the extent and size */
|
||||
pdtBase->lb = lb;
|
||||
pdtBase->ub = ub;
|
||||
pdtBase->nbElems += (count * pdtAdd->nbElems);
|
||||
|
||||
OBJ_RELEASE( pdtAdd );
|
||||
/* Is the data still contiguous ?
|
||||
* The only way for the data to be contiguous is to have the true extent equal to his size.
|
||||
* In other words to avoid having internal gaps between elements.
|
||||
*/
|
||||
if( ((int)pdtBase->size != (pdtBase->true_ub - pdtBase->true_lb)) ||
|
||||
!(pdtBase->flags & DT_FLAG_CONTIGUOUS) || !(pdtAdd->flags & DT_FLAG_CONTIGUOUS) )
|
||||
UNSET_CONTIGUOUS_FLAG(pdtBase->flags);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
OBJ_RELEASE( pdtAdd );
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user