Reduce the overhead for contiguous predefined datatypes. There is now a special path
in the convertor code for predefined contiguous types. This commit was SVN r9821.
Этот коммит содержится в:
родитель
71d328c086
Коммит
c53914ce31
@ -188,6 +188,45 @@ int32_t ompi_convertor_pack( ompi_convertor_t* pConv,
|
|||||||
{
|
{
|
||||||
OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size, max_data );
|
OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size, max_data );
|
||||||
|
|
||||||
|
if( (pConv->flags & (DT_FLAG_PREDEFINED | CONVERTOR_HOMOGENEOUS)) ==
|
||||||
|
(DT_FLAG_PREDEFINED | CONVERTOR_HOMOGENEOUS) ) {
|
||||||
|
/* We are doing conversion on a predefined datatype. The convertor contains
|
||||||
|
* minimal information; we only use bConverted to manage the conversion.
|
||||||
|
*/
|
||||||
|
uint32_t i;
|
||||||
|
char* base_pointer;
|
||||||
|
|
||||||
|
*max_data = pConv->bConverted;
|
||||||
|
for( i = 0; i < *out_size; i++ ) {
|
||||||
|
base_pointer = pConv->pBaseBuf + pConv->bConverted;
|
||||||
|
if( NULL == iov[i].iov_base ) {
|
||||||
|
iov[i].iov_base = base_pointer;
|
||||||
|
pConv->bConverted += iov[i].iov_len;
|
||||||
|
if( pConv->bConverted > pConv->local_size ) {
|
||||||
|
iov[i].iov_len -= (pConv->bConverted - pConv->local_size);
|
||||||
|
goto predefined_data_pack;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pConv->bConverted += iov[i].iov_len;
|
||||||
|
if( pConv->bConverted > pConv->local_size ) {
|
||||||
|
iov[i].iov_len -= (pConv->bConverted - pConv->local_size);
|
||||||
|
MEMCPY( iov[i].iov_base, base_pointer, iov[i].iov_len );
|
||||||
|
goto predefined_data_pack;
|
||||||
|
}
|
||||||
|
MEMCPY( iov[i].iov_base, base_pointer,
|
||||||
|
iov[i].iov_len );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*max_data = pConv->bConverted - (*max_data);
|
||||||
|
return 0;
|
||||||
|
predefined_data_pack:
|
||||||
|
*out_size = i;
|
||||||
|
*max_data = pConv->bConverted - (*max_data);
|
||||||
|
pConv->bConverted = pConv->local_size;
|
||||||
|
pConv->flags |= CONVERTOR_COMPLETED;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* There is no specific memory allocation. If the convertor notice that some memory
|
/* There is no specific memory allocation. If the convertor notice that some memory
|
||||||
* is required in order to perform the operation (depend on the way the convertor
|
* is required in order to perform the operation (depend on the way the convertor
|
||||||
* was configured) it will call the attached function to get some memory. Any failure
|
* was configured) it will call the attached function to get some memory. Any failure
|
||||||
@ -202,6 +241,36 @@ inline int32_t ompi_convertor_unpack( ompi_convertor_t* pConv,
|
|||||||
{
|
{
|
||||||
OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size, max_data );
|
OMPI_CONVERTOR_SET_STATUS_BEFORE_PACK_UNPACK( pConv, iov, out_size, max_data );
|
||||||
|
|
||||||
|
if( (pConv->flags & (DT_FLAG_PREDEFINED | CONVERTOR_HOMOGENEOUS)) ==
|
||||||
|
(DT_FLAG_PREDEFINED | CONVERTOR_HOMOGENEOUS) ) {
|
||||||
|
/* We are doing conversion on a predefined datatype. The convertor contains
|
||||||
|
* minimal information; we only use bConverted to manage the conversion.
|
||||||
|
*/
|
||||||
|
uint32_t i;
|
||||||
|
char* base_pointer;
|
||||||
|
|
||||||
|
*max_data = pConv->bConverted;
|
||||||
|
for( i = 0; i < *out_size; i++ ) {
|
||||||
|
base_pointer = pConv->pBaseBuf + pConv->bConverted;
|
||||||
|
pConv->bConverted += iov[i].iov_len;
|
||||||
|
if( pConv->bConverted > pConv->local_size ) {
|
||||||
|
pConv->bConverted = pConv->local_size;
|
||||||
|
iov[i].iov_len -= (pConv->bConverted - pConv->local_size);
|
||||||
|
MEMCPY( base_pointer, iov[i].iov_base, iov[i].iov_len );
|
||||||
|
goto predefined_data_unpack;
|
||||||
|
}
|
||||||
|
MEMCPY( base_pointer, iov[i].iov_base, iov[i].iov_len );
|
||||||
|
}
|
||||||
|
*max_data = pConv->bConverted - (*max_data);
|
||||||
|
return 0;
|
||||||
|
predefined_data_unpack:
|
||||||
|
*out_size = i;
|
||||||
|
*max_data = pConv->bConverted - (*max_data);
|
||||||
|
pConv->bConverted = pConv->local_size;
|
||||||
|
pConv->flags |= CONVERTOR_COMPLETED;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
return pConv->fAdvance( pConv, iov, out_size, max_data, freeAfter );
|
return pConv->fAdvance( pConv, iov, out_size, max_data, freeAfter );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -291,20 +360,6 @@ int32_t ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor,
|
|||||||
{
|
{
|
||||||
int32_t rc;
|
int32_t rc;
|
||||||
|
|
||||||
/*
|
|
||||||
* Do not allow the convertor to go outside the data boundaries. This test includes
|
|
||||||
* the check for datatype with size zero as well as for convertors with a count of zero.
|
|
||||||
*/
|
|
||||||
if( convertor->local_size <= *position) {
|
|
||||||
convertor->flags |= CONVERTOR_COMPLETED;
|
|
||||||
convertor->bConverted = convertor->local_size;
|
|
||||||
*position = convertor->bConverted;
|
|
||||||
return OMPI_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Remove the completed flag if it's already set */
|
|
||||||
convertor->flags &= ~CONVERTOR_COMPLETED;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we plan to rollback the convertor then first we have to set it
|
* If we plan to rollback the convertor then first we have to set it
|
||||||
* at the beginning.
|
* at the beginning.
|
||||||
@ -323,17 +378,6 @@ int32_t ompi_convertor_set_position_nocheck( ompi_convertor_t* convertor,
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t
|
|
||||||
ompi_convertor_personalize( ompi_convertor_t* convertor, uint32_t flags,
|
|
||||||
size_t* position, memalloc_fct_t allocfn, void* userdata )
|
|
||||||
{
|
|
||||||
convertor->flags |= flags;
|
|
||||||
convertor->memAlloc_fn = allocfn;
|
|
||||||
convertor->memAlloc_userdata = userdata;
|
|
||||||
|
|
||||||
return ompi_convertor_set_position( convertor, position );
|
|
||||||
}
|
|
||||||
|
|
||||||
/* This function will initialize a convertor based on a previously created convertor. The idea
|
/* This function will initialize a convertor based on a previously created convertor. The idea
|
||||||
* is to move outside these functions the heavy selection of architecture features for the convertors.
|
* is to move outside these functions the heavy selection of architecture features for the convertors.
|
||||||
*
|
*
|
||||||
@ -365,16 +409,22 @@ int ompi_convertor_prepare( ompi_convertor_t* convertor,
|
|||||||
convertor->local_size = convertor->count * datatype->size;
|
convertor->local_size = convertor->count * datatype->size;
|
||||||
/* If the data is empty we don't have to do anything except mark the convertor as
|
/* If the data is empty we don't have to do anything except mark the convertor as
|
||||||
* completed. With this flag set the pack and unpack functions will not do
|
* completed. With this flag set the pack and unpack functions will not do
|
||||||
* anything.
|
* anything. In order to decrease the data dependencies (and to speed-up this code)
|
||||||
|
* we will not test the convertor->local_size but we can test the 2 components.
|
||||||
*/
|
*/
|
||||||
if( 0 == convertor->local_size ) {
|
if( (0 == convertor->count) || (0 == datatype->size) ) {
|
||||||
convertor->flags |= CONVERTOR_COMPLETED;
|
convertor->flags |= CONVERTOR_COMPLETED;
|
||||||
convertor->remote_size = 0;
|
convertor->local_size = convertor->remote_size = 0;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( convertor->remoteArch == ompi_mpi_local_arch ) {
|
if( convertor->remoteArch == ompi_mpi_local_arch ) {
|
||||||
convertor->remote_size = convertor->local_size;
|
convertor->remote_size = convertor->local_size;
|
||||||
|
/* For predefined datatypes (contiguous) do nothing more */
|
||||||
|
if( (convertor->flags & DT_FLAG_PREDEFINED) == DT_FLAG_PREDEFINED ) {
|
||||||
|
convertor->bConverted = 0;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
int i;
|
int i;
|
||||||
uint64_t bdt_mask = datatype->bdt_used >> DT_CHAR;
|
uint64_t bdt_mask = datatype->bdt_used >> DT_CHAR;
|
||||||
|
@ -204,15 +204,6 @@ OMPI_DECLSPEC ompi_convertor_t* ompi_convertor_create( int32_t remote_arch, int3
|
|||||||
*/
|
*/
|
||||||
OMPI_DECLSPEC int ompi_convertor_cleanup( ompi_convertor_t* convertor );
|
OMPI_DECLSPEC int ompi_convertor_cleanup( ompi_convertor_t* convertor );
|
||||||
|
|
||||||
/*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
OMPI_DECLSPEC int32_t
|
|
||||||
ompi_convertor_personalize( ompi_convertor_t* pConv, uint32_t flags,
|
|
||||||
size_t* starting_point,
|
|
||||||
memalloc_fct_t allocfn,
|
|
||||||
void* userdata );
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@ -312,9 +303,46 @@ ompi_convertor_set_position( ompi_convertor_t* convertor,
|
|||||||
* If the convertor is already at the correct position we are happy.
|
* If the convertor is already at the correct position we are happy.
|
||||||
*/
|
*/
|
||||||
if( (*position) == convertor->bConverted ) return OMPI_SUCCESS;
|
if( (*position) == convertor->bConverted ) return OMPI_SUCCESS;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Do not allow the convertor to go outside the data boundaries. This test includes
|
||||||
|
* the check for datatype with size zero as well as for convertors with a count of zero.
|
||||||
|
*/
|
||||||
|
if( convertor->local_size <= *position) {
|
||||||
|
convertor->flags |= CONVERTOR_COMPLETED;
|
||||||
|
convertor->bConverted = convertor->local_size;
|
||||||
|
*position = convertor->bConverted;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Remove the completed flag if it's already set */
|
||||||
|
convertor->flags &= ~CONVERTOR_COMPLETED;
|
||||||
|
|
||||||
|
if( (convertor->flags & (DT_FLAG_PREDEFINED | CONVERTOR_HOMOGENEOUS)) ==
|
||||||
|
(DT_FLAG_PREDEFINED | CONVERTOR_HOMOGENEOUS) ) {
|
||||||
|
/* basic predefined datatype (contiguous) */
|
||||||
|
convertor->bConverted = *position;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
return ompi_convertor_set_position_nocheck( convertor, position );
|
return ompi_convertor_set_position_nocheck( convertor, position );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
static inline int32_t
|
||||||
|
ompi_convertor_personalize( ompi_convertor_t* convertor, uint32_t flags,
|
||||||
|
size_t* position,
|
||||||
|
memalloc_fct_t allocfn, void* userdata )
|
||||||
|
{
|
||||||
|
convertor->flags |= flags;
|
||||||
|
convertor->memAlloc_fn = allocfn;
|
||||||
|
convertor->memAlloc_userdata = userdata;
|
||||||
|
|
||||||
|
return ompi_convertor_set_position( convertor, position );
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user