Use the COLL_TUNED_COMPUTED_SEGCOUNT macro to compute the number of elements per segment in both
bcast and reduce, and initialize segcount to count, the default value required by the comment in the coll_tuned.h file. This commit was SVN r12546.
Этот коммит содержится в:
родитель
476b922074
Коммит
af68171253
@ -267,7 +267,7 @@ ompi_coll_tuned_bcast_intra_pipeline( void* buffer,
|
|||||||
struct ompi_communicator_t* comm,
|
struct ompi_communicator_t* comm,
|
||||||
uint32_t segsize )
|
uint32_t segsize )
|
||||||
{
|
{
|
||||||
int segcount;
|
int segcount = count;
|
||||||
size_t typelng;
|
size_t typelng;
|
||||||
|
|
||||||
COLL_TUNED_UPDATE_PIPELINE( comm, root );
|
COLL_TUNED_UPDATE_PIPELINE( comm, root );
|
||||||
@ -293,7 +293,7 @@ ompi_coll_tuned_bcast_intra_chain( void* buffer,
|
|||||||
struct ompi_communicator_t* comm,
|
struct ompi_communicator_t* comm,
|
||||||
uint32_t segsize, int32_t chains )
|
uint32_t segsize, int32_t chains )
|
||||||
{
|
{
|
||||||
int segcount;
|
int segcount = count;
|
||||||
size_t typelng;
|
size_t typelng;
|
||||||
|
|
||||||
COLL_TUNED_UPDATE_CHAIN( comm, root, chains );
|
COLL_TUNED_UPDATE_CHAIN( comm, root, chains );
|
||||||
@ -319,7 +319,7 @@ ompi_coll_tuned_bcast_intra_binomial( void* buffer,
|
|||||||
struct ompi_communicator_t* comm,
|
struct ompi_communicator_t* comm,
|
||||||
uint32_t segsize )
|
uint32_t segsize )
|
||||||
{
|
{
|
||||||
int segcount;
|
int segcount = count;
|
||||||
size_t typelng;
|
size_t typelng;
|
||||||
|
|
||||||
COLL_TUNED_UPDATE_BMTREE( comm, root );
|
COLL_TUNED_UPDATE_BMTREE( comm, root );
|
||||||
|
@ -213,7 +213,7 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
|
|||||||
int root, ompi_communicator_t* comm, uint32_t segsize,
|
int root, ompi_communicator_t* comm, uint32_t segsize,
|
||||||
int fanout)
|
int fanout)
|
||||||
{
|
{
|
||||||
int segcount;
|
int segcount = count;
|
||||||
size_t typelng;
|
size_t typelng;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_chain rank %d fo %d ss %5d", ompi_comm_rank(comm), fanout, segsize));
|
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_chain rank %d fo %d ss %5d", ompi_comm_rank(comm), fanout, segsize));
|
||||||
@ -224,11 +224,7 @@ int ompi_coll_tuned_reduce_intra_chain( void *sendbuf, void *recvbuf, int count,
|
|||||||
* sent per operation
|
* sent per operation
|
||||||
*/
|
*/
|
||||||
ompi_ddt_type_size( datatype, &typelng );
|
ompi_ddt_type_size( datatype, &typelng );
|
||||||
if( segsize > typelng ) {
|
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
|
||||||
segcount = (int)(segsize / typelng);
|
|
||||||
} else {
|
|
||||||
segcount = count;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm,
|
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm,
|
||||||
comm->c_coll_selected_data->cached_chain, segcount );
|
comm->c_coll_selected_data->cached_chain, segcount );
|
||||||
@ -240,7 +236,7 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
|
|||||||
ompi_op_t* op, int root,
|
ompi_op_t* op, int root,
|
||||||
ompi_communicator_t* comm, uint32_t segsize )
|
ompi_communicator_t* comm, uint32_t segsize )
|
||||||
{
|
{
|
||||||
int segcount;
|
int segcount = count;
|
||||||
size_t typelng;
|
size_t typelng;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_pipeline rank %d ss %5d",
|
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_pipeline rank %d ss %5d",
|
||||||
@ -253,11 +249,7 @@ int ompi_coll_tuned_reduce_intra_pipeline( void *sendbuf, void *recvbuf,
|
|||||||
* sent per operation
|
* sent per operation
|
||||||
*/
|
*/
|
||||||
ompi_ddt_type_size( datatype, &typelng );
|
ompi_ddt_type_size( datatype, &typelng );
|
||||||
if( segsize > typelng ) {
|
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
|
||||||
segcount = (int)(segsize / typelng);
|
|
||||||
} else {
|
|
||||||
segcount = count;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm,
|
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm,
|
||||||
comm->c_coll_selected_data->cached_pipeline, segcount );
|
comm->c_coll_selected_data->cached_pipeline, segcount );
|
||||||
@ -268,7 +260,7 @@ int ompi_coll_tuned_reduce_intra_binary( void *sendbuf, void *recvbuf,
|
|||||||
ompi_op_t* op, int root,
|
ompi_op_t* op, int root,
|
||||||
ompi_communicator_t* comm, uint32_t segsize )
|
ompi_communicator_t* comm, uint32_t segsize )
|
||||||
{
|
{
|
||||||
int segcount;
|
int segcount = count;
|
||||||
size_t typelng;
|
size_t typelng;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binary rank %d ss %5d",
|
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binary rank %d ss %5d",
|
||||||
@ -281,11 +273,7 @@ int ompi_coll_tuned_reduce_intra_binary( void *sendbuf, void *recvbuf,
|
|||||||
* sent per operation
|
* sent per operation
|
||||||
*/
|
*/
|
||||||
ompi_ddt_type_size( datatype, &typelng );
|
ompi_ddt_type_size( datatype, &typelng );
|
||||||
if( segsize > typelng ) {
|
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
|
||||||
segcount = (int)(segsize / typelng);
|
|
||||||
} else {
|
|
||||||
segcount = count;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm,
|
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm,
|
||||||
comm->c_coll_selected_data->cached_bintree, segcount );
|
comm->c_coll_selected_data->cached_bintree, segcount );
|
||||||
@ -296,7 +284,7 @@ int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf,
|
|||||||
ompi_op_t* op, int root,
|
ompi_op_t* op, int root,
|
||||||
ompi_communicator_t* comm, uint32_t segsize )
|
ompi_communicator_t* comm, uint32_t segsize )
|
||||||
{
|
{
|
||||||
int segcount;
|
int segcount = count;
|
||||||
size_t typelng;
|
size_t typelng;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binomial rank %d ss %5d",
|
OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:reduce_intra_binomial rank %d ss %5d",
|
||||||
@ -309,11 +297,7 @@ int ompi_coll_tuned_reduce_intra_binomial( void *sendbuf, void *recvbuf,
|
|||||||
* sent per operation
|
* sent per operation
|
||||||
*/
|
*/
|
||||||
ompi_ddt_type_size( datatype, &typelng );
|
ompi_ddt_type_size( datatype, &typelng );
|
||||||
if( segsize > typelng ) {
|
COLL_TUNED_COMPUTED_SEGCOUNT( segsize, typelng, segcount );
|
||||||
segcount = (int)(segsize / typelng);
|
|
||||||
} else {
|
|
||||||
segcount = count;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm,
|
return ompi_coll_tuned_reduce_generic( sendbuf, recvbuf, count, datatype, op, root, comm,
|
||||||
comm->c_coll_selected_data->cached_bmtree, segcount );
|
comm->c_coll_selected_data->cached_bmtree, segcount );
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user