/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2012 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2013      Los Alamos National Security, LLC. All Rights
 *                         reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
# include "ompi_config.h"
# include "mpi.h"
# include "ompi/constants.h"
# include "ompi/datatype/ompi_datatype.h"
# include "ompi/communicator/communicator.h"
# include "ompi/mca/coll/coll.h"
# include "ompi/mca/coll/base/coll_tags.h"
# include "ompi/mca/pml/pml.h"
# include "coll_tuned.h"
# include "coll_tuned_topo.h"
# include "coll_tuned_util.h"
/* alltoall algorithm tuning knobs (set up via the MCA variable system) */
static int coll_tuned_alltoall_algorithm_count = 5;  /* number of selectable algorithms */
static int coll_tuned_alltoall_forced_algorithm = 0; /* 0 == let the decision logic pick */
static int coll_tuned_alltoall_segment_size = 0;     /* segmentation size, 0 == unsegmented */
static int coll_tuned_alltoall_max_requests;         /* outstanding-request cap (linear_sync) */
static int coll_tuned_alltoall_tree_fanout;
static int coll_tuned_alltoall_chain_fanout;

/* valid values for coll_tuned_alltoall_forced_algorithm */
static mca_base_var_enum_value_t alltoall_algorithms[] = {
    {0, "ignore"},
    {1, "linear"},
    {2, "pairwise"},
    {3, "modified_bruck"},
    {4, "linear_sync"},
    {5, "two_proc"},
    {0, NULL}  /* sentinel terminating the enum list */
};
/* MPI_IN_PLACE all to all algorithm. TODO: implement a better one. */
/*
 * Pairwise in-place exchange: for every pair (i, j) with i < j, the two
 * members of the pair swap their blocks through a temporary buffer while
 * everyone else skips the round.  O(size^2) rounds overall — slow but
 * correct, and needs only one block of scratch space.
 *
 * Returns MPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE on allocation failure,
 * or the first error reported by the datatype/PML calls.
 */
static int
mca_coll_tuned_alltoall_intra_basic_inplace(void *rbuf, int rcount,
                                            struct ompi_datatype_t *rdtype,
                                            struct ompi_communicator_t *comm,
                                            mca_coll_base_module_t *module)
{
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t *) module;
    int i, j, size, rank, err = MPI_SUCCESS;  /* initialized: not every (i,j) round sets err */
    MPI_Request *preq;
    char *tmp_buffer;
    size_t max_size;
    ptrdiff_t ext;

    /* Initialize. */
    size = ompi_comm_size (comm);
    rank = ompi_comm_rank (comm);

    /* If only one process, we're done. */
    if (1 == size) {
        return MPI_SUCCESS;
    }

    /* Find the largest receive amount */
    ompi_datatype_type_extent (rdtype, &ext);
    max_size = ext * rcount;

    /* Allocate a temporary buffer */
    tmp_buffer = calloc (max_size, 1);
    if (NULL == tmp_buffer) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* in-place alltoall slow algorithm (but works) */
    for (i = 0 ; i < size ; ++i) {
        for (j = i+1 ; j < size ; ++j) {
            /* Initiate all send/recv to/from others. */
            preq = tuned_module->tuned_data->mcct_reqs;

            if (i == rank) {
                /* Copy the data into the temporary buffer */
                err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
                                                           (char *) rbuf + j * max_size);
                if (MPI_SUCCESS != err) { goto error_hndl; }

                /* Exchange data with the peer */
                err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * j, rcount, rdtype,
                                          j, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
                if (MPI_SUCCESS != err) { goto error_hndl; }

                err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
                                          j, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
                                          comm, preq++));
                if (MPI_SUCCESS != err) { goto error_hndl; }
            } else if (j == rank) {
                /* Copy the data into the temporary buffer */
                err = ompi_datatype_copy_content_same_ddt (rdtype, rcount, tmp_buffer,
                                                           (char *) rbuf + i * max_size);
                if (MPI_SUCCESS != err) { goto error_hndl; }

                /* Exchange data with the peer */
                err = MCA_PML_CALL(irecv ((char *) rbuf + max_size * i, rcount, rdtype,
                                          i, MCA_COLL_BASE_TAG_ALLTOALL, comm, preq++));
                if (MPI_SUCCESS != err) { goto error_hndl; }

                err = MCA_PML_CALL(isend ((char *) tmp_buffer, rcount, rdtype,
                                          i, MCA_COLL_BASE_TAG_ALLTOALL, MCA_PML_BASE_SEND_STANDARD,
                                          comm, preq++));
                if (MPI_SUCCESS != err) { goto error_hndl; }
            } else {
                continue;
            }

            /* Wait for the requests to complete.
             * BUG FIX: wait_all on multiple requests takes an array of
             * statuses; the correct ignore sentinel is MPI_STATUSES_IGNORE
             * (MPI_STATUS_IGNORE is only valid for single-request waits). */
            err = ompi_request_wait_all (2, tuned_module->tuned_data->mcct_reqs, MPI_STATUSES_IGNORE);
            if (MPI_SUCCESS != err) { goto error_hndl; }

            /* Free the requests. */
            mca_coll_tuned_free_reqs(tuned_module->tuned_data->mcct_reqs, 2);
        }
    }

 error_hndl:
    /* Free the temporary buffer */
    free (tmp_buffer);

    /* All done */
    return err;
}
int ompi_coll_tuned_alltoall_intra_pairwise ( void * sbuf , int scount ,
2006-10-18 06:00:46 +04:00
struct ompi_datatype_t * sdtype ,
void * rbuf , int rcount ,
struct ompi_datatype_t * rdtype ,
2007-08-19 07:37:49 +04:00
struct ompi_communicator_t * comm ,
2012-04-06 19:48:07 +04:00
mca_coll_base_module_t * module )
2005-09-13 08:28:18 +04:00
{
2012-04-06 19:48:07 +04:00
int line = - 1 , err = 0 , rank , size , step , sendto , recvfrom ;
2005-09-13 08:28:18 +04:00
void * tmpsend , * tmprecv ;
2006-10-18 00:20:58 +04:00
ptrdiff_t lb , sext , rext ;
2005-09-13 08:28:18 +04:00
2013-07-25 23:19:41 +04:00
if ( MPI_IN_PLACE = = sbuf ) {
return mca_coll_tuned_alltoall_intra_basic_inplace ( rbuf , rcount , rdtype ,
comm , module ) ;
}
2005-09-13 08:28:18 +04:00
size = ompi_comm_size ( comm ) ;
rank = ompi_comm_rank ( comm ) ;
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2007-07-07 05:06:19 +04:00
" coll:tuned:alltoall_intra_pairwise rank %d " , rank ) ) ;
2005-09-13 08:28:18 +04:00
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
err = ompi_datatype_get_extent ( sdtype , & lb , & sext ) ;
2005-09-13 08:28:18 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
err = ompi_datatype_get_extent ( rdtype , & lb , & rext ) ;
2005-09-13 08:28:18 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
2007-07-07 05:06:19 +04:00
2006-10-18 06:00:46 +04:00
/* Perform pairwise exchange - starting from 1 so the local copy is last */
2007-07-07 05:06:19 +04:00
for ( step = 1 ; step < size + 1 ; step + + ) {
2005-09-13 08:28:18 +04:00
2007-07-07 05:06:19 +04:00
/* Determine sender and receiver for this step. */
sendto = ( rank + step ) % size ;
recvfrom = ( rank + size - step ) % size ;
2005-09-13 08:28:18 +04:00
2007-07-07 05:06:19 +04:00
/* Determine sending and receiving locations */
2012-03-06 02:23:44 +04:00
tmpsend = ( char * ) sbuf + ( ptrdiff_t ) sendto * sext * ( ptrdiff_t ) scount ;
tmprecv = ( char * ) rbuf + ( ptrdiff_t ) recvfrom * rext * ( ptrdiff_t ) rcount ;
2005-09-13 08:28:18 +04:00
2006-10-18 06:00:46 +04:00
/* send and receive */
2007-07-07 05:06:19 +04:00
err = ompi_coll_tuned_sendrecv ( tmpsend , scount , sdtype , sendto ,
MCA_COLL_BASE_TAG_ALLTOALL ,
tmprecv , rcount , rdtype , recvfrom ,
MCA_COLL_BASE_TAG_ALLTOALL ,
2006-10-18 06:00:46 +04:00
comm , MPI_STATUS_IGNORE , rank ) ;
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
}
2005-09-13 08:28:18 +04:00
2006-10-18 06:00:46 +04:00
return MPI_SUCCESS ;
2005-09-13 08:28:18 +04:00
err_hndl :
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2007-07-07 05:06:19 +04:00
" %s:%4d \t Error occurred %d, rank %2d " , __FILE__ , line ,
err , rank ) ) ;
2006-10-18 06:00:46 +04:00
return err ;
2005-09-13 08:28:18 +04:00
}
2005-12-22 16:49:33 +03:00
int ompi_coll_tuned_alltoall_intra_bruck ( void * sbuf , int scount ,
2006-10-18 06:00:46 +04:00
struct ompi_datatype_t * sdtype ,
void * rbuf , int rcount ,
struct ompi_datatype_t * rdtype ,
2007-08-19 07:37:49 +04:00
struct ompi_communicator_t * comm ,
2012-04-06 19:48:07 +04:00
mca_coll_base_module_t * module )
2005-09-13 08:28:18 +04:00
{
2012-04-06 19:48:07 +04:00
int i , k , line = - 1 , rank , size , err = 0 , weallocated = 0 ;
2007-07-07 05:06:19 +04:00
int sendto , recvfrom , distance , * displs = NULL , * blen = NULL ;
char * tmpbuf = NULL , * tmpbuf_free = NULL ;
ptrdiff_t rlb , slb , tlb , sext , rext , tsext ;
struct ompi_datatype_t * new_ddt ;
2007-08-19 07:37:49 +04:00
# ifdef blahblah
mca_coll_tuned_module_t * tuned_module = ( mca_coll_tuned_module_t * ) module ;
mca_coll_tuned_comm_t * data = tuned_module - > tuned_data ;
# endif
2005-09-13 08:28:18 +04:00
2013-07-25 23:19:41 +04:00
if ( MPI_IN_PLACE = = sbuf ) {
return mca_coll_tuned_alltoall_intra_basic_inplace ( rbuf , rcount , rdtype ,
comm , module ) ;
}
2005-09-13 08:28:18 +04:00
size = ompi_comm_size ( comm ) ;
rank = ompi_comm_rank ( comm ) ;
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2007-07-07 05:06:19 +04:00
" coll:tuned:alltoall_intra_bruck rank %d " , rank ) ) ;
err = ompi_datatype_get_extent ( sdtype , & slb , & sext ) ;
2005-09-13 08:28:18 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
err = ompi_datatype_get_true_extent ( sdtype , & tlb , & tsext ) ;
2007-07-07 05:06:19 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
err = ompi_datatype_get_extent ( rdtype , & rlb , & rext ) ;
2005-09-13 08:28:18 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
# ifdef blahblah
2007-07-07 05:06:19 +04:00
/* try and SAVE memory by using the data segment hung off
the communicator if possible */
2007-08-19 07:37:49 +04:00
if ( data - > mcct_num_reqs > = size ) {
2005-09-13 08:28:18 +04:00
/* we have enought preallocated for displments and lengths */
2007-08-19 07:37:49 +04:00
displs = ( int * ) data - > mcct_reqs ;
2005-09-13 08:28:18 +04:00
blen = ( int * ) ( displs + size ) ;
weallocated = 0 ;
}
else { /* allocate the buffers ourself */
# endif
2007-07-07 05:06:19 +04:00
displs = ( int * ) malloc ( size * sizeof ( int ) ) ;
2005-09-13 08:28:18 +04:00
if ( displs = = NULL ) { line = __LINE__ ; err = - 1 ; goto err_hndl ; }
2007-07-07 05:06:19 +04:00
blen = ( int * ) malloc ( size * sizeof ( int ) ) ;
2005-09-13 08:28:18 +04:00
if ( blen = = NULL ) { line = __LINE__ ; err = - 1 ; goto err_hndl ; }
weallocated = 1 ;
# ifdef blahblah
}
# endif
/* tmp buffer allocation for message data */
2012-03-06 02:23:44 +04:00
tmpbuf_free = ( char * ) malloc ( tsext + ( ( ptrdiff_t ) scount * ( ptrdiff_t ) size - 1 ) * sext ) ;
2007-07-07 05:06:19 +04:00
if ( tmpbuf_free = = NULL ) { line = __LINE__ ; err = - 1 ; goto err_hndl ; }
tmpbuf = tmpbuf_free - slb ;
2005-09-13 08:28:18 +04:00
/* Step 1 - local rotation - shift up by rank */
err = ompi_datatype_copy_content_same_ddt ( sdtype ,
2012-04-06 19:48:07 +04:00
( int32_t ) ( ( ptrdiff_t ) ( size - rank ) * ( ptrdiff_t ) scount ) ,
tmpbuf ,
( ( char * ) sbuf ) + ( ptrdiff_t ) rank * ( ptrdiff_t ) scount * sext ) ;
2005-09-13 08:28:18 +04:00
if ( err < 0 ) {
2006-10-18 06:00:46 +04:00
line = __LINE__ ; err = - 1 ; goto err_hndl ;
2005-09-13 08:28:18 +04:00
}
if ( rank ! = 0 ) {
2012-03-06 02:23:44 +04:00
err = ompi_datatype_copy_content_same_ddt ( sdtype , ( ptrdiff_t ) rank * ( ptrdiff_t ) scount ,
2012-04-06 19:48:07 +04:00
tmpbuf + ( ptrdiff_t ) ( size - rank ) * ( ptrdiff_t ) scount * sext ,
( char * ) sbuf ) ;
2005-09-13 08:28:18 +04:00
if ( err < 0 ) {
2006-10-18 06:00:46 +04:00
line = __LINE__ ; err = - 1 ; goto err_hndl ;
2005-09-13 08:28:18 +04:00
}
}
2006-10-18 06:00:46 +04:00
/* perform communication step */
for ( distance = 1 ; distance < size ; distance < < = 1 ) {
2007-07-07 05:06:19 +04:00
sendto = ( rank + distance ) % size ;
recvfrom = ( rank - distance + size ) % size ;
2006-10-18 06:00:46 +04:00
k = 0 ;
/* create indexed datatype */
for ( i = 1 ; i < size ; i + + ) {
2007-07-07 05:06:19 +04:00
if ( ( i & distance ) = = distance ) {
2012-03-06 02:23:44 +04:00
displs [ k ] = ( ptrdiff_t ) i * ( ptrdiff_t ) scount ;
2007-07-07 05:06:19 +04:00
blen [ k ] = scount ;
2006-10-18 06:00:46 +04:00
k + + ;
}
}
/* Set indexes and displacements */
err = ompi_datatype_create_indexed ( k , blen , displs , sdtype , & new_ddt ) ;
2006-10-18 06:00:46 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
/* Commit the new datatype */
err = ompi_datatype_commit ( & new_ddt ) ;
2006-10-18 06:00:46 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
/* Sendreceive */
2007-07-07 05:06:19 +04:00
err = ompi_coll_tuned_sendrecv ( tmpbuf , 1 , new_ddt , sendto ,
2006-10-18 06:00:46 +04:00
MCA_COLL_BASE_TAG_ALLTOALL ,
2007-07-07 05:06:19 +04:00
rbuf , 1 , new_ddt , recvfrom ,
2006-10-18 06:00:46 +04:00
MCA_COLL_BASE_TAG_ALLTOALL ,
2007-07-07 05:06:19 +04:00
comm , MPI_STATUS_IGNORE , rank ) ;
2006-10-18 06:00:46 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
2007-07-07 05:06:19 +04:00
/* Copy back new data from recvbuf to tmpbuf */
err = ompi_datatype_copy_content_same_ddt ( new_ddt , 1 , tmpbuf , ( char * ) rbuf ) ;
2007-07-07 05:06:19 +04:00
if ( err < 0 ) { line = __LINE__ ; err = - 1 ; goto err_hndl ; }
2005-09-13 08:28:18 +04:00
2006-10-18 06:00:46 +04:00
/* free ddt */
err = ompi_datatype_destroy ( & new_ddt ) ;
2006-10-18 06:00:46 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
} /* end of for (distance = 1... */
2005-09-13 08:28:18 +04:00
2006-10-18 06:00:46 +04:00
/* Step 3 - local rotation - */
for ( i = 0 ; i < size ; i + + ) {
2005-09-13 22:35:45 +04:00
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
err = ompi_datatype_copy_content_same_ddt ( rdtype , ( int32_t ) rcount ,
2012-04-06 19:48:07 +04:00
( ( char * ) rbuf ) + ( ( ptrdiff_t ) ( ( rank - i + size ) % size ) * ( ptrdiff_t ) rcount * rext ) ,
tmpbuf + ( ptrdiff_t ) i * ( ptrdiff_t ) rcount * rext ) ;
2007-07-07 05:06:19 +04:00
if ( err < 0 ) { line = __LINE__ ; err = - 1 ; goto err_hndl ; }
2006-10-18 06:00:46 +04:00
}
2005-09-13 08:28:18 +04:00
2006-10-18 06:00:46 +04:00
/* Step 4 - clean up */
2007-07-07 05:06:19 +04:00
if ( tmpbuf ! = NULL ) free ( tmpbuf_free ) ;
2006-10-18 06:00:46 +04:00
if ( weallocated ) {
if ( displs ! = NULL ) free ( displs ) ;
if ( blen ! = NULL ) free ( blen ) ;
}
return OMPI_SUCCESS ;
2005-09-13 08:28:18 +04:00
err_hndl :
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2007-07-07 05:06:19 +04:00
" %s:%4d \t Error occurred %d, rank %2d " , __FILE__ , line , err ,
rank ) ) ;
if ( tmpbuf ! = NULL ) free ( tmpbuf_free ) ;
2006-10-18 06:00:46 +04:00
if ( weallocated ) {
if ( displs ! = NULL ) free ( displs ) ;
if ( blen ! = NULL ) free ( blen ) ;
}
return err ;
2005-09-13 08:28:18 +04:00
}
2007-02-20 07:25:00 +03:00
/*
* alltoall_intra_linear_sync
*
* Function : Linear implementation of alltoall with limited number
* of outstanding requests .
* Accepts : Same as MPI_Alltoall ( ) , and the maximum number of
* outstanding requests ( actual number is 2 * max , since
* we count receive and send requests separately ) .
* Returns : MPI_SUCCESS or error code
*
* Description : Algorithm is the following :
* 1 ) post K irecvs , K < = N
* 2 ) post K isends , K < = N
* 3 ) while not done
* - wait for any request to complete
* - replace that request by the new one of the same type .
*/
int ompi_coll_tuned_alltoall_intra_linear_sync ( void * sbuf , int scount ,
struct ompi_datatype_t * sdtype ,
void * rbuf , int rcount ,
struct ompi_datatype_t * rdtype ,
struct ompi_communicator_t * comm ,
2012-04-06 19:48:07 +04:00
mca_coll_base_module_t * module ,
2007-02-20 07:25:00 +03:00
int max_outstanding_reqs )
{
2012-04-06 19:48:07 +04:00
int line , error , ri , si , rank , size , nreqs , nrreqs , nsreqs , total_reqs ;
char * psnd , * prcv ;
ptrdiff_t slb , sext , rlb , rext ;
2007-02-20 07:25:00 +03:00
ompi_request_t * * reqs = NULL ;
2013-07-25 23:19:41 +04:00
if ( MPI_IN_PLACE = = sbuf ) {
return mca_coll_tuned_alltoall_intra_basic_inplace ( rbuf , rcount , rdtype ,
comm , module ) ;
}
2007-02-20 07:25:00 +03:00
/* Initialize. */
size = ompi_comm_size ( comm ) ;
rank = ompi_comm_rank ( comm ) ;
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2007-07-07 05:06:19 +04:00
" ompi_coll_tuned_alltoall_intra_linear_sync rank %d " , rank ) ) ;
2007-02-20 07:25:00 +03:00
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
error = ompi_datatype_get_extent ( sdtype , & slb , & sext ) ;
2007-02-20 07:25:00 +03:00
if ( OMPI_SUCCESS ! = error ) {
return error ;
}
sext * = scount ;
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
error = ompi_datatype_get_extent ( rdtype , & rlb , & rext ) ;
2007-02-20 07:25:00 +03:00
if ( OMPI_SUCCESS ! = error ) {
return error ;
}
rext * = rcount ;
/* simple optimization */
2012-03-06 02:23:44 +04:00
psnd = ( ( char * ) sbuf ) + ( ptrdiff_t ) rank * sext ;
prcv = ( ( char * ) rbuf ) + ( ptrdiff_t ) rank * rext ;
2007-02-20 07:25:00 +03:00
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
error = ompi_datatype_sndrcv ( psnd , scount , sdtype , prcv , rcount , rdtype ) ;
2007-02-20 07:25:00 +03:00
if ( MPI_SUCCESS ! = error ) {
return error ;
}
/* If only one process, we're done. */
if ( 1 = = size ) {
return MPI_SUCCESS ;
}
/* Initiate send/recv to/from others. */
total_reqs = ( ( ( max_outstanding_reqs > ( size - 1 ) ) | |
( max_outstanding_reqs < = 0 ) ) ?
( size - 1 ) : ( max_outstanding_reqs ) ) ;
reqs = ( ompi_request_t * * ) malloc ( 2 * total_reqs *
sizeof ( ompi_request_t * ) ) ;
2010-05-20 01:19:45 +04:00
if ( NULL = = reqs ) { error = - 1 ; line = __LINE__ ; goto error_hndl ; }
2007-02-20 07:25:00 +03:00
prcv = ( char * ) rbuf ;
psnd = ( char * ) sbuf ;
/* Post first batch or ireceive and isend requests */
for ( nreqs = 0 , nrreqs = 0 , ri = ( rank + 1 ) % size ; nreqs < total_reqs ;
ri = ( ri + 1 ) % size , + + nreqs , + + nrreqs ) {
2012-04-06 19:48:07 +04:00
error =
MCA_PML_CALL ( irecv
( prcv + ( ptrdiff_t ) ri * rext , rcount , rdtype , ri ,
MCA_COLL_BASE_TAG_ALLTOALL , comm , & reqs [ nreqs ] ) ) ;
if ( MPI_SUCCESS ! = error ) { line = __LINE__ ; goto error_hndl ; }
2007-02-20 07:25:00 +03:00
}
for ( nsreqs = 0 , si = ( rank + size - 1 ) % size ; nreqs < 2 * total_reqs ;
si = ( si + size - 1 ) % size , + + nreqs , + + nsreqs ) {
2012-04-06 19:48:07 +04:00
error =
MCA_PML_CALL ( isend
( psnd + ( ptrdiff_t ) si * sext , scount , sdtype , si ,
MCA_COLL_BASE_TAG_ALLTOALL ,
MCA_PML_BASE_SEND_STANDARD , comm , & reqs [ nreqs ] ) ) ;
if ( MPI_SUCCESS ! = error ) { line = __LINE__ ; goto error_hndl ; }
2007-02-20 07:25:00 +03:00
}
/* Wait for requests to complete */
if ( nreqs = = 2 * ( size - 1 ) ) {
2012-04-06 19:48:07 +04:00
/* Optimization for the case when all requests have been posted */
error = ompi_request_wait_all ( nreqs , reqs , MPI_STATUSES_IGNORE ) ;
if ( MPI_SUCCESS ! = error ) { line = __LINE__ ; goto error_hndl ; }
2007-02-20 07:25:00 +03:00
} else {
2012-04-06 19:48:07 +04:00
/* As requests complete, replace them with corresponding requests:
- wait for any request to complete , mark the request as
MPI_REQUEST_NULL
- If it was a receive request , replace it with new irecv request
( if any )
- if it was a send request , replace it with new isend request ( if any )
*/
int ncreqs = 0 ;
while ( ncreqs < 2 * ( size - 1 ) ) {
int completed ;
error = ompi_request_wait_any ( 2 * total_reqs , reqs , & completed ,
MPI_STATUS_IGNORE ) ;
if ( MPI_SUCCESS ! = error ) { line = __LINE__ ; goto error_hndl ; }
reqs [ completed ] = MPI_REQUEST_NULL ;
ncreqs + + ;
if ( completed < total_reqs ) {
if ( nrreqs < ( size - 1 ) ) {
error =
MCA_PML_CALL ( irecv
( prcv + ( ptrdiff_t ) ri * rext , rcount , rdtype , ri ,
MCA_COLL_BASE_TAG_ALLTOALL , comm ,
2007-02-20 07:25:00 +03:00
& reqs [ completed ] ) ) ;
2012-04-06 19:48:07 +04:00
if ( MPI_SUCCESS ! = error ) { line = __LINE__ ; goto error_hndl ; }
+ + nrreqs ;
ri = ( ri + 1 ) % size ;
}
} else {
if ( nsreqs < ( size - 1 ) ) {
error = MCA_PML_CALL ( isend
( psnd + ( ptrdiff_t ) si * sext , scount , sdtype , si ,
MCA_COLL_BASE_TAG_ALLTOALL ,
MCA_PML_BASE_SEND_STANDARD , comm ,
& reqs [ completed ] ) ) ;
+ + nsreqs ;
si = ( si + size - 1 ) % size ;
}
}
}
2007-02-20 07:25:00 +03:00
}
/* Free the reqs */
free ( reqs ) ;
/* All done */
return MPI_SUCCESS ;
error_hndl :
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2007-07-07 05:06:19 +04:00
" %s:%4d \t Error occurred %d, rank %2d " , __FILE__ , line , error ,
rank ) ) ;
2007-02-20 07:25:00 +03:00
if ( NULL ! = reqs ) free ( reqs ) ;
return error ;
}
2005-09-13 08:28:18 +04:00
2005-12-22 16:49:33 +03:00
int ompi_coll_tuned_alltoall_intra_two_procs ( void * sbuf , int scount ,
2006-10-18 06:00:46 +04:00
struct ompi_datatype_t * sdtype ,
void * rbuf , int rcount ,
struct ompi_datatype_t * rdtype ,
2007-08-19 07:37:49 +04:00
struct ompi_communicator_t * comm ,
2012-04-06 19:48:07 +04:00
mca_coll_base_module_t * module )
2005-09-13 08:28:18 +04:00
{
2012-04-06 19:48:07 +04:00
int line = - 1 , err = 0 , rank , remote ;
2005-09-13 08:28:18 +04:00
void * tmpsend , * tmprecv ;
2006-10-18 00:20:58 +04:00
ptrdiff_t sext , rext , lb ;
2005-09-13 08:28:18 +04:00
2013-07-25 23:19:41 +04:00
if ( MPI_IN_PLACE = = sbuf ) {
return mca_coll_tuned_alltoall_intra_basic_inplace ( rbuf , rcount , rdtype ,
comm , module ) ;
}
2005-09-13 08:28:18 +04:00
rank = ompi_comm_rank ( comm ) ;
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2007-07-07 05:06:19 +04:00
" ompi_coll_tuned_alltoall_intra_two_procs rank %d " , rank ) ) ;
2005-09-13 08:28:18 +04:00
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
err = ompi_datatype_get_extent ( sdtype , & lb , & sext ) ;
2005-09-13 08:28:18 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
err = ompi_datatype_get_extent ( rdtype , & lb , & rext ) ;
2005-09-13 08:28:18 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
/* exchange data */
2007-07-07 05:06:19 +04:00
remote = rank ^ 1 ;
2005-09-13 08:28:18 +04:00
2012-03-06 02:23:44 +04:00
tmpsend = ( char * ) sbuf + ( ptrdiff_t ) remote * sext * ( ptrdiff_t ) scount ;
tmprecv = ( char * ) rbuf + ( ptrdiff_t ) remote * rext * ( ptrdiff_t ) rcount ;
2005-09-13 08:28:18 +04:00
/* send and receive */
2007-07-07 05:06:19 +04:00
err = ompi_coll_tuned_sendrecv ( tmpsend , scount , sdtype , remote ,
MCA_COLL_BASE_TAG_ALLTOALL ,
tmprecv , rcount , rdtype , remote ,
MCA_COLL_BASE_TAG_ALLTOALL ,
2006-10-18 06:00:46 +04:00
comm , MPI_STATUS_IGNORE , rank ) ;
2005-09-13 08:28:18 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
/* ddt sendrecv your own data */
2012-03-06 02:23:44 +04:00
err = ompi_datatype_sndrcv ( ( char * ) sbuf + ( ptrdiff_t ) rank * sext * ( ptrdiff_t ) scount ,
2012-04-06 19:48:07 +04:00
( int32_t ) scount , sdtype ,
( char * ) rbuf + ( ptrdiff_t ) rank * rext * ( ptrdiff_t ) rcount ,
( int32_t ) rcount , rdtype ) ;
2005-09-13 08:28:18 +04:00
if ( err ! = MPI_SUCCESS ) { line = __LINE__ ; goto err_hndl ; }
/* done */
return MPI_SUCCESS ;
err_hndl :
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2007-07-07 05:06:19 +04:00
" %s:%4d \t Error occurred %d, rank %2d " , __FILE__ , line , err ,
rank ) ) ;
2006-10-18 06:00:46 +04:00
return err ;
2005-09-13 08:28:18 +04:00
}
2005-10-27 03:51:56 +04:00
/*
 * Linear functions are copied from the BASIC coll module.
 * They do not segment the message and are simple implementations,
 * but for some small number of nodes and/or small data sizes they
 * are just as fast as tuned/tree-based segmenting operations,
 * and as such may be selected by the decision functions.
 * These are copied into this module due to the way we select modules
 * in V1, i.e. in V2 we will handle this differently and so will not
 * have to duplicate code.
 * GEF Oct05 after asking Jeff.
 */
/* copied function (with appropriate renaming) starts here */
2005-12-22 16:49:33 +03:00
int ompi_coll_tuned_alltoall_intra_basic_linear ( void * sbuf , int scount ,
2006-10-18 06:00:46 +04:00
struct ompi_datatype_t * sdtype ,
void * rbuf , int rcount ,
struct ompi_datatype_t * rdtype ,
2007-08-19 07:37:49 +04:00
struct ompi_communicator_t * comm ,
2013-07-25 23:19:41 +04:00
mca_coll_base_module_t * module )
2005-09-13 08:28:18 +04:00
{
2012-04-06 19:48:07 +04:00
int i , rank , size , err , nreqs ;
char * psnd , * prcv ;
MPI_Aint lb , sndinc , rcvinc ;
ompi_request_t * * req , * * sreq , * * rreq ;
2007-08-19 07:37:49 +04:00
mca_coll_tuned_module_t * tuned_module = ( mca_coll_tuned_module_t * ) module ;
mca_coll_tuned_comm_t * data = tuned_module - > tuned_data ;
2013-07-25 23:19:41 +04:00
if ( MPI_IN_PLACE = = sbuf ) {
return mca_coll_tuned_alltoall_intra_basic_inplace ( rbuf , rcount , rdtype ,
comm , module ) ;
}
2005-10-27 03:51:56 +04:00
/* Initialize. */
2005-09-13 08:28:18 +04:00
size = ompi_comm_size ( comm ) ;
rank = ompi_comm_rank ( comm ) ;
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream ,
2007-07-07 05:06:19 +04:00
" ompi_coll_tuned_alltoall_intra_basic_linear rank %d " , rank ) ) ;
2005-09-13 08:28:18 +04:00
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
err = ompi_datatype_get_extent ( sdtype , & lb , & sndinc ) ;
2005-10-27 03:51:56 +04:00
if ( OMPI_SUCCESS ! = err ) {
return err ;
}
sndinc * = scount ;
2005-09-13 08:28:18 +04:00
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
err = ompi_datatype_get_extent ( rdtype , & lb , & rcvinc ) ;
2005-10-27 03:51:56 +04:00
if ( OMPI_SUCCESS ! = err ) {
return err ;
}
rcvinc * = rcount ;
2005-09-13 08:28:18 +04:00
2005-10-27 03:51:56 +04:00
/* simple optimization */
2012-03-06 02:23:44 +04:00
psnd = ( ( char * ) sbuf ) + ( ptrdiff_t ) rank * sndinc ;
prcv = ( ( char * ) rbuf ) + ( ptrdiff_t ) rank * rcvinc ;
2005-10-27 03:51:56 +04:00
- Split the datatype engine into two parts: an MPI specific part in
OMPI
and a language agnostic part in OPAL. The convertor is completely
moved into OPAL. This offers several benefits as described in RFC
http://www.open-mpi.org/community/lists/devel/2009/07/6387.php
namely:
- Fewer basic types (int* and float* types, boolean and wchar
- Fixing naming scheme to ompi-nomenclature.
- Usability outside of the ompi-layer.
- Due to the fixed nature of simple opal types, their information is
completely
known at compile time and therefore constified
- With fewer datatypes (22), the actual sizes of bit-field types may be
reduced
from 64 to 32 bits, allowing reorganizing the opal_datatype
structure, eliminating holes and keeping data required in convertor
(upon send/recv) in one cacheline...
This has implications to the convertor-datastructure and other parts
of the code.
- Several performance tests have been run, the netpipe latency does not
change with
this patch on Linux/x86-64 on the smoky cluster.
- Extensive tests have been done to verify correctness (no new
regressions) using:
1. mpi_test_suite on linux/x86-64 using clean ompi-trunk and
ompi-ddt:
a. running both trunk and ompi-ddt resulted in no differences
(except for MPI_SHORT_INT and MPI_TYPE_MIX_LB_UB do now run
correctly).
b. with --enable-memchecker and running under valgrind (one buglet
when run with static found in test-suite, commited)
2. ibm testsuite on linux/x86-64 using clean ompi-trunk and ompi-ddt:
all passed (except for the dynamic/ tests failed!! as trunk/MTT)
3. compilation and usage of HDF5 tests on Jaguar using PGI and
PathScale compilers.
4. compilation and usage on Scicortex.
- Please note, that for the heterogeneous case, (-m32 compiled
binaries/ompi), neither
ompi-trunk, nor ompi-ddt branch would successfully launch.
This commit was SVN r21641.
2009-07-13 08:56:31 +04:00
err = ompi_datatype_sndrcv ( psnd , scount , sdtype , prcv , rcount , rdtype ) ;
2005-10-27 03:51:56 +04:00
if ( MPI_SUCCESS ! = err ) {
return err ;
}
/* If only one process, we're done. */
if ( 1 = = size ) {
return MPI_SUCCESS ;
}
/* Initiate all send/recv to/from others. */
2007-08-19 07:37:49 +04:00
req = rreq = data - > mcct_reqs ;
2005-10-27 03:51:56 +04:00
sreq = rreq + size - 1 ;
prcv = ( char * ) rbuf ;
psnd = ( char * ) sbuf ;
/* Post all receives first -- a simple optimization */
2007-01-27 00:59:31 +03:00
for ( nreqs = 0 , i = ( rank + 1 ) % size ; i ! = rank ;
i = ( i + 1 ) % size , + + rreq , + + nreqs ) {
2005-10-27 03:51:56 +04:00
err =
MCA_PML_CALL ( irecv_init
2012-03-06 02:23:44 +04:00
( prcv + ( ptrdiff_t ) i * rcvinc , rcount , rdtype , i ,
2005-10-27 03:51:56 +04:00
MCA_COLL_BASE_TAG_ALLTOALL , comm , rreq ) ) ;
if ( MPI_SUCCESS ! = err ) {
2005-12-22 16:49:33 +03:00
ompi_coll_tuned_free_reqs ( req , rreq - req ) ;
2005-10-27 03:51:56 +04:00
return err ;
}
}
2007-01-27 00:59:31 +03:00
/* Now post all sends in reverse order
- We would like to minimize the search time through message queue
when messages actually arrive in the order in which they were posted .
*/
for ( nreqs = 0 , i = ( rank + size - 1 ) % size ; i ! = rank ;
i = ( i + size - 1 ) % size , + + sreq , + + nreqs ) {
2005-10-27 03:51:56 +04:00
err =
MCA_PML_CALL ( isend_init
2012-03-06 02:23:44 +04:00
( psnd + ( ptrdiff_t ) i * sndinc , scount , sdtype , i ,
2005-10-27 03:51:56 +04:00
MCA_COLL_BASE_TAG_ALLTOALL ,
MCA_PML_BASE_SEND_STANDARD , comm , sreq ) ) ;
if ( MPI_SUCCESS ! = err ) {
2005-12-22 16:49:33 +03:00
ompi_coll_tuned_free_reqs ( req , sreq - req ) ;
2005-10-27 03:51:56 +04:00
return err ;
}
}
2006-11-08 19:54:03 +03:00
nreqs = ( size - 1 ) * 2 ;
2005-10-27 03:51:56 +04:00
/* Start your engines. This will never return an error. */
MCA_PML_CALL ( start ( nreqs , req ) ) ;
/* Wait for them all. If there's an error, note that we don't
* care what the error was - - just that there * was * an error . The
* PML will finish all requests , even if one or more of them fail .
* i . e . , by the end of this call , all the requests are free - able .
* So free them anyway - - even if there was an error , and return
* the error after we free everything . */
err = ompi_request_wait_all ( nreqs , req , MPI_STATUSES_IGNORE ) ;
/* Free the reqs */
2005-12-22 16:49:33 +03:00
ompi_coll_tuned_free_reqs ( req , nreqs ) ;
2005-10-27 03:51:56 +04:00
/* All done */
return err ;
2005-09-13 08:28:18 +04:00
}
2005-10-27 03:51:56 +04:00
/* copied function (with appropriate renaming) ends here */
2005-10-25 07:55:58 +04:00
/* The following are used by dynamic and forced rules */
2005-09-13 08:28:18 +04:00
2005-10-25 07:55:58 +04:00
/* publish details of each algorithm and if its forced/fixed/locked in */
/* as you add methods/algorithms you must update this and the query/map routines */
2005-09-13 08:28:18 +04:00
2006-04-20 03:42:06 +04:00
/* this routine is called by the component only */
/* this makes sure that the mca parameters are set to their initial values and perms */
/* module does not call this they call the forced_getvalues routine instead */
/*
 * Register the MCA parameters that control forced selection of an
 * alltoall algorithm.  Called once by the component (never by the
 * module -- the module reads the values back via the forced_getvalues
 * routine instead), so the parameters start at their initial values
 * with the right permissions.
 *
 * @param mca_param_indices  Filled in with the registered variable
 *                           indices for algorithm/segsize/fanout/
 *                           max_requests so they can be queried later.
 * @return MPI_SUCCESS on success, or the negative error index returned
 *         by a failed mca_base_component_var_register() call.
 */
int ompi_coll_tuned_alltoall_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
{
    mca_base_var_enum_t *new_enum;

    ompi_coll_tuned_forced_max_algorithms[ALLTOALL] = coll_tuned_alltoall_algorithm_count;

    /* Read-only count of available algorithms (DEFAULT_ONLY: informational). */
    (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                           "alltoall_algorithm_count",
                                           "Number of alltoall algorithms available",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0,
                                           MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_CONSTANT,
                                           &coll_tuned_alltoall_algorithm_count);

    /* MPI_T: This variable should eventually be bound to a communicator */
    coll_tuned_alltoall_forced_algorithm = 0;
    (void) mca_base_var_enum_create("coll_tuned_alltoall_algorithms", alltoall_algorithms, &new_enum);
    mca_param_indices->algorithm_param_index =
        mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                        "alltoall_algorithm",
                                        "Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: linear with sync, 5:two proc only.",
                                        MCA_BASE_VAR_TYPE_INT, new_enum, 0, 0,
                                        OPAL_INFO_LVL_5,
                                        MCA_BASE_VAR_SCOPE_READONLY,
                                        &coll_tuned_alltoall_forced_algorithm);
    /* The registered variable holds its own reference to the enum. */
    OBJ_RELEASE(new_enum);
    if (mca_param_indices->algorithm_param_index < 0) {
        return mca_param_indices->algorithm_param_index;
    }

    /* 0 bytes means no segmentation. */
    coll_tuned_alltoall_segment_size = 0;
    mca_param_indices->segsize_param_index =
        mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                        "alltoall_algorithm_segmentsize",
                                        "Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
                                        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                        OPAL_INFO_LVL_5,
                                        MCA_BASE_VAR_SCOPE_READONLY,
                                        &coll_tuned_alltoall_segment_size);

    coll_tuned_alltoall_tree_fanout = ompi_coll_tuned_init_tree_fanout; /* get system wide default */
    mca_param_indices->tree_fanout_param_index =
        mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                        "alltoall_algorithm_tree_fanout",
                                        "Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
                                        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                        OPAL_INFO_LVL_5,
                                        MCA_BASE_VAR_SCOPE_READONLY,
                                        &coll_tuned_alltoall_tree_fanout);

    coll_tuned_alltoall_chain_fanout = ompi_coll_tuned_init_chain_fanout; /* get system wide default */
    mca_param_indices->chain_fanout_param_index =
        mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                        "alltoall_algorithm_chain_fanout",
                                        "Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
                                        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                        OPAL_INFO_LVL_5,
                                        MCA_BASE_VAR_SCOPE_READONLY,
                                        &coll_tuned_alltoall_chain_fanout);

    coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */
    mca_param_indices->max_requests_param_index =
        mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                        "alltoall_algorithm_max_requests",
                                        "Maximum number of outstanding send or recv requests. Only has meaning for synchronized algorithms.",
                                        MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                        OPAL_INFO_LVL_5,
                                        MCA_BASE_VAR_SCOPE_READONLY,
                                        &coll_tuned_alltoall_max_requests);
    if (mca_param_indices->max_requests_param_index < 0) {
        return mca_param_indices->max_requests_param_index;
    }

    if (coll_tuned_alltoall_max_requests < 0) {
        if (0 == ompi_comm_rank(MPI_COMM_WORLD)) {
            /* Bug fix: the old message claimed we were "switching to
             * system level default %d" (printing
             * ompi_coll_tuned_init_max_requests), but the code below has
             * always reset the value to 0, i.e. no limit.  Report what we
             * actually do. */
            opal_output(0, "Maximum outstanding requests must be a non-negative number. Switching to 0 (no limit)\n");
        }
        coll_tuned_alltoall_max_requests = 0;
    }

    return (MPI_SUCCESS);
}
int ompi_coll_tuned_alltoall_intra_do_forced ( void * sbuf , int scount ,
2006-10-18 06:00:46 +04:00
struct ompi_datatype_t * sdtype ,
void * rbuf , int rcount ,
struct ompi_datatype_t * rdtype ,
2007-08-19 07:37:49 +04:00
struct ompi_communicator_t * comm ,
2012-04-06 19:48:07 +04:00
mca_coll_base_module_t * module )
2005-10-25 07:55:58 +04:00
{
2007-08-19 07:37:49 +04:00
mca_coll_tuned_module_t * tuned_module = ( mca_coll_tuned_module_t * ) module ;
mca_coll_tuned_comm_t * data = tuned_module - > tuned_data ;
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:alltoall_intra_do_forced selected algorithm %d " ,
2007-08-19 07:37:49 +04:00
data - > user_forced [ ALLTOALL ] . algorithm ) ) ;
switch ( data - > user_forced [ ALLTOALL ] . algorithm ) {
case ( 0 ) : return ompi_coll_tuned_alltoall_intra_dec_fixed ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module ) ;
case ( 1 ) : return ompi_coll_tuned_alltoall_intra_basic_linear ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module ) ;
case ( 2 ) : return ompi_coll_tuned_alltoall_intra_pairwise ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module ) ;
case ( 3 ) : return ompi_coll_tuned_alltoall_intra_bruck ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module ) ;
case ( 4 ) : return ompi_coll_tuned_alltoall_intra_linear_sync ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module , data - > user_forced [ ALLTOALL ] . max_requests ) ;
case ( 5 ) : return ompi_coll_tuned_alltoall_intra_two_procs ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module ) ;
2005-10-25 07:55:58 +04:00
default :
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:alltoall_intra_do_forced attempt to select algorithm %d when only 0-%d is valid? " ,
2007-08-19 07:37:49 +04:00
data - > user_forced [ ALLTOALL ] . algorithm , ompi_coll_tuned_forced_max_algorithms [ ALLTOALL ] ) ) ;
2005-10-25 07:55:58 +04:00
return ( MPI_ERR_ARG ) ;
} /* switch */
}
int ompi_coll_tuned_alltoall_intra_do_this ( void * sbuf , int scount ,
2006-10-18 06:00:46 +04:00
struct ompi_datatype_t * sdtype ,
void * rbuf , int rcount ,
struct ompi_datatype_t * rdtype ,
struct ompi_communicator_t * comm ,
2012-04-06 19:48:07 +04:00
mca_coll_base_module_t * module ,
2007-02-20 07:25:00 +03:00
int algorithm , int faninout , int segsize ,
int max_requests )
2005-11-11 07:49:29 +03:00
{
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:alltoall_intra_do_this selected algorithm %d topo faninout %d segsize %d " ,
2006-10-18 06:00:46 +04:00
algorithm , faninout , segsize ) ) ;
2005-11-11 07:49:29 +03:00
2006-10-18 06:00:46 +04:00
switch ( algorithm ) {
2007-08-19 07:37:49 +04:00
case ( 0 ) : return ompi_coll_tuned_alltoall_intra_dec_fixed ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module ) ;
case ( 1 ) : return ompi_coll_tuned_alltoall_intra_basic_linear ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module ) ;
case ( 2 ) : return ompi_coll_tuned_alltoall_intra_pairwise ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module ) ;
case ( 3 ) : return ompi_coll_tuned_alltoall_intra_bruck ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module ) ;
case ( 4 ) : return ompi_coll_tuned_alltoall_intra_linear_sync ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module , max_requests ) ;
case ( 5 ) : return ompi_coll_tuned_alltoall_intra_two_procs ( sbuf , scount , sdtype , rbuf , rcount , rdtype , comm , module ) ;
2005-11-11 07:49:29 +03:00
default :
2008-06-09 18:53:58 +04:00
OPAL_OUTPUT ( ( ompi_coll_tuned_stream , " coll:tuned:alltoall_intra_do_this attempt to select algorithm %d when only 0-%d is valid? " ,
2006-10-18 06:00:46 +04:00
algorithm , ompi_coll_tuned_forced_max_algorithms [ ALLTOALL ] ) ) ;
2005-11-11 07:49:29 +03:00
return ( MPI_ERR_ARG ) ;
} /* switch */
}