2011-03-02 09:14:44 +00:00
/*
* Copyright ( c ) 2004 - 2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation . All rights reserved .
* Copyright ( c ) 2004 - 2005 The University of Tennessee and The University
* of Tennessee Research Foundation . All rights
* reserved .
* Copyright ( c ) 2004 - 2005 High Performance Computing Center Stuttgart ,
* University of Stuttgart . All rights reserved .
* Copyright ( c ) 2004 - 2005 The Regents of the University of California .
* All rights reserved .
* Copyright ( c ) 2006 - 2008 Cisco Systems , Inc . All rights reserved .
* Copyright ( c ) 2006 - 2009 Mellanox Technologies . All rights reserved .
* Copyright ( c ) 2006 - 2007 Los Alamos National Security , LLC . All rights
* reserved .
* Copyright ( c ) 2006 - 2007 Voltaire All rights reserved .
* Copyright ( c ) 2009 - 2010 Oracle and / or its affiliates . All rights reserved .
* $ COPYRIGHT $
*
* Additional copyrights may follow
*
* $ HEADER $
*/
# include "ompi_config.h"
# include <string.h>
- Check, whether the compiler supports __builtin_clz (count leading
zeroes);
if so, use it for bit-operations like opal_cube_dim and opal_hibit.
Implement two versions of power-of-two.
In case of opal_next_poweroftwo, this reduces the average execution
time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining,
measured rdtsc, with loop over 2^27 values).
Numbers for other functions are similar (but of course heavily depend
on the usage, e.g. opal_hibit() with a start of 4 does not save
much). The bsr instruction on AMD Opteron is also not as fast.
- Replace various places where the next power-of-two is computed.
Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and
Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes.
This commit was SVN r25270.
2011-10-11 22:49:01 +00:00
# include "opal/util/bit_ops.h"
2011-03-02 09:14:44 +00:00
# include "opal/mca/installdirs/installdirs.h"
# include "opal/util/output.h"
# include "opal/mca/base/mca_base_param.h"
2012-06-27 01:28:28 +00:00
# include "orte/util/show_help.h"
2011-03-02 09:14:44 +00:00
# include "btl_wv.h"
# include "btl_wv_mca.h"
# include "btl_wv_ini.h"
# include "connect/base.h"
/*
* Local flags
*/
enum {
REGINT_NEG_ONE_OK = 0x01 ,
REGINT_GE_ZERO = 0x02 ,
REGINT_GE_ONE = 0x04 ,
REGINT_NONZERO = 0x08 ,
REGINT_MAX = 0x88
} ;
enum {
REGSTR_EMPTY_OK = 0x01 ,
REGSTR_MAX = 0x88
} ;
/*
* utility routine for string parameter registration
*/
static int reg_string ( const char * param_name ,
const char * deprecated_param_name ,
const char * param_desc ,
const char * default_value , char * * out_value ,
int flags )
{
int index ;
char * value ;
index = mca_base_param_reg_string ( & mca_btl_wv_component . super . btl_version ,
param_name , param_desc , false , false ,
default_value , & value ) ;
if ( NULL ! = deprecated_param_name ) {
mca_base_param_reg_syn ( index ,
& mca_btl_wv_component . super . btl_version ,
deprecated_param_name , true ) ;
}
mca_base_param_lookup_string ( index , & value ) ;
if ( 0 ! = ( flags & REGSTR_EMPTY_OK ) & & 0 = = strlen ( value ) ) {
opal_output ( 0 , " Bad parameter value for parameter \" %s \" " ,
param_name ) ;
return OMPI_ERR_BAD_PARAM ;
}
* out_value = value ;
return OMPI_SUCCESS ;
}
/*
* utility routine for integer parameter registration
*/
static int reg_int ( const char * param_name ,
const char * deprecated_param_name ,
const char * param_desc ,
int default_value , int * out_value , int flags )
{
int index , value ;
index = mca_base_param_reg_int ( & mca_btl_wv_component . super . btl_version ,
param_name , param_desc , false , false ,
default_value , NULL ) ;
if ( NULL ! = deprecated_param_name ) {
mca_base_param_reg_syn ( index ,
& mca_btl_wv_component . super . btl_version ,
deprecated_param_name , true ) ;
}
mca_base_param_lookup_int ( index , & value ) ;
if ( 0 ! = ( flags & REGINT_NEG_ONE_OK ) & & - 1 = = value ) {
* out_value = value ;
return OMPI_SUCCESS ;
}
if ( ( 0 ! = ( flags & REGINT_GE_ZERO ) & & value < 0 ) | |
( 0 ! = ( flags & REGINT_GE_ONE ) & & value < 1 ) | |
( 0 ! = ( flags & REGINT_NONZERO ) & & 0 = = value ) ) {
opal_output ( 0 , " Bad parameter value for parameter \" %s \" " ,
param_name ) ;
return OMPI_ERR_BAD_PARAM ;
}
* out_value = value ;
return OMPI_SUCCESS ;
}
/*
* Register and check all MCA parameters
*/
int btl_wv_register_mca_params ( void )
{
char default_qps [ 100 ] ;
uint32_t mid_qp_size ;
int i ;
char * msg , * str , * pkey ;
int ival , ival2 , ret , tmp ;
ret = OMPI_SUCCESS ;
# define CHECK(expr) do {\
tmp = ( expr ) ; \
if ( OMPI_SUCCESS ! = tmp ) ret = tmp ; \
} while ( 0 )
/* register wv component parameters */
CHECK ( reg_int ( " verbose " , NULL ,
" Output some verbose wv BTL information "
" (0 = no output, nonzero = output) " , 0 , & ival , 0 ) ) ;
mca_btl_wv_component . verbose = ( 0 ! = ival ) ;
CHECK ( reg_int ( " warn_no_device_params_found " ,
" warn_no_hca_params_found " ,
" Warn when no device-specific parameters are found in the INI file specified by the btl_wv_device_param_files MCA parameter "
" (0 = do not warn; any other value = warn) " ,
1 , & ival , 0 ) ) ;
mca_btl_wv_component . warn_no_device_params_found = ( 0 ! = ival ) ;
CHECK ( reg_int ( " warn_default_gid_prefix " , NULL ,
" Warn when there is more than one active ports and at least one of them connected to the network with only default GID prefix configured "
" (0 = do not warn; any other value = warn) " ,
1 , & ival , 0 ) ) ;
mca_btl_wv_component . warn_default_gid_prefix = ( 0 ! = ival ) ;
CHECK ( reg_int ( " warn_nonexistent_if " , NULL ,
" Warn if non-existent devices and/or ports are specified in the btl_wv_if_[in|ex]clude MCA parameters "
" (0 = do not warn; any other value = warn) " ,
1 , & ival , 0 ) ) ;
mca_btl_wv_component . warn_nonexistent_if = ( 0 ! = ival ) ;
ival2 = 0 ;
CHECK ( reg_int ( " want_fork_support " , NULL ,
" Whether fork support is desired or not "
" (negative = try to enable fork support, but continue even if it is not available, 0 = do not enable fork support, positive = try to enable fork support and fail if it is not available) " ,
ival2 , & ival , 0 ) ) ;
if ( 0 ! = ival ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " ,
2011-03-02 09:14:44 +00:00
" ib_fork requested but not supported " , true ,
2012-06-27 01:28:28 +00:00
orte_process_info . nodename ) ;
2011-03-02 09:14:44 +00:00
return OMPI_ERROR ;
}
asprintf ( & str , " %s/mca-btl-wv-device-params.ini " ,
opal_install_dirs . pkgdatadir ) ;
if ( NULL = = str ) {
return OMPI_ERR_OUT_OF_RESOURCE ;
}
CHECK ( reg_string ( " device_param_files " , " hca_param_files " ,
" Colon-delimited list of INI-style files that contain device vendor/part-specific parameters (use semicolon for Windows) " ,
str , & mca_btl_wv_component . device_params_file_names ,
0 ) ) ;
free ( str ) ;
CHECK ( reg_string ( " device_type " , NULL ,
" Specify to only use IB or iWARP network adapters "
" (infiniband = only use InfiniBand HCAs; iwarp = only use iWARP NICs; all = use any available adapters) " ,
" all " , & str , 0 ) ) ;
if ( 0 = = strcasecmp ( str , " ib " ) | |
0 = = strcasecmp ( str , " infiniband " ) ) {
mca_btl_wv_component . device_type = BTL_WV_DT_IB ;
} else if ( 0 = = strcasecmp ( str , " iw " ) | |
0 = = strcasecmp ( str , " iwarp " ) ) {
mca_btl_wv_component . device_type = BTL_WV_DT_IWARP ;
} else if ( 0 = = strcasecmp ( str , " all " ) ) {
mca_btl_wv_component . device_type = BTL_WV_DT_ALL ;
} else {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " ,
2011-03-02 09:14:44 +00:00
" ib_fork requested but not supported " , true ,
2012-06-27 01:28:28 +00:00
orte_process_info . nodename ) ;
2011-03-02 09:14:44 +00:00
return OMPI_ERROR ;
}
free ( str ) ;
CHECK ( reg_int ( " max_btls " , NULL ,
" Maximum number of device ports to use "
" (-1 = use all available, otherwise must be >= 1) " ,
- 1 , & mca_btl_wv_component . ib_max_btls ,
REGINT_NEG_ONE_OK | REGINT_GE_ONE ) ) ;
CHECK ( reg_int ( " free_list_num " , NULL ,
" Initial size of free lists "
" (must be >= 1) " ,
8 , & mca_btl_wv_component . ib_free_list_num ,
REGINT_GE_ONE ) ) ;
CHECK ( reg_int ( " free_list_max " , NULL ,
" Maximum size of free lists "
" (-1 = infinite, otherwise must be >= 0) " ,
- 1 , & mca_btl_wv_component . ib_free_list_max ,
REGINT_NEG_ONE_OK | REGINT_GE_ONE ) ) ;
CHECK ( reg_int ( " free_list_inc " , NULL ,
" Increment size of free lists "
" (must be >= 1) " ,
32 , & mca_btl_wv_component . ib_free_list_inc ,
REGINT_GE_ONE ) ) ;
CHECK ( reg_string ( " mpool " , NULL ,
" Name of the memory pool to be used (it is unlikely that you will ever want to change this) " ,
2012-06-26 15:55:23 +00:00
" grdma " , & mca_btl_wv_component . ib_mpool_name ,
2011-03-02 09:14:44 +00:00
0 ) ) ;
CHECK ( reg_int ( " reg_mru_len " , NULL ,
" Length of the registration cache most recently used list "
" (must be >= 1) " ,
16 , ( int * ) & mca_btl_wv_component . reg_mru_len ,
REGINT_GE_ONE ) ) ;
CHECK ( reg_int ( " cq_size " , " ib_cq_size " ,
" Size of the OpenFabrics completion "
" queue (will automatically be set to a minimum of "
" (2 * number_of_peers * btl_wv_rd_num)) " ,
1000 , & ival , REGINT_GE_ONE ) ) ;
mca_btl_wv_component . ib_cq_size [ BTL_WV_LP_CQ ] =
mca_btl_wv_component . ib_cq_size [ BTL_WV_HP_CQ ] = ( uint32_t ) ival ;
CHECK ( reg_int ( " max_inline_data " , " ib_max_inline_data " ,
" Maximum size of inline data segment "
" (-1 = run-time probe to discover max value, otherwise must be >= 0). "
" If not explicitly set, use max_inline_data from "
" the INI file containing device-specific parameters " ,
- 1 , & ival , REGINT_NEG_ONE_OK | REGINT_GE_ZERO ) ) ;
mca_btl_wv_component . ib_max_inline_data = ( int32_t ) ival ;
CHECK ( reg_string ( " pkey " , " ib_pkey_val " ,
" OpenFabrics partition key (pkey) value. "
" Unsigned integer decimal or hex values are allowed (e.g., \" 3 \" or \" 0x3f \" ) and will be masked against the maximum allowable IB partition key value (0x7fff) " ,
" 0 " , & pkey , 0 ) ) ;
mca_btl_wv_component . ib_pkey_val =
ompi_btl_wv_ini_intify ( pkey ) & MCA_BTL_IB_PKEY_MASK ;
free ( pkey ) ;
CHECK ( reg_int ( " psn " , " ib_psn " ,
" OpenFabrics packet sequence starting number "
" (must be >= 0) " ,
0 , & ival , REGINT_GE_ZERO ) ) ;
mca_btl_wv_component . ib_psn = ( uint32_t ) ival ;
CHECK ( reg_int ( " ib_qp_ous_rd_atom " , NULL ,
" InfiniBand outstanding atomic reads "
" (must be >= 0) " ,
4 , & ival , REGINT_GE_ZERO ) ) ;
mca_btl_wv_component . ib_qp_ous_rd_atom = ( uint32_t ) ival ;
asprintf ( & msg , " OpenFabrics MTU, in bytes (if not specified in INI files). Valid values are: %d=256 bytes, %d=512 bytes, %d=1024 bytes, %d=2048 bytes, %d=4096 bytes " ,
WV_MTU_256 ,
WV_MTU_512 ,
WV_MTU_1024 ,
WV_MTU_2048 ,
WV_MTU_4096 ) ;
if ( NULL = = msg ) {
/* Don't try to recover from this */
return OMPI_ERR_OUT_OF_RESOURCE ;
}
CHECK ( reg_int ( " mtu " , " ib_mtu " , msg , WV_MTU_1024 , & ival , 0 ) ) ;
free ( msg ) ;
if ( ival < WV_MTU_1024 | | ival > WV_MTU_4096 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " invalid value for btl_wv_ib_mtu " ,
" btl_wv_ib_mtu reset to 1024 " ) ;
mca_btl_wv_component . ib_mtu = WV_MTU_1024 ;
} else {
mca_btl_wv_component . ib_mtu = ( uint32_t ) ival ;
}
CHECK ( reg_int ( " ib_min_rnr_timer " , NULL , " InfiniBand minimum "
" \" receiver not ready \" timer, in seconds "
" (must be >= 0 and <= 31) " ,
25 , & ival , 0 ) ) ;
if ( ival > 31 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " btl_wv_ib_min_rnr_timer > 31 " ,
" btl_wv_ib_min_rnr_timer reset to 31 " ) ;
ival = 31 ;
} else if ( ival < 0 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " btl_wv_ib_min_rnr_timer < 0 " ,
" btl_wv_ib_min_rnr_timer reset to 0 " ) ;
ival = 0 ;
}
mca_btl_wv_component . ib_min_rnr_timer = ( uint32_t ) ival ;
CHECK ( reg_int ( " ib_timeout " , NULL ,
" InfiniBand transmit timeout, plugged into formula: 4.096 microseconds * (2^btl_wv_ib_timeout) "
" (must be >= 0 and <= 31) " ,
20 , & ival , 0 ) ) ;
if ( ival > 31 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " btl_wv_ib_timeout > 31 " ,
" btl_wv_ib_timeout reset to 31 " ) ;
ival = 31 ;
} else if ( ival < 0 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " btl_wv_ib_timeout < 0 " ,
" btl_wv_ib_timeout reset to 0 " ) ;
ival = 0 ;
}
mca_btl_wv_component . ib_timeout = ( uint32_t ) ival ;
CHECK ( reg_int ( " ib_retry_count " , NULL ,
" InfiniBand transmit retry count "
" (must be >= 0 and <= 7) " ,
7 , & ival , 0 ) ) ;
if ( ival > 7 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " btl_wv_ib_retry_count > 7 " ,
" btl_wv_ib_retry_count reset to 7 " ) ;
ival = 7 ;
} else if ( ival < 0 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " btl_wv_ib_retry_count < 0 " ,
" btl_wv_ib_retry_count reset to 0 " ) ;
ival = 0 ;
}
mca_btl_wv_component . ib_retry_count = ( uint32_t ) ival ;
CHECK ( reg_int ( " ib_rnr_retry " , NULL ,
" InfiniBand \" receiver not ready \" "
" retry count; applies *only* to SRQ/XRC queues. PP queues "
" use RNR retry values of 0 because Open MPI performs "
" software flow control to guarantee that RNRs never occur "
" (must be >= 0 and <= 7; 7 = \" infinite \" ) " ,
7 , & ival , 0 ) ) ;
if ( ival > 7 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " btl_wv_ib_rnr_retry > 7 " ,
" btl_wv_ib_rnr_retry reset to 7 " ) ;
ival = 7 ;
} else if ( ival < 0 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " btl_wv_ib_rnr_retry < 0 " ,
" btl_wv_ib_rnr_retry reset to 0 " ) ;
ival = 0 ;
}
mca_btl_wv_component . ib_rnr_retry = ( uint32_t ) ival ;
CHECK ( reg_int ( " ib_service_level " , NULL , " InfiniBand service level "
" (must be >= 0 and <= 15) " ,
0 , & ival , 0 ) ) ;
if ( ival > 15 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " btl_wv_ib_service_level > 15 " ,
" btl_wv_ib_service_level reset to 15 " ) ;
ival = 15 ;
} else if ( ival < 0 ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " invalid mca param value " ,
2011-03-02 09:14:44 +00:00
true , " btl_wv_ib_service_level < 0 " ,
" btl_wv_ib_service_level reset to 0 " ) ;
ival = 0 ;
}
mca_btl_wv_component . ib_service_level = ( uint32_t ) ival ;
CHECK ( reg_int ( " ib_path_rec_service_level " , NULL , " Enable getting InfiniBand service level from PathRecord "
" (must be >= 0, 0 = disabled, positive = try to get the service level from PathRecord) " ,
0 , & ival , REGINT_GE_ZERO ) ) ;
mca_btl_wv_component . ib_path_rec_service_level = ( uint32_t ) ival ;
CHECK ( reg_int ( " use_eager_rdma " , NULL , " Use RDMA for eager messages "
" (-1 = use device default, 0 = do not use eager RDMA, "
" 1 = use eager RDMA) " ,
- 1 , & ival , 0 ) ) ;
mca_btl_wv_component . use_eager_rdma = ( int32_t ) ival ;
CHECK ( reg_int ( " eager_rdma_threshold " , NULL ,
" Use RDMA for short messages after this number of "
" messages are received from a given peer "
" (must be >= 1) " ,
16 , & ival , REGINT_GE_ONE ) ) ;
mca_btl_wv_component . eager_rdma_threshold = ( int32_t ) ival ;
CHECK ( reg_int ( " max_eager_rdma " , NULL , " Maximum number of peers allowed to use "
" RDMA for short messages (RDMA is used for all long "
" messages, except if explicitly disabled, such as "
" with the \" dr \" pml) "
" (must be >= 0) " ,
16 , & ival , REGINT_GE_ZERO ) ) ;
mca_btl_wv_component . max_eager_rdma = ( int32_t ) ival ;
CHECK ( reg_int ( " eager_rdma_num " , NULL , " Number of RDMA buffers to allocate "
" for small messages "
" (must be >= 1) " ,
16 , & ival , REGINT_GE_ONE ) ) ;
mca_btl_wv_component . eager_rdma_num = ( int32_t ) ( ival + 1 ) ;
CHECK ( reg_int ( " btls_per_lid " , NULL , " Number of BTLs to create for each "
" InfiniBand LID "
" (must be >= 1) " ,
1 , & ival , REGINT_GE_ONE ) ) ;
mca_btl_wv_component . btls_per_lid = ( uint32_t ) ival ;
CHECK ( reg_int ( " max_lmc " , NULL , " Maximum number of LIDs to use for each device port "
" (must be >= 0, where 0 = use all available) " ,
0 , & ival , REGINT_GE_ZERO ) ) ;
mca_btl_wv_component . max_lmc = ( uint32_t ) ival ;
mca_btl_wv_component . enable_srq_resize = 0 ;
CHECK ( reg_int ( " buffer_alignment " , NULL ,
" Preferred communication buffer alignment, in bytes "
" (must be > 0 and power of two) " ,
64 , & ival , REGINT_GE_ZERO ) ) ;
if ( ival < = 1 | | ( ival & ( ival - 1 ) ) ) {
2012-06-27 01:28:28 +00:00
orte_show_help ( " help-mpi-btl-wv.txt " , " wrong buffer alignment " ,
true , ival , orte_process_info . nodename , 64 ) ;
2011-03-02 09:14:44 +00:00
mca_btl_wv_component . buffer_alignment = 64 ;
} else {
mca_btl_wv_component . buffer_alignment = ( uint32_t ) ival ;
}
CHECK ( reg_int ( " use_message_coalescing " , NULL ,
" If nonzero, use message coalescing " , 1 , & ival , 0 ) ) ;
mca_btl_wv_component . use_message_coalescing = ( 0 ! = ival ) ;
CHECK ( reg_int ( " cq_poll_ratio " , NULL ,
" How often to poll high priority CQ versus low priority CQ " ,
100 , & ival , REGINT_GE_ONE ) ) ;
mca_btl_wv_component . cq_poll_ratio = ( uint32_t ) ival ;
CHECK ( reg_int ( " eager_rdma_poll_ratio " , NULL ,
" How often to poll eager RDMA channel versus CQ " ,
100 , & ival , REGINT_GE_ONE ) ) ;
mca_btl_wv_component . eager_rdma_poll_ratio = ( uint32_t ) ival ;
CHECK ( reg_int ( " hp_cq_poll_per_progress " , NULL ,
" Max number of completion events to process for each call "
" of BTL progress engine " ,
10 , & ival , REGINT_GE_ONE ) ) ;
mca_btl_wv_component . cq_poll_progress = ( uint32_t ) ival ;
mca_btl_wv_module . super . btl_exclusivity = MCA_BTL_EXCLUSIVITY_DEFAULT ;
mca_btl_wv_module . super . btl_eager_limit = 12 * 1024 ;
mca_btl_wv_module . super . btl_rndv_eager_limit = 12 * 1024 ;
mca_btl_wv_module . super . btl_max_send_size = 64 * 1024 ;
mca_btl_wv_module . super . btl_rdma_pipeline_send_length = 1024 * 1024 ;
mca_btl_wv_module . super . btl_rdma_pipeline_frag_size = 1024 * 1024 ;
mca_btl_wv_module . super . btl_min_rdma_pipeline_size = 256 * 1024 ;
mca_btl_wv_module . super . btl_flags = MCA_BTL_FLAGS_PUT |
MCA_BTL_FLAGS_NEED_ACK | MCA_BTL_FLAGS_NEED_CSUM | MCA_BTL_FLAGS_HETEROGENEOUS_RDMA ;
# if BTL_WV_FAILOVER_ENABLED
mca_btl_wv_module . super . btl_flags | = MCA_BTL_FLAGS_FAILOVER_SUPPORT ;
# endif
mca_btl_wv_module . super . btl_bandwidth = 800 ;
mca_btl_wv_module . super . btl_latency = 10 ;
CHECK ( mca_btl_base_param_register (
& mca_btl_wv_component . super . btl_version ,
& mca_btl_wv_module . super ) ) ;
/* setup all the qp stuff */
/* round mid_qp_size to smallest power of two */
- Check, whether the compiler supports __builtin_clz (count leading
zeroes);
if so, use it for bit-operations like opal_cube_dim and opal_hibit.
Implement two versions of power-of-two.
In case of opal_next_poweroftwo, this reduces the average execution
time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining,
measured rdtsc, with loop over 2^27 values).
Numbers for other functions are similar (but of course heavily depend
on the usage, e.g. opal_hibit() with a start of 4 does not save
much). The bsr instruction on AMD Opteron is also not as fast.
- Replace various places where the next power-of-two is computed.
Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and
Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes.
This commit was SVN r25270.
2011-10-11 22:49:01 +00:00
mid_qp_size = opal_next_poweroftwo ( mca_btl_wv_module . super . btl_eager_limit / 4 ) > > 1 ;
2011-03-02 09:14:44 +00:00
- Check, whether the compiler supports __builtin_clz (count leading
zeroes);
if so, use it for bit-operations like opal_cube_dim and opal_hibit.
Implement two versions of power-of-two.
In case of opal_next_poweroftwo, this reduces the average execution
time from 83 cycles to 4 cycles (Intel Nehalem, icc, -O2, inlining,
measured rdtsc, with loop over 2^27 values).
Numbers for other functions are similar (but of course heavily depend
on the usage, e.g. opal_hibit() with a start of 4 does not save
much). The bsr instruction on AMD Opteron is also not as fast.
- Replace various places where the next power-of-two is computed.
Tested on Intel Nehalem Cluster with openib, compilers GNU-4.6.1 and
Intel-12.0.4 using mpi_testsuite -t "Collective" with 128 processes.
This commit was SVN r25270.
2011-10-11 22:49:01 +00:00
/* mid_qp_size = MAX (mid_qp_size, 1024); ?! */
2011-03-02 09:14:44 +00:00
if ( mid_qp_size < = 128 ) {
mid_qp_size = 1024 ;
}
snprintf ( default_qps , 100 ,
" P,128,256,192,128:S,%u,256,128,32:S,%u,256,128,32:S,%u,256,128,32 " ,
mid_qp_size ,
( uint32_t ) mca_btl_wv_module . super . btl_eager_limit ,
( uint32_t ) mca_btl_wv_module . super . btl_max_send_size ) ;
mca_btl_wv_component . default_recv_qps = strdup ( default_qps ) ;
if ( NULL = = mca_btl_wv_component . default_recv_qps ) {
BTL_ERROR ( ( " Unable to allocate memory for default receive queues string. \n " ) ) ;
return OMPI_ERROR ;
}
CHECK ( reg_string ( " receive_queues " , NULL ,
" Colon-delimited, comma-delimited list of receive queues: P,4096,8,6,4:P,32768,8,6,4 " ,
default_qps , & mca_btl_wv_component . receive_queues ,
0 ) ) ;
mca_btl_wv_component . receive_queues_source =
( 0 = = strcmp ( default_qps ,
mca_btl_wv_component . receive_queues ) ) ?
BTL_WV_RQ_SOURCE_DEFAULT : BTL_WV_RQ_SOURCE_MCA ;
CHECK ( reg_string ( " if_include " , NULL ,
" Comma-delimited list of devices/ports to be used (e.g. \" mthca0,mthca1:2 \" ; empty value means to use all ports found). Mutually exclusive with btl_wv_if_exclude. " ,
NULL , & mca_btl_wv_component . if_include ,
0 ) ) ;
CHECK ( reg_string ( " if_exclude " , NULL ,
" Comma-delimited list of device/ports to be excluded (empty value means to not exclude any ports). Mutually exclusive with btl_wv_if_include. " ,
NULL , & mca_btl_wv_component . if_exclude ,
0 ) ) ;
CHECK ( reg_string ( " ipaddr_include " , NULL ,
" Comma-delimited list of IP Addresses to be used (e.g. \" 192.168.1.0/24 \" ). Mutually exclusive with btl_wv_ipaddr_exclude. " ,
NULL , & mca_btl_wv_component . ipaddr_include ,
0 ) ) ;
CHECK ( reg_string ( " ipaddr_exclude " , NULL ,
" Comma-delimited list of IP Addresses to be excluded (e.g. \" 192.168.1.0/24 \" ). Mutually exclusive with btl_wv_ipaddr_include. " ,
NULL , & mca_btl_wv_component . ipaddr_exclude ,
0 ) ) ;
/* Register any MCA params for the connect pseudo-components */
if ( OMPI_SUCCESS = = ret ) {
ret = ompi_btl_wv_connect_base_register ( ) ;
}
return ret ;
}