Merge pull request #6814 from brminich/tuned_all2all_select
COLL/TUNED: Update alltoall selection rule for mellanox platform
Этот коммит содержится в:
Коммит
98d0ecfe14
@ -78,3 +78,8 @@ opal_event_include=epoll
|
|||||||
|
|
||||||
bml_r2_show_unreach_errors = 0
|
bml_r2_show_unreach_errors = 0
|
||||||
|
|
||||||
|
# alltoall algorithm selection settings for tuned coll mca
|
||||||
|
coll_tuned_alltoall_large_msg = 250000
|
||||||
|
coll_tuned_alltoall_min_procs = 2048
|
||||||
|
coll_tuned_alltoall_algorithm_max_requests = 8
|
||||||
|
|
||||||
|
@ -38,6 +38,9 @@ extern int ompi_coll_tuned_init_chain_fanout;
|
|||||||
extern int ompi_coll_tuned_init_max_requests;
|
extern int ompi_coll_tuned_init_max_requests;
|
||||||
extern int ompi_coll_tuned_alltoall_small_msg;
|
extern int ompi_coll_tuned_alltoall_small_msg;
|
||||||
extern int ompi_coll_tuned_alltoall_intermediate_msg;
|
extern int ompi_coll_tuned_alltoall_intermediate_msg;
|
||||||
|
extern int ompi_coll_tuned_alltoall_large_msg;
|
||||||
|
extern int ompi_coll_tuned_alltoall_min_procs;
|
||||||
|
extern int ompi_coll_tuned_alltoall_max_requests;
|
||||||
|
|
||||||
/* forced algorithm choices */
|
/* forced algorithm choices */
|
||||||
/* this structure is for storing the indexes to the forced algorithm mca params... */
|
/* this structure is for storing the indexes to the forced algorithm mca params... */
|
||||||
|
@ -28,7 +28,6 @@
|
|||||||
/* alltoall algorithm variables */
|
/* alltoall algorithm variables */
|
||||||
static int coll_tuned_alltoall_forced_algorithm = 0;
|
static int coll_tuned_alltoall_forced_algorithm = 0;
|
||||||
static int coll_tuned_alltoall_segment_size = 0;
|
static int coll_tuned_alltoall_segment_size = 0;
|
||||||
static int coll_tuned_alltoall_max_requests;
|
|
||||||
static int coll_tuned_alltoall_tree_fanout;
|
static int coll_tuned_alltoall_tree_fanout;
|
||||||
static int coll_tuned_alltoall_chain_fanout;
|
static int coll_tuned_alltoall_chain_fanout;
|
||||||
|
|
||||||
@ -115,7 +114,22 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm
|
|||||||
MCA_BASE_VAR_SCOPE_ALL,
|
MCA_BASE_VAR_SCOPE_ALL,
|
||||||
&coll_tuned_alltoall_chain_fanout);
|
&coll_tuned_alltoall_chain_fanout);
|
||||||
|
|
||||||
coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */
|
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
|
||||||
|
"alltoall_large_msg",
|
||||||
|
"use pairwise exchange algorithm for messages larger than this value",
|
||||||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||||
|
OPAL_INFO_LVL_6,
|
||||||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
|
&ompi_coll_tuned_alltoall_large_msg);
|
||||||
|
|
||||||
|
(void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
|
||||||
|
"alltoall_min_procs",
|
||||||
|
"use pairwise exchange algorithm for communicators larger than this value",
|
||||||
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||||
|
OPAL_INFO_LVL_6,
|
||||||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
|
&ompi_coll_tuned_alltoall_min_procs);
|
||||||
|
|
||||||
mca_param_indices->max_requests_param_index =
|
mca_param_indices->max_requests_param_index =
|
||||||
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
|
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
|
||||||
"alltoall_algorithm_max_requests",
|
"alltoall_algorithm_max_requests",
|
||||||
@ -123,17 +137,16 @@ int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm
|
|||||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||||
OPAL_INFO_LVL_5,
|
OPAL_INFO_LVL_5,
|
||||||
MCA_BASE_VAR_SCOPE_ALL,
|
MCA_BASE_VAR_SCOPE_ALL,
|
||||||
&coll_tuned_alltoall_max_requests);
|
&ompi_coll_tuned_alltoall_max_requests);
|
||||||
if (mca_param_indices->max_requests_param_index < 0) {
|
if (mca_param_indices->max_requests_param_index < 0) {
|
||||||
return mca_param_indices->max_requests_param_index;
|
return mca_param_indices->max_requests_param_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (coll_tuned_alltoall_max_requests < 0) {
|
if (ompi_coll_tuned_alltoall_max_requests < 0) {
|
||||||
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
|
if( 0 == ompi_comm_rank( MPI_COMM_WORLD ) ) {
|
||||||
opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to system level default %d \n",
|
opal_output( 0, "Maximum outstanding requests must be positive number greater than 1. Switching to 0 \n");
|
||||||
ompi_coll_tuned_init_max_requests );
|
|
||||||
}
|
}
|
||||||
coll_tuned_alltoall_max_requests = 0;
|
ompi_coll_tuned_alltoall_max_requests = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (MPI_SUCCESS);
|
return (MPI_SUCCESS);
|
||||||
|
@ -57,6 +57,13 @@ int ompi_coll_tuned_init_max_requests = 128;
|
|||||||
int ompi_coll_tuned_alltoall_small_msg = 200;
|
int ompi_coll_tuned_alltoall_small_msg = 200;
|
||||||
int ompi_coll_tuned_alltoall_intermediate_msg = 3000;
|
int ompi_coll_tuned_alltoall_intermediate_msg = 3000;
|
||||||
|
|
||||||
|
/* Set it to the same value as intermediate msg by default, so it does not affect
|
||||||
|
* default algorithm selection. Changing this value will force using linear with
|
||||||
|
* sync algorithm on certain message sizes. */
|
||||||
|
int ompi_coll_tuned_alltoall_large_msg = 3000;
|
||||||
|
int ompi_coll_tuned_alltoall_min_procs = 0; /* disable by default */
|
||||||
|
int ompi_coll_tuned_alltoall_max_requests = 0; /* no limit for alltoall by default */
|
||||||
|
|
||||||
/* forced algorithm variables */
|
/* forced algorithm variables */
|
||||||
/* indices for the MCA parameters */
|
/* indices for the MCA parameters */
|
||||||
coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT] = {{0}};
|
coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT] = {{0}};
|
||||||
|
@ -136,6 +136,12 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(const void *sbuf, int scount,
|
|||||||
return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype,
|
return ompi_coll_base_alltoall_intra_basic_linear(sbuf, scount, sdtype,
|
||||||
rbuf, rcount, rdtype,
|
rbuf, rcount, rdtype,
|
||||||
comm, module);
|
comm, module);
|
||||||
|
} else if ((block_dsize < (size_t) ompi_coll_tuned_alltoall_large_msg) &&
|
||||||
|
(communicator_size <= ompi_coll_tuned_alltoall_min_procs)) {
|
||||||
|
return ompi_coll_base_alltoall_intra_linear_sync(sbuf, scount, sdtype,
|
||||||
|
rbuf, rcount, rdtype,
|
||||||
|
comm, module,
|
||||||
|
ompi_coll_tuned_alltoall_max_requests);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype,
|
return ompi_coll_base_alltoall_intra_pairwise(sbuf, scount, sdtype,
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user