coll/tuned: make the fixed alltoall decision thresholds configurable

Registers two integer variables on the tuned coll component, under the
names "alltoall_small_msg" and "alltoall_intermediate_msg", and uses them
in ompi_coll_tuned_alltoall_intra_dec_fixed() in place of the literals
200 and 3000:

  * block_dsize < alltoall_small_msg and communicator_size > 12
        -> ompi_coll_tuned_alltoall_intra_bruck
  * otherwise, block_dsize < alltoall_intermediate_msg
        -> ompi_coll_tuned_alltoall_intra_basic_linear

The defaults are 200 and 3000, i.e. exactly the literals this patch
removes, so algorithm selection is unchanged unless a value is overridden.

diff --git a/ompi/mca/coll/tuned/coll_tuned.h b/ompi/mca/coll/tuned/coll_tuned.h
index 82856448a0..f044a60375 100644
--- a/ompi/mca/coll/tuned/coll_tuned.h
+++ b/ompi/mca/coll/tuned/coll_tuned.h
@@ -88,6 +88,8 @@ extern char* ompi_coll_tuned_dynamic_rules_filename;
 extern int ompi_coll_tuned_init_tree_fanout;
 extern int ompi_coll_tuned_init_chain_fanout;
 extern int ompi_coll_tuned_init_max_requests;
+extern int ompi_coll_tuned_alltoall_small_msg;
+extern int ompi_coll_tuned_alltoall_intermediate_msg;
 
 /* forced algorithm choices */
 /* this structure is for storing the indexes to the forced algorithm mca params... */
diff --git a/ompi/mca/coll/tuned/coll_tuned_component.c b/ompi/mca/coll/tuned/coll_tuned_component.c
index 248d31e575..36f89a6d4b 100644
--- a/ompi/mca/coll/tuned/coll_tuned_component.c
+++ b/ompi/mca/coll/tuned/coll_tuned_component.c
@@ -50,6 +50,9 @@ char* ompi_coll_tuned_dynamic_rules_filename = (char*) NULL;
 int ompi_coll_tuned_init_tree_fanout = 4;
 int ompi_coll_tuned_init_chain_fanout = 4;
 int ompi_coll_tuned_init_max_requests = 128;
+/* defaults match the literals formerly hard-coded in ompi_coll_tuned_alltoall_intra_dec_fixed */
+int ompi_coll_tuned_alltoall_small_msg = 200;
+int ompi_coll_tuned_alltoall_intermediate_msg = 3000;
 
 /* forced alogrithm variables */
 /* indices for the MCA parameters */
@@ -148,6 +151,24 @@ static int tuned_register(void)
                                            MCA_BASE_VAR_SCOPE_READONLY,
                                            &ompi_coll_tuned_init_chain_fanout);
 
+    ompi_coll_tuned_alltoall_small_msg = 200;
+    (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
+                                           "alltoall_small_msg",
+                                           "threshold (if supported) to decide if small MSGs alltoall algorithm will be used",
+                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                           OPAL_INFO_LVL_6,
+                                           MCA_BASE_VAR_SCOPE_READONLY,
+                                           &ompi_coll_tuned_alltoall_small_msg);
+
+    ompi_coll_tuned_alltoall_intermediate_msg = 3000;
+    (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
+                                           "alltoall_intermediate_msg",
+                                           "threshold (if supported) to decide if intermediate MSGs alltoall algorithm will be used",
+                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
+                                           OPAL_INFO_LVL_6,
+                                           MCA_BASE_VAR_SCOPE_READONLY,
+                                           &ompi_coll_tuned_alltoall_intermediate_msg);
+
     ompi_coll_tuned_use_dynamic_rules = false;
     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
                                            "use_dynamic_rules",
diff --git a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c
index 135c040994..d1c36aeab8 100644
--- a/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c
+++ b/ompi/mca/coll/tuned/coll_tuned_decision_fixed.c
@@ -121,12 +121,13 @@ int ompi_coll_tuned_alltoall_intra_dec_fixed(void *sbuf, int scount,
     ompi_datatype_type_size(sdtype, &dsize);
     block_dsize = dsize * (ptrdiff_t)scount;
 
-    if ((block_dsize < 200) && (communicator_size > 12)) {
+    if ((block_dsize < ompi_coll_tuned_alltoall_small_msg)
+        && (communicator_size > 12)) {
         return ompi_coll_tuned_alltoall_intra_bruck(sbuf, scount, sdtype,
                                                     rbuf, rcount, rdtype,
                                                     comm, module);
 
-    } else if (block_dsize < 3000) {
+    } else if (block_dsize < ompi_coll_tuned_alltoall_intermediate_msg) {
         return ompi_coll_tuned_alltoall_intra_basic_linear(sbuf, scount, sdtype,
                                                            rbuf, rcount, rdtype,
                                                            comm, module);