
* first import of Bull specific modifications to HAN * Cleaning, renaming and compilation fixing: changed all "future" into "han". * Import BULL specific modifications in coll/tuned and coll/base * Fixed compilation issues in Han * Changed han_output to directly point to coll framework output. * The verbosity MCA parameter was removed as a duplicate of coll verbosity * Add fallback in han reduce when op cannot commute and ppn are imbalanced * Added fallback for han bcast when nodes do not have the same number of processes * Add fallback in han scatter when ppn are imbalanced + fixed missing scatter_fn pointer in the module interface Signed-off-by: Brelle Emmanuel <emmanuel.brelle@atos.net> Co-authored-by: a700850 <pierre.lemarinier@atos.net> Co-authored-by: germainf <florent.germain@atos.net>
517 строки
22 KiB
C
517 строки
22 KiB
C
/*
|
|
* Copyright (c) 2018-2020 The University of Tennessee and The University
|
|
* of Tennessee Research Foundation. All rights
|
|
* reserved.
|
|
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
|
|
/**
|
|
* @file
|
|
*
|
|
* Most of the description of the data layout is in the
|
|
* coll_han_module.c file.
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "opal/util/show_help.h"
|
|
#include "ompi/constants.h"
|
|
#include "ompi/mca/coll/coll.h"
|
|
#include "coll_han.h"
|
|
#include "coll_han_dynamic.h"
|
|
#include "coll_han_dynamic_file.h"
|
|
|
|
/*
|
|
* Public string showing the coll ompi_han component version number
|
|
*/
|
|
const char *mca_coll_han_component_version_string =
|
|
"Open MPI han collective MCA component version " OMPI_VERSION;
|
|
|
|
|
|
/*
 * Prototypes of the file-local component hooks.  They are referenced by the
 * component descriptor below and defined later in this file.
 */
static int han_register(void);
static int han_open(void);
static int han_close(void);
|
|
|
|
/*
|
|
* Instantiate the public struct with all of our public information
|
|
* and pointers to our public functions in it
|
|
*/
|
|
|
|
mca_coll_han_component_t mca_coll_han_component = {
|
|
|
|
/* First, fill in the super */
|
|
|
|
{
|
|
/* First, the mca_component_t struct containing meta
|
|
information about the component itself */
|
|
|
|
.collm_version = {
|
|
MCA_COLL_BASE_VERSION_2_0_0,
|
|
|
|
/* Component name and version */
|
|
.mca_component_name = "han",
|
|
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
|
|
OMPI_RELEASE_VERSION),
|
|
|
|
/* Component functions */
|
|
.mca_open_component = han_open,
|
|
.mca_close_component = han_close,
|
|
.mca_register_component_params = han_register,
|
|
},
|
|
.collm_data = {
|
|
/* The component is not checkpoint ready */
|
|
MCA_BASE_METADATA_PARAM_NONE},
|
|
|
|
/* Initialization / querying functions */
|
|
|
|
.collm_init_query = mca_coll_han_init_query,
|
|
.collm_comm_query = mca_coll_han_comm_query,
|
|
},
|
|
|
|
/* han-component specifc information */
|
|
|
|
/* (default) priority */
|
|
20,
|
|
};
|
|
|
|
/*
|
|
* Init the component
|
|
*/
|
|
static int han_open(void)
|
|
{
|
|
int param;
|
|
mca_coll_han_component_t *cs = &mca_coll_han_component;
|
|
if (cs->han_auto_tune) {
|
|
cs->han_auto_tuned =
|
|
(selection *) malloc(2 * cs->han_auto_tune_n * cs->han_auto_tune_c *
|
|
cs->han_auto_tune_m * sizeof(selection));
|
|
char *filename = "/home/dycz0fx/results/auto/auto_tuned.bin";
|
|
FILE *file = fopen(filename, "r");
|
|
fread(cs->han_auto_tuned, sizeof(selection),
|
|
2 * cs->han_auto_tune_n * cs->han_auto_tune_c * cs->han_auto_tune_m, file);
|
|
fclose(file);
|
|
}
|
|
|
|
/*
|
|
* Get the global coll verbosity: it will be ours
|
|
*/
|
|
cs->han_output = ompi_coll_base_framework.framework_output;
|
|
opal_output_verbose(1, cs->han_output,
|
|
"coll:han:component_open: done!");
|
|
|
|
cs->topo_level = GLOBAL_COMMUNICATOR;
|
|
return mca_coll_han_init_dynamic_rules();
|
|
}
|
|
|
|
|
|
/*
|
|
* Shut down the component
|
|
*/
|
|
static int han_close(void)
|
|
{
|
|
mca_coll_han_component_t *cs = &mca_coll_han_component;
|
|
if (cs->han_auto_tune && cs->han_auto_tuned != NULL) {
|
|
free(cs->han_auto_tuned);
|
|
cs->han_auto_tuned = NULL;
|
|
}
|
|
mca_coll_han_free_dynamic_rules();
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/*
 * Tell whether a "simple" algorithm variant is listed for a collective.
 *
 * @param coll  collective identifier
 * @return true for ALLGATHER, ALLREDUCE, BCAST, GATHER and REDUCE,
 *         false for every other value.
 */
static bool is_simple_implemented(COLLTYPE_T coll)
{
    return ALLGATHER == coll
           || ALLREDUCE == coll
           || BCAST == coll
           || GATHER == coll
           || REDUCE == coll;
}
|
|
|
|
/*
 * Translate a topological level into its textual name.
 *
 * @param topo_lvl  topological level identifier
 * @return a constant string; "invalid topologic level" for NB_TOPO_LVL and
 *         for any value that is not a known level.
 */
const char* mca_coll_han_topo_lvl_to_str(TOPO_LVL_T topo_lvl)
{
    if (INTRA_NODE == topo_lvl) {
        return "intra_node";
    }
    if (INTER_NODE == topo_lvl) {
        return "inter_node";
    }
    if (GLOBAL_COMMUNICATOR == topo_lvl) {
        return "global_communicator";
    }

    /* NB_TOPO_LVL and every unknown value end up here */
    return "invalid topologic level";
}
|
|
/*
 * Translate a collective identifier into its textual name.
 *
 * @param coll  collective identifier
 * @return a constant string holding the lower-case name of the collective,
 *         or the empty string when the identifier is not a known collective.
 */
const char* mca_coll_han_colltype_to_str(COLLTYPE_T coll)
{
    /* Value kept for any identifier without a dedicated case below */
    const char *coll_name = "";

    switch (coll) {
    case ALLGATHER:           coll_name = "allgather";            break;
    case ALLGATHERV:          coll_name = "allgatherv";           break;
    case ALLREDUCE:           coll_name = "allreduce";            break;
    case ALLTOALL:            coll_name = "alltoall";             break;
    case ALLTOALLV:           coll_name = "alltoallv";            break;
    case ALLTOALLW:           coll_name = "alltoallw";            break;
    case BARRIER:             coll_name = "barrier";              break;
    case BCAST:               coll_name = "bcast";                break;
    case EXSCAN:              coll_name = "exscan";               break;
    case GATHER:              coll_name = "gather";               break;
    case GATHERV:             coll_name = "gatherv";              break;
    case REDUCE:              coll_name = "reduce";               break;
    case REDUCESCATTER:       coll_name = "reduce_scatter";       break;
    case REDUCESCATTERBLOCK:  coll_name = "reduce_scatter_block"; break;
    case SCAN:                coll_name = "scan";                 break;
    case SCATTER:             coll_name = "scatter";              break;
    case SCATTERV:            coll_name = "scatterv";             break;
    case NEIGHBOR_ALLGATHER:  coll_name = "neighbor_allgather";   break;
    case NEIGHBOR_ALLGATHERV: coll_name = "neighbor_allgatherv";  break;
    case NEIGHBOR_ALLTOALL:   coll_name = "neighbor_alltoall";    break;
    case NEIGHBOR_ALLTOALLV:  coll_name = "neighbor_alltoallv";   break;
    case NEIGHBOR_ALLTOALLW:  coll_name = "neighbor_alltoallw";   break;
    default:
        break;
    }

    return coll_name;
}
|
|
|
|
/*
|
|
* Register MCA params
|
|
*/
|
|
static int han_register(void)
|
|
{
|
|
mca_base_component_t *c = &mca_coll_han_component.super.collm_version;
|
|
mca_coll_han_component_t *cs = &mca_coll_han_component;
|
|
|
|
/* Generated parameters name and description */
|
|
char param_name[100] = "";
|
|
char param_desc[300] = "";
|
|
int param_desc_size;
|
|
COLLTYPE_T coll;
|
|
TOPO_LVL_T topo_lvl;
|
|
COMPONENT_T component;
|
|
|
|
cs->han_priority = 0;
|
|
(void) mca_base_component_var_register(c, "priority", "Priority of the han coll component",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_priority);
|
|
|
|
cs->han_bcast_segsize = 65536;
|
|
(void) mca_base_component_var_register(c, "bcast_segsize",
|
|
"segment size for bcast",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_segsize);
|
|
|
|
cs->han_bcast_up_module = 0;
|
|
(void) mca_base_component_var_register(c, "bcast_up_module",
|
|
"up level module for bcast, 0 libnbc, 1 adapt",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_up_module);
|
|
|
|
cs->han_bcast_low_module = 0;
|
|
(void) mca_base_component_var_register(c, "bcast_low_module",
|
|
"low level module for bcast, 0 sm, 1 solo",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_low_module);
|
|
|
|
cs->han_reduce_segsize = 524288;
|
|
(void) mca_base_component_var_register(c, "reduce_segsize",
|
|
"segment size for reduce",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_segsize);
|
|
|
|
cs->han_reduce_up_module = 0;
|
|
(void) mca_base_component_var_register(c, "reduce_up_module",
|
|
"up level module for allreduce, 0 libnbc, 1 adapt",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&cs->han_reduce_up_module);
|
|
|
|
cs->han_reduce_low_module = 0;
|
|
(void) mca_base_component_var_register(c, "reduce_low_module",
|
|
"low level module for allreduce, 0 sm, 1 shared",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&cs->han_reduce_low_module);
|
|
cs->han_allreduce_segsize = 524288;
|
|
(void) mca_base_component_var_register(c, "allreduce_segsize",
|
|
"segment size for allreduce",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_segsize);
|
|
|
|
cs->han_allreduce_up_module = 0;
|
|
(void) mca_base_component_var_register(c, "allreduce_up_module",
|
|
"up level module for allreduce, 0 libnbc, 1 adapt",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&cs->han_allreduce_up_module);
|
|
|
|
cs->han_allreduce_low_module = 0;
|
|
(void) mca_base_component_var_register(c, "allreduce_low_module",
|
|
"low level module for allreduce, 0 sm, 1 shared",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&cs->han_allreduce_low_module);
|
|
|
|
cs->han_allgather_up_module = 0;
|
|
(void) mca_base_component_var_register(c, "allgather_up_module",
|
|
"up level module for allgather, 0 libnbc, 1 adapt",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&cs->han_allgather_up_module);
|
|
|
|
cs->han_allgather_low_module = 0;
|
|
(void) mca_base_component_var_register(c, "allgather_low_module",
|
|
"low level module for allgather, 0 sm, 1 shared",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&cs->han_allgather_low_module);
|
|
|
|
cs->han_gather_up_module = 0;
|
|
(void) mca_base_component_var_register(c, "gather_up_module",
|
|
"up level module for gather, 0 libnbc, 1 adapt",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_up_module);
|
|
|
|
cs->han_gather_low_module = 0;
|
|
(void) mca_base_component_var_register(c, "gather_low_module",
|
|
"low level module for gather, 0 sm, 1 shared",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_low_module);
|
|
|
|
cs->han_scatter_up_module = 0;
|
|
(void) mca_base_component_var_register(c, "scatter_up_module",
|
|
"up level module for scatter, 0 libnbc, 1 adapt",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_scatter_up_module);
|
|
|
|
cs->han_scatter_low_module = 0;
|
|
(void) mca_base_component_var_register(c, "scatter_low_module",
|
|
"low level module for scatter, 0 sm, 1 shared",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&cs->han_scatter_low_module);
|
|
|
|
cs->han_auto_tune = 0;
|
|
(void) mca_base_component_var_register(c, "auto_tune",
|
|
"whether enable auto tune, 0 disable, 1 enable, default 0",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_auto_tune);
|
|
|
|
cs->han_reproducible = 0;
|
|
(void) mca_base_component_var_register(c, "reproducible",
|
|
"whether we need reproducible results "
|
|
"(enabling this disables optimisations using topology)"
|
|
"0 disable 1 enable, default 0",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_3,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&cs->han_reproducible);
|
|
|
|
/* Simple algorithms MCA parameters */
|
|
for(coll = 0 ; coll < COLLCOUNT ; coll++) {
|
|
cs->use_simple_algorithm[coll] = false;
|
|
if(is_simple_implemented(coll)) {
|
|
snprintf(param_name, 100, "use_simple_%s",
|
|
mca_coll_han_colltype_to_str(coll));
|
|
snprintf(param_desc, 300, "whether to enable simple algo for %s",
|
|
mca_coll_han_colltype_to_str(coll));
|
|
mca_base_component_var_register(c, param_name,
|
|
param_desc,
|
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
|
OPAL_INFO_LVL_5,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&(cs->use_simple_algorithm[coll]));
|
|
}
|
|
}
|
|
|
|
/* Dynamic rules MCA parameters */
|
|
/* TODO: Find a way to avoid unused entried */
|
|
memset(cs->mca_rules, 0,
|
|
COLLCOUNT * (GLOBAL_COMMUNICATOR+1) * sizeof(COMPONENT_T));
|
|
for(coll = 0 ; coll < COLLCOUNT ; coll++) {
|
|
if(!mca_coll_han_is_coll_dynamic_implemented(coll)) {
|
|
continue;
|
|
}
|
|
/*
|
|
* Default values
|
|
* Do not avoid to set correct default parameters
|
|
*/
|
|
cs->mca_rules[coll][INTRA_NODE] = TUNED;
|
|
cs->mca_rules[coll][INTER_NODE] = BASIC;
|
|
cs->mca_rules[coll][GLOBAL_COMMUNICATOR] = HAN;
|
|
|
|
for(topo_lvl = 0 ; topo_lvl < NB_TOPO_LVL ; topo_lvl++) {
|
|
|
|
snprintf(param_name, 100, "%s_dynamic_%s_module",
|
|
mca_coll_han_colltype_to_str(coll),
|
|
mca_coll_han_topo_lvl_to_str(topo_lvl));
|
|
|
|
param_desc_size = snprintf(param_desc, 300,
|
|
"Collective module to use for "
|
|
"collective %s on %s topological level: ",
|
|
mca_coll_han_colltype_to_str(coll),
|
|
mca_coll_han_topo_lvl_to_str(topo_lvl));
|
|
/*
|
|
* Exhaustive description:
|
|
* 0 = self; 1 = basic; 2 = libnbc; ...
|
|
* FIXME: Do not print component not providing this collective
|
|
*/
|
|
for(component = 0 ; component < COMPONENTS_COUNT ; component++) {
|
|
if(HAN == component && GLOBAL_COMMUNICATOR != topo_lvl) {
|
|
/* Han can only be used on the global communicator */
|
|
continue;
|
|
}
|
|
param_desc_size += snprintf(param_desc+param_desc_size, 300,
|
|
"%d = %s; ",
|
|
component,
|
|
components_name[component]);
|
|
}
|
|
|
|
mca_base_component_var_register(c, param_name, param_desc,
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&(cs->mca_rules[coll][topo_lvl]));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* TODO: remove the following lines when auto-tune is added back to the code
|
|
*/
|
|
cs->han_auto_tune = 0;
|
|
|
|
cs->han_auto_tune_n = 5;
|
|
cs->han_auto_tune_c = 3;
|
|
cs->han_auto_tune_m = 21;
|
|
#if 0
|
|
cs->han_auto_tune_n = 5;
|
|
(void) mca_base_component_var_register(c, "auto_tune_n",
|
|
"auto tune n",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_auto_tune_n);
|
|
|
|
cs->han_auto_tune_c = 3;
|
|
(void) mca_base_component_var_register(c, "auto_tune_c",
|
|
"auto tune c",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_auto_tune_c);
|
|
|
|
cs->han_auto_tune_m = 21;
|
|
(void) mca_base_component_var_register(c, "auto_tune_m",
|
|
"auto tune n",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_9,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&cs->han_auto_tune_m);
|
|
#endif
|
|
|
|
/* Dynamic rules */
|
|
cs->use_dynamic_file_rules = false;
|
|
(void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version,
|
|
"use_dynamic_file_rules",
|
|
"Switch used to decide if we use "
|
|
"dynamic module choice rules "
|
|
"defines by file",
|
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
|
OPAL_INFO_LVL_6,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&(cs->use_dynamic_file_rules));
|
|
|
|
cs->dynamic_rules_filename = NULL;
|
|
(void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version,
|
|
"dynamic_rules_filename",
|
|
"Filename of configuration file that "
|
|
"contains the dynamic module choice rules",
|
|
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
|
OPAL_INFO_LVL_6,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&(cs->dynamic_rules_filename));
|
|
|
|
cs->dump_dynamic_rules = false;
|
|
(void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version,
|
|
"dump_dynamic_rules",
|
|
"Switch used to decide if we dump "
|
|
"dynamic rules provided by "
|
|
"configuration file",
|
|
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
|
OPAL_INFO_LVL_6,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&(cs->dump_dynamic_rules));
|
|
|
|
if((cs->dump_dynamic_rules || NULL != cs->dynamic_rules_filename)
|
|
&& !cs->use_dynamic_file_rules) {
|
|
opal_output_verbose(0, cs->han_output,
|
|
"coll:han:han_register "
|
|
"you asked for dynamic rules "
|
|
"but they are not activated. "
|
|
"Check coll_han_use_dynamic_file_rules "
|
|
"MCA parameter");
|
|
}
|
|
|
|
cs->max_dynamic_errors = 10;
|
|
(void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version,
|
|
"max_dynamic_errors",
|
|
"Number of dynamic rules module/function "
|
|
"errors printed on rank 0 "
|
|
"with a 0 verbosity."
|
|
"Useless if coll_base_verbose is 30 or more.",
|
|
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
|
OPAL_INFO_LVL_6,
|
|
MCA_BASE_VAR_SCOPE_READONLY,
|
|
&(cs->max_dynamic_errors));
|
|
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|