1
1
openmpi/ompi/mca/coll/han/coll_han_component.c
bsergentm 220b997a58 Coll/han Bull
* first import of Bull specific modifications to HAN

* Cleaning, renaming and compilation fixing Changed all future into han.

* Import BULL specific modifications in coll/tuned and coll/base

* Fixed compilation issues in Han

* Changed han_output to directly point to coll framework output.

* The verbosity MCA parameter was removed as a duplicated of coll verbosity

* Add fallback in han reduce when op cannot commute and ppn are imbalanced

* Added fallback wfor han bcast when nodes do not have the same number of process

* Add fallback in han scatter when ppn are imbalanced

+ fixed missing scatter_fn pointer in the module interface

Signed-off-by: Brelle Emmanuel <emmanuel.brelle@atos.net>
Co-authored-by: a700850 <pierre.lemarinier@atos.net>
Co-authored-by: germainf <florent.germain@atos.net>
2020-10-09 14:17:46 -04:00

517 строки
22 KiB
C

/*
* Copyright (c) 2018-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Most of the description of the data layout is in the
* coll_han_module.c file.
*/
#include "ompi_config.h"
#include "opal/util/show_help.h"
#include "ompi/constants.h"
#include "ompi/mca/coll/coll.h"
#include "coll_han.h"
#include "coll_han_dynamic.h"
#include "coll_han_dynamic_file.h"
/*
* Public string showing the coll ompi_han component version number
*/
const char *mca_coll_han_component_version_string =
"Open MPI han collective MCA component version " OMPI_VERSION;
/*
* Local functions
*/
static int han_open(void);
static int han_close(void);
static int han_register(void);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
mca_coll_han_component_t mca_coll_han_component = {
/* First, fill in the super */
{
/* First, the mca_component_t struct containing meta
information about the component itself */
.collm_version = {
MCA_COLL_BASE_VERSION_2_0_0,
/* Component name and version */
.mca_component_name = "han",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
/* Component functions */
.mca_open_component = han_open,
.mca_close_component = han_close,
.mca_register_component_params = han_register,
},
.collm_data = {
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE},
/* Initialization / querying functions */
.collm_init_query = mca_coll_han_init_query,
.collm_comm_query = mca_coll_han_comm_query,
},
/* han-component specifc information */
/* (default) priority */
20,
};
/*
* Init the component
*/
static int han_open(void)
{
int param;
mca_coll_han_component_t *cs = &mca_coll_han_component;
if (cs->han_auto_tune) {
cs->han_auto_tuned =
(selection *) malloc(2 * cs->han_auto_tune_n * cs->han_auto_tune_c *
cs->han_auto_tune_m * sizeof(selection));
char *filename = "/home/dycz0fx/results/auto/auto_tuned.bin";
FILE *file = fopen(filename, "r");
fread(cs->han_auto_tuned, sizeof(selection),
2 * cs->han_auto_tune_n * cs->han_auto_tune_c * cs->han_auto_tune_m, file);
fclose(file);
}
/*
* Get the global coll verbosity: it will be ours
*/
cs->han_output = ompi_coll_base_framework.framework_output;
opal_output_verbose(1, cs->han_output,
"coll:han:component_open: done!");
cs->topo_level = GLOBAL_COMMUNICATOR;
return mca_coll_han_init_dynamic_rules();
}
/*
* Shut down the component
*/
static int han_close(void)
{
mca_coll_han_component_t *cs = &mca_coll_han_component;
if (cs->han_auto_tune && cs->han_auto_tuned != NULL) {
free(cs->han_auto_tuned);
cs->han_auto_tuned = NULL;
}
mca_coll_han_free_dynamic_rules();
return OMPI_SUCCESS;
}
static bool is_simple_implemented(COLLTYPE_T coll)
{
switch(coll) {
case ALLGATHER:
case ALLREDUCE:
case BCAST:
case GATHER:
case REDUCE:
return true;
default:
return false;
}
}
const char* mca_coll_han_topo_lvl_to_str(TOPO_LVL_T topo_lvl)
{
switch(topo_lvl) {
case INTRA_NODE:
return "intra_node";
case INTER_NODE:
return "inter_node";
case GLOBAL_COMMUNICATOR:
return "global_communicator";
case NB_TOPO_LVL:
default:
return "invalid topologic level";
}
}
const char* mca_coll_han_colltype_to_str(COLLTYPE_T coll)
{
switch(coll) {
case ALLGATHER:
return "allgather";
case ALLGATHERV:
return "allgatherv";
case ALLREDUCE:
return "allreduce";
case ALLTOALL:
return "alltoall";
case ALLTOALLV:
return "alltoallv";
case ALLTOALLW:
return "alltoallw";
case BARRIER:
return "barrier";
case BCAST:
return "bcast";
case EXSCAN:
return "exscan";
case GATHER:
return "gather";
case GATHERV:
return "gatherv";
case REDUCE:
return "reduce";
case REDUCESCATTER:
return "reduce_scatter";
case REDUCESCATTERBLOCK:
return "reduce_scatter_block";
case SCAN:
return "scan";
case SCATTER:
return "scatter";
case SCATTERV:
return "scatterv";
case NEIGHBOR_ALLGATHER:
return "neighbor_allgather";
case NEIGHBOR_ALLGATHERV:
return "neighbor_allgatherv";
case NEIGHBOR_ALLTOALL:
return "neighbor_alltoall";
case NEIGHBOR_ALLTOALLV:
return "neighbor_alltoallv";
case NEIGHBOR_ALLTOALLW:
return "neighbor_alltoallw";
default:
return "";
}
}
/*
* Register MCA params
*/
static int han_register(void)
{
mca_base_component_t *c = &mca_coll_han_component.super.collm_version;
mca_coll_han_component_t *cs = &mca_coll_han_component;
/* Generated parameters name and description */
char param_name[100] = "";
char param_desc[300] = "";
int param_desc_size;
COLLTYPE_T coll;
TOPO_LVL_T topo_lvl;
COMPONENT_T component;
cs->han_priority = 0;
(void) mca_base_component_var_register(c, "priority", "Priority of the han coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_priority);
cs->han_bcast_segsize = 65536;
(void) mca_base_component_var_register(c, "bcast_segsize",
"segment size for bcast",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_segsize);
cs->han_bcast_up_module = 0;
(void) mca_base_component_var_register(c, "bcast_up_module",
"up level module for bcast, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_up_module);
cs->han_bcast_low_module = 0;
(void) mca_base_component_var_register(c, "bcast_low_module",
"low level module for bcast, 0 sm, 1 solo",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_bcast_low_module);
cs->han_reduce_segsize = 524288;
(void) mca_base_component_var_register(c, "reduce_segsize",
"segment size for reduce",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_reduce_segsize);
cs->han_reduce_up_module = 0;
(void) mca_base_component_var_register(c, "reduce_up_module",
"up level module for allreduce, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->han_reduce_up_module);
cs->han_reduce_low_module = 0;
(void) mca_base_component_var_register(c, "reduce_low_module",
"low level module for allreduce, 0 sm, 1 shared",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->han_reduce_low_module);
cs->han_allreduce_segsize = 524288;
(void) mca_base_component_var_register(c, "allreduce_segsize",
"segment size for allreduce",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_allreduce_segsize);
cs->han_allreduce_up_module = 0;
(void) mca_base_component_var_register(c, "allreduce_up_module",
"up level module for allreduce, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->han_allreduce_up_module);
cs->han_allreduce_low_module = 0;
(void) mca_base_component_var_register(c, "allreduce_low_module",
"low level module for allreduce, 0 sm, 1 shared",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->han_allreduce_low_module);
cs->han_allgather_up_module = 0;
(void) mca_base_component_var_register(c, "allgather_up_module",
"up level module for allgather, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->han_allgather_up_module);
cs->han_allgather_low_module = 0;
(void) mca_base_component_var_register(c, "allgather_low_module",
"low level module for allgather, 0 sm, 1 shared",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->han_allgather_low_module);
cs->han_gather_up_module = 0;
(void) mca_base_component_var_register(c, "gather_up_module",
"up level module for gather, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_up_module);
cs->han_gather_low_module = 0;
(void) mca_base_component_var_register(c, "gather_low_module",
"low level module for gather, 0 sm, 1 shared",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_gather_low_module);
cs->han_scatter_up_module = 0;
(void) mca_base_component_var_register(c, "scatter_up_module",
"up level module for scatter, 0 libnbc, 1 adapt",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_scatter_up_module);
cs->han_scatter_low_module = 0;
(void) mca_base_component_var_register(c, "scatter_low_module",
"low level module for scatter, 0 sm, 1 shared",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->han_scatter_low_module);
cs->han_auto_tune = 0;
(void) mca_base_component_var_register(c, "auto_tune",
"whether enable auto tune, 0 disable, 1 enable, default 0",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_auto_tune);
cs->han_reproducible = 0;
(void) mca_base_component_var_register(c, "reproducible",
"whether we need reproducible results "
"(enabling this disables optimisations using topology)"
"0 disable 1 enable, default 0",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->han_reproducible);
/* Simple algorithms MCA parameters */
for(coll = 0 ; coll < COLLCOUNT ; coll++) {
cs->use_simple_algorithm[coll] = false;
if(is_simple_implemented(coll)) {
snprintf(param_name, 100, "use_simple_%s",
mca_coll_han_colltype_to_str(coll));
snprintf(param_desc, 300, "whether to enable simple algo for %s",
mca_coll_han_colltype_to_str(coll));
mca_base_component_var_register(c, param_name,
param_desc,
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_READONLY,
&(cs->use_simple_algorithm[coll]));
}
}
/* Dynamic rules MCA parameters */
/* TODO: Find a way to avoid unused entried */
memset(cs->mca_rules, 0,
COLLCOUNT * (GLOBAL_COMMUNICATOR+1) * sizeof(COMPONENT_T));
for(coll = 0 ; coll < COLLCOUNT ; coll++) {
if(!mca_coll_han_is_coll_dynamic_implemented(coll)) {
continue;
}
/*
* Default values
* Do not avoid to set correct default parameters
*/
cs->mca_rules[coll][INTRA_NODE] = TUNED;
cs->mca_rules[coll][INTER_NODE] = BASIC;
cs->mca_rules[coll][GLOBAL_COMMUNICATOR] = HAN;
for(topo_lvl = 0 ; topo_lvl < NB_TOPO_LVL ; topo_lvl++) {
snprintf(param_name, 100, "%s_dynamic_%s_module",
mca_coll_han_colltype_to_str(coll),
mca_coll_han_topo_lvl_to_str(topo_lvl));
param_desc_size = snprintf(param_desc, 300,
"Collective module to use for "
"collective %s on %s topological level: ",
mca_coll_han_colltype_to_str(coll),
mca_coll_han_topo_lvl_to_str(topo_lvl));
/*
* Exhaustive description:
* 0 = self; 1 = basic; 2 = libnbc; ...
* FIXME: Do not print component not providing this collective
*/
for(component = 0 ; component < COMPONENTS_COUNT ; component++) {
if(HAN == component && GLOBAL_COMMUNICATOR != topo_lvl) {
/* Han can only be used on the global communicator */
continue;
}
param_desc_size += snprintf(param_desc+param_desc_size, 300,
"%d = %s; ",
component,
components_name[component]);
}
mca_base_component_var_register(c, param_name, param_desc,
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&(cs->mca_rules[coll][topo_lvl]));
}
}
/*
* TODO: remove the following lines when auto-tune is added back to the code
*/
cs->han_auto_tune = 0;
cs->han_auto_tune_n = 5;
cs->han_auto_tune_c = 3;
cs->han_auto_tune_m = 21;
#if 0
cs->han_auto_tune_n = 5;
(void) mca_base_component_var_register(c, "auto_tune_n",
"auto tune n",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_auto_tune_n);
cs->han_auto_tune_c = 3;
(void) mca_base_component_var_register(c, "auto_tune_c",
"auto tune c",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_auto_tune_c);
cs->han_auto_tune_m = 21;
(void) mca_base_component_var_register(c, "auto_tune_m",
"auto tune n",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&cs->han_auto_tune_m);
#endif
/* Dynamic rules */
cs->use_dynamic_file_rules = false;
(void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version,
"use_dynamic_file_rules",
"Switch used to decide if we use "
"dynamic module choice rules "
"defines by file",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY,
&(cs->use_dynamic_file_rules));
cs->dynamic_rules_filename = NULL;
(void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version,
"dynamic_rules_filename",
"Filename of configuration file that "
"contains the dynamic module choice rules",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY,
&(cs->dynamic_rules_filename));
cs->dump_dynamic_rules = false;
(void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version,
"dump_dynamic_rules",
"Switch used to decide if we dump "
"dynamic rules provided by "
"configuration file",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY,
&(cs->dump_dynamic_rules));
if((cs->dump_dynamic_rules || NULL != cs->dynamic_rules_filename)
&& !cs->use_dynamic_file_rules) {
opal_output_verbose(0, cs->han_output,
"coll:han:han_register "
"you asked for dynamic rules "
"but they are not activated. "
"Check coll_han_use_dynamic_file_rules "
"MCA parameter");
}
cs->max_dynamic_errors = 10;
(void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version,
"max_dynamic_errors",
"Number of dynamic rules module/function "
"errors printed on rank 0 "
"with a 0 verbosity."
"Useless if coll_base_verbose is 30 or more.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_6,
MCA_BASE_VAR_SCOPE_READONLY,
&(cs->max_dynamic_errors));
return OMPI_SUCCESS;
}