1
1
openmpi/ompi/mca/coll/han/coll_han_dynamic_file.c
George Bosilca 16b49dc5b3 A complete overhaul of the HAN code.
Among many other things:
- Fix an imbalance bug in MPI_allgather
- Accept more human readable configuration files. We can now specify
  the collective by name instead of a magic number, and the component
  we want to use also by name.
- Add the capability to have optional arguments in the collective
  communication configuration file. Right now the capability exists
  for segment lengths, but is yet to be connected with the algorithms.
- Redo the initialization of all HAN collectives.

Cleanup the fallback collective support.
- In case the module is unable to deliver the expected result, it will fall back
  to executing the collective operation on another collective component. This change
  makes the support for this fallback simpler to use.
- Implement a fallback allowing a HAN module to remove itself as
  potential active collective module, and instead fallback to the
  next module in line.
- Completely disable the HAN modules on error. From the moment an error is
  encountered they remove themselves from the communicator, and in case some
  other module calls them, they simply behave as a pass-through.

Communicator: provide ompi_comm_split_with_info to split and provide info at the same time
Add ompi_comm_coll_preference info key to control collective component selection

COLL HAN: use info keys instead of component-level variable to communicate topology level between abstraction layers
- The info value is a comma-separated list of entries, which are chosen with
  decreasing priorities. This overrides the priority of the component,
  unless the component has disqualified itself.
  An entry prefixed with ^ starts the ignore-list. Any entry following this
  character will be ignored during the collective component selection for the
  communicator.
  Example: "sm,libnbc,^han,adapt" gives sm the highest preference, followed
  by libnbc. The components han and adapt are ignored in the selection process.
- Allocate a temporary buffer for all lower-level leaders (length 2 segments)
- Fix the handling of MPI_IN_PLACE for gather and scatter.

COLL HAN: Fix topology handling
 - HAN should not rely on node names to determine the ordering of ranks.
   Instead, use the node leaders as identifiers and short-cut if the
   node-leaders agree that ranks are consecutive. Also, error out if
   the rank distribution is imbalanced for now.

Signed-off-by: Xi Luo <xluo12@vols.utk.edu>
Signed-off-by: Joseph Schuchart <schuchart@icl.utk.edu>
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
2020-10-25 18:13:16 -04:00

607 строки
26 KiB
C

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2018-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#include "ompi_config.h"
#include "coll_han.h"
#include "coll_han_dynamic.h"
#include "coll_han_dynamic_file.h"
#include "ompi/mca/coll/base/coll_base_util.h"
/*
 * Shorthands for the ompi_coll_base_file_getnext_* readers. Each one forwards
 * the stream and the output pointer, and passes the address of the file-scope
 * `fileline` counter defined below so the reader can update it.
 */
#define getnext_long(fptr, pval) ompi_coll_base_file_getnext_long(fptr, &fileline, pval)
#define getnext_string(fptr, pval) ompi_coll_base_file_getnext_string(fptr, &fileline, pval)
#define getnext_size_t(fptr, pval) ompi_coll_base_file_getnext_size_t(fptr, &fileline, pval)
/* Consistency checker run once a rules file has been read successfully */
static void check_dynamic_rules(void);
/* Current file line for verbose message (starts at line 1) */
static int fileline = 1;
/**
 * Parse the HAN dynamic rules file and fill
 * mca_coll_han_component.dynamic_rules.
 *
 * The file is read as a nested sequence of counted sections:
 *   collective count
 *     collective name (or legacy numeric id), topologic level count
 *       topologic level id, configuration rule count
 *         configuration size, message size rule count
 *           message size, component name, optional "[ seglen ... ]" list
 *
 * While a section is being filled, the counter stored in the rule tree
 * (nb_collectives, nb_topologic_levels, nb_rules, nb_msg_size) is kept equal
 * to the number of entries started so far. This lets
 * mca_coll_han_free_dynamic_rules() release a partially built tree on error.
 *
 * @return OMPI_SUCCESS when dynamic file rules are disabled, when no file
 *         name is configured, when the file cannot be opened, when the file
 *         cannot be fully parsed (the partial rules are freed), and when the
 *         rules were read completely.
 *         OMPI_ERROR when a rule array cannot be allocated.
 */
int
mca_coll_han_init_dynamic_rules(void)
{
    /* File management */
    const char *fname;
    FILE *fptr = NULL;
    int nb_entries = 0, rc;

    /* Loop counters */
    int i, j, k, l;

    /*
     * Every value that may be printed by an error message right after a
     * failed read is initialized, so that the message never formats an
     * indeterminate value.
     */

    /* Collective informations */
    long nb_coll = 0, coll_id;
    char *coll_name = NULL;         /* heap string from getnext_string, owned here */
    const char *coll_label = NULL;  /* name used in messages, never freed */
    collective_rule_t *coll_rules;

    /* Topo informations */
    long nb_topo = 0, topo_lvl = 0;
    topologic_rule_t *topo_rules;

    /* Configuration informations */
    long nb_rules, conf_size = 0;
    configuration_rule_t *conf_rules;

    /* Message size informations */
    long nb_msg_size = 0;
    size_t msg_size = 0;
    msg_size_rule_t *msg_size_rules;

    /* Component informations */
    long component;

    /* Scratch value for the trailing-data probe */
    long trailing = 0;

    /* If the dynamic rules are not used, do not even read the file */
    if( !mca_coll_han_component.use_dynamic_file_rules ) {
        return OMPI_SUCCESS;
    }

    if( NULL == (fname = mca_coll_han_component.dynamic_rules_filename) ) {
        opal_output_verbose(5, mca_coll_han_component.han_output,
                            "coll:han:mca_coll_han_init_dynamic_rules coll_han_use_dynamic_file_rules is set but "
                            "coll_han_dynamic_rules_filename is not Rules from MCA parameters will be used instead\n");
        mca_coll_han_component.dynamic_rules.nb_collectives = 0;
        return OMPI_SUCCESS;
    }

    if( NULL == (fptr = fopen(fname, "r")) ) {
        opal_output_verbose(5, mca_coll_han_component.han_output,
                            "coll:han:mca_coll_han_init_dynamic_rules cannot open dynamic file provided by "
                            "coll_han_dynamic_rules_filename=%s. Make sure it provides the full path and "
                            "check file permissions. Rules from MCA parameters will be used instead\n",
                            fname);
        mca_coll_han_component.dynamic_rules.nb_collectives = 0;
        return OMPI_SUCCESS;
    }

    /* The first information of the file is the collective count */
    if( (getnext_long(fptr, &nb_coll) < 0) || (nb_coll <= 0) ) {
        opal_output_verbose(5, mca_coll_han_component.han_output,
                            "coll:han:mca_coll_han_init_dynamic_rules found an error on dynamic rules file %s "
                            "at line %d: an invalid value %ld is given for collective count "
                            "or the reader encountered an unexpected EOF\n",
                            fname, fileline, nb_coll);
        mca_coll_han_component.dynamic_rules.nb_collectives = 0;
        goto file_reading_error;
    }

    mca_coll_han_component.dynamic_rules.nb_collectives = nb_coll;

    /* Allocate collective rules */
    coll_rules = malloc(nb_coll * sizeof(collective_rule_t));
    mca_coll_han_component.dynamic_rules.collective_rules = coll_rules;
    if( NULL == coll_rules ) {
        mca_coll_han_component.dynamic_rules.nb_collectives = 0;
        goto cannot_allocate;
    }

    /* Iterates on collective rules */
    for( i = 0 ; i < nb_coll ; i++ ) {
        coll_rules[i].nb_topologic_levels = 0;
        mca_coll_han_component.dynamic_rules.nb_collectives = i+1;

        /* Get the collective identifier */
        if( getnext_string(fptr, &coll_name) < 0 ) {
            opal_output_verbose(5, mca_coll_han_component.han_output,
                                "coll:han:mca_coll_han_init_dynamic_rules invalid collective at line %d."
                                "The rest of the input file will be ignored.\n",
                                fileline);
            goto file_reading_error;
        }
        coll_label = coll_name;
        coll_id = mca_coll_base_name_to_colltype(coll_name);
        if( (coll_id < ALLGATHER) || (coll_id >= COLLCOUNT) ) {
            /* maybe the file was in the old format and we read the collective index instead of the name. */
            char *endp;
            coll_id = strtol(coll_name, &endp, 10);
            /* Reject empty input, trailing garbage and ids outside the range the message announces */
            if( (endp == coll_name) || ('\0' != *endp) ||
                (coll_id < ALLGATHER) || (coll_id >= COLLCOUNT) ) {
                opal_output_verbose(5, mca_coll_han_component.han_output,
                                    "coll:han:mca_coll_han_init_dynamic_rules invalid collective %s "
                                    "at line %d: the collective must be at least %d and less than %d. "
                                    "The rest of the input file will be ignored.\n",
                                    coll_name, fileline, ALLGATHER, COLLCOUNT);
                goto file_reading_error;
            }
            /*
             * Only borrow the translated name for the messages below.
             * coll_name keeps pointing at the heap string read from the file,
             * so the free() calls in this function only ever release memory
             * that was obtained from getnext_string.
             */
            coll_label = mca_coll_base_colltype_to_str(coll_id);
            if( NULL == coll_label ) {
                coll_label = coll_name;
            }
        }

        if( !mca_coll_han_is_coll_dynamic_implemented(coll_id) ) {
            opal_output_verbose(5, mca_coll_han_component.han_output,
                                "coll:han:mca_coll_han_init_dynamic_rules found an error on dynamic rules file %s "
                                "read collective id %ld at line %d but this collective is not implemented yet. "
                                "This is not an error but this set of rules will not be used\n",
                                fname, coll_id, fileline);
        }

        /*
         * The first information of a collective rule
         * is the number of topologic rules
         */
        if( (getnext_long(fptr, &nb_topo) < 0) || (nb_topo < 0) ) {
            opal_output_verbose(5, mca_coll_han_component.han_output,
                                "coll:han:mca_coll_han_init_dynamic_rules found an error on dynamic rules file %s "
                                "at line %d: an invalid value %ld is given for topo level count "
                                "or the reader encountered an unexpected EOF\n",
                                fname, fileline, nb_topo);
            goto file_reading_error;
        }

        /* Store the collective rule informations */
        coll_rules[i].nb_topologic_levels = nb_topo;
        coll_rules[i].collective_id = (COLLTYPE_T)coll_id;

        if( 0 == nb_topo ) {
            opal_output_verbose(5, mca_coll_han_component.han_output,
                                "coll:han:mca_coll_han_init_dynamic_rules Warning on dynamic rules file %s "
                                "at line %d: an invalid value %ld is given for topo level count\n",
                                fname, fileline, nb_topo);
            /* This iteration is done with the name: release it before skipping ahead */
            free(coll_name);
            coll_name = NULL;
            continue;
        }

        /* Allocate topologic rules */
        topo_rules = malloc(nb_topo * sizeof(topologic_rule_t));
        coll_rules[i].topologic_rules = topo_rules;
        if( NULL == topo_rules ) {
            coll_rules[i].nb_topologic_levels = 0;
            goto cannot_allocate;
        }

        /* Iterates on topologic rules */
        for( j = 0 ; j < nb_topo ; j++ ) {
            topo_rules[j].nb_rules = 0;
            coll_rules[i].nb_topologic_levels = j+1;

            /* Get the topologic level identifier */
            if( (getnext_long(fptr, &topo_lvl) < 0) || (topo_lvl < INTRA_NODE) || (topo_lvl >= NB_TOPO_LVL) ) {
                opal_output_verbose(5, mca_coll_han_component.han_output,
                                    "coll:han:mca_coll_han_init_dynamic_rules found an error on dynamic rules file %s "
                                    "at line %d: an invalid topo level %ld is given or the reader encountered an unexpected EOF. "
                                    "Topologic level must be at least %d and less than %d\n",
                                    fname, fileline, topo_lvl, INTRA_NODE, NB_TOPO_LVL);
                goto file_reading_error;
            }

            /*
             * The first information of a topologic rule
             * is the number of configurations
             */
            nb_rules = -1;
            if( (getnext_long(fptr, &nb_rules) < 0) || (nb_rules < 0) ) {
                opal_output_verbose(5, mca_coll_han_component.han_output,
                                    "coll:han:mca_coll_han_init_dynamic_rules found an error on dynamic rules file %s "
                                    "at line %d: an invalid value %ld is given for rules count "
                                    "or the reader encountered an unexpected EOF\n",
                                    fname, fileline, nb_rules);
                goto file_reading_error;
            }

            /* Store the topologic rule informations */
            topo_rules[j].collective_id = coll_id;
            topo_rules[j].topologic_level = (TOPO_LVL_T)topo_lvl;
            topo_rules[j].nb_rules = nb_rules;

            if( 0 == nb_rules ) {
                opal_output_verbose(5, mca_coll_han_component.han_output,
                                    "coll:han:mca_coll_han_init_dynamic_rules Warning on dynamic rules file %s "
                                    "at line %d: an invalid value %ld is given for configuration rules count\n",
                                    fname, fileline, nb_rules);
                continue;
            }

            /* Allocate configuration rules */
            conf_rules = malloc(nb_rules * sizeof(configuration_rule_t));
            topo_rules[j].configuration_rules = conf_rules;
            if( NULL == conf_rules ) {
                topo_rules[j].nb_rules = 0;
                goto cannot_allocate;
            }

            /* Iterate on configuration rules */
            for( k = 0; k < nb_rules; k++ ) {
                conf_rules[k].nb_msg_size = 0;
                topo_rules[j].nb_rules = k+1;

                /* Get the configuration size */
                if( (getnext_long(fptr, &conf_size) < 0) || (conf_size < 1) || (0 == k && conf_size > 1) ) {
                    opal_output_verbose(5, mca_coll_han_component.han_output,
                                        "coll:han:mca_coll_han_init_dynamic_rules invalid configuration size %ld at line %d "
                                        "or the reader encountered an unexpected EOF the configuration size must be at least %d "
                                        "and the first configuration size of a topologic level must be %d\n",
                                        conf_size, fileline, 1, 1);
                    goto file_reading_error;
                }

                /*
                 * The first information of a configuration rule
                 * is the number of message size rules
                 */
                if( (getnext_long(fptr, &nb_msg_size) < 0) || (nb_msg_size < 0) ) {
                    opal_output_verbose(5, mca_coll_han_component.han_output,
                                        "coll:han:mca_coll_han_init_dynamic_rules found an error on dynamic rules file %s "
                                        "at line %d: an invalid value %ld is given for message size rules count "
                                        "or the reader encountered an unexpected EOF\n",
                                        fname, fileline, nb_msg_size);
                    goto file_reading_error;
                }

                /* Store configuration rule information */
                conf_rules[k].collective_id = coll_id;
                conf_rules[k].topologic_level = topo_lvl;
                conf_rules[k].configuration_size = conf_size;
                conf_rules[k].nb_msg_size = nb_msg_size;

                if( 0 == nb_msg_size ) {
                    opal_output_verbose(5, mca_coll_han_component.han_output,
                                        "coll:han:mca_coll_han_init_dynamic_rules Warning on dynamic rules file %s "
                                        "at line %d: an invalid value %ld is given for message size rules count\n",
                                        fname, fileline, nb_msg_size);
                    continue;
                }

                /* Allocate message size rules */
                msg_size_rules = malloc(nb_msg_size * sizeof(msg_size_rule_t));
                conf_rules[k].msg_size_rules = msg_size_rules;
                if( NULL == msg_size_rules ) {
                    conf_rules[k].nb_msg_size = 0;
                    goto cannot_allocate;
                }

                /* Iterate on message size rules */
                for( l = 0; l < nb_msg_size; l++ ) {
                    char *target_comp_name = NULL;
                    conf_rules[k].nb_msg_size = l+1;

                    /* Get the message size */
                    rc = getnext_size_t(fptr, &msg_size);
                    /* NOTE(review): the message below says the first size must be 0,
                     * but the test only rejects values above 1 - confirm which is intended */
                    if( (rc < 0) ||
                        (0 == l && msg_size > 1) ) {
                        opal_output_verbose(5, mca_coll_han_component.han_output,
                                            "coll:han:mca_coll_han_init_dynamic_rules found an error on dynamic rules file %s "
                                            "at line %d: an invalid value %" PRIsize_t " is given for message size "
                                            "or the reader encountered an unexpected EOF. "
                                            "The first message size rule of a configuration must be 0\n",
                                            fname, fileline, msg_size);
                        goto file_reading_error;
                    }

                    /* Get the component identifier for this message size rule */
                    if( getnext_string(fptr, &target_comp_name) < 0 ) {
                        opal_output_verbose(5, mca_coll_han_component.han_output,
                                            "coll:han:mca_coll_han_init_dynamic_rules found an error on dynamic rules file %s "
                                            "at line %d: cannot read the name of a collective component\n",
                                            fname, fileline);
                        goto file_reading_error;
                    }
                    component = mca_coll_han_component_name_to_id(target_comp_name);
                    if( (component < SELF) || (component >= COMPONENTS_COUNT) ) {
                        opal_output_verbose(5, mca_coll_han_component.han_output,
                                            "coll:han:mca_coll_han_init_dynamic_rules found an error on dynamic rules file %s "
                                            "at line %d: an invalid collective component name %s was given or the "
                                            "reader encountered an unexpected EOF. Collective component id must be at "
                                            "least %d and less than %d\n",
                                            fname, fileline, target_comp_name, SELF, COMPONENTS_COUNT);
                        free(target_comp_name);
                        goto file_reading_error;
                    }

                    /* Store message size rule information */
                    msg_size_rules[l].collective_id = coll_id;
                    msg_size_rules[l].topologic_level = topo_lvl;
                    msg_size_rules[l].configuration_size = conf_size;
                    msg_size_rules[l].msg_size = msg_size;
                    msg_size_rules[l].component = (COMPONENT_T)component;

                    nb_entries++;

                    /* do we have the optional segment length */
                    if( 1 == ompi_coll_base_file_peek_next_char_is(fptr, &fileline, '[') ) {
                        opal_output_verbose(5, mca_coll_han_component.han_output,
                                            "coll:han:mca_coll_han_init_dynamic_rules found optional pipelining segment lengths\n");
                        long seglength;
                        if( 0 != topo_lvl ) {
                            opal_output_verbose(5, mca_coll_han_component.han_output,
                                                "coll:han:mca_coll_han_init_dynamic_rules "
                                                "file %s line %d found segment lengths for topological collective at level != 0 "
                                                "for collective %s component %s. These values will be ignored.\n",
                                                fname, fileline, coll_label, target_comp_name);
                        }
                        /* The values are consumed up to the closing ']' but not stored */
                        while( 0 == ompi_coll_base_file_peek_next_char_is(fptr, &fileline, ']') ) {
                            if( getnext_long(fptr, &seglength) ) {
                                opal_output_verbose(5, mca_coll_han_component.han_output,
                                                    "coll:han:mca_coll_han_init_dynamic_rules "
                                                    "file %s line %d found end of file while reading the optional list "
                                                    "of segment lengths for collective %s component %s\n",
                                                    fname, fileline, coll_label, target_comp_name);
                                free(target_comp_name);
                                goto file_reading_error;
                            }
                        }
                    }
                    free(target_comp_name);
                }
            }
        }

        /* Done with this collective: release its name */
        free(coll_name);
        coll_name = NULL;
        coll_label = NULL;
    }

    /* NOTE(review): the other call sites treat a negative or non-zero return as
     * failure; confirm that a strictly positive return is what signals
     * leftover data here */
    if( getnext_long(fptr, &trailing) > 0 ) {
        opal_output_verbose(5, mca_coll_han_component.han_output,
                            "coll:han:mca_coll_han_init_dynamic_rules. Warning on file %s at line %d: "
                            "rule reading is over but reader does not seem to have reached the end of the file\n",
                            fname, fileline);
    }

    opal_output_verbose(5, mca_coll_han_component.han_output,
                        "coll:han:mca_coll_han_init_dynamic_rules read %d rules from %s\n",
                        nb_entries, fname);

    if( mca_coll_han_component.dump_dynamic_rules ) {
        mca_coll_han_dump_dynamic_rules();
    }

    fclose(fptr);

    check_dynamic_rules();
    return OMPI_SUCCESS;

cannot_allocate:
    /* The dynamic rules allocation failed
     * Free the already allocated rules and return a failure
     */
    opal_output_verbose(0, mca_coll_han_component.han_output,
                        "coll:han:mca_coll_han_init_dynamic_rules "
                        "cannot allocate dynamic rules\n");
    /* Release the collective name and the rules file, which are still held here */
    free(coll_name);
    if( NULL != fptr ) {
        fclose(fptr);
    }
    /* Do not check free_dynamic_rules
     * because we are returning OMPI_ERROR anyway */
    mca_coll_han_free_dynamic_rules();
    return OMPI_ERROR;

file_reading_error:
    free(coll_name);
    opal_output_verbose(0, mca_coll_han_component.han_output,
                        "coll:han:mca_coll_han_init_dynamic_rules "
                        "could not fully read dynamic rules file. "
                        "Will use mca parameters defined rules. "
                        "To see error detail, please set "
                        "collective verbosity level over 5\n");
    if( NULL != fptr ) {
        fclose(fptr);
    }
    mca_coll_han_free_dynamic_rules();
    return OMPI_SUCCESS;
}
/**
 * Release every array of the dynamic rule tree stored in
 * mca_coll_han_component.dynamic_rules and mark the tree as empty.
 *
 * An array pointer is only read when the counter stored next to it is
 * positive: the reader sets the counters before the matching pointer is
 * valid, so a pointer whose counter is zero must not be touched.
 *
 * On return nb_collectives is 0 and collective_rules is NULL, so no stale
 * pointer stays reachable through the component.
 */
void
mca_coll_han_free_dynamic_rules(void)
{
    const int nb_coll = mca_coll_han_component.dynamic_rules.nb_collectives;

    if( nb_coll > 0 ) {
        collective_rule_t *coll_rules = mca_coll_han_component.dynamic_rules.collective_rules;

        for( int i = 0 ; i < nb_coll ; i++ ) {
            const int nb_topo = coll_rules[i].nb_topologic_levels;
            if( nb_topo <= 0 ) {
                continue;
            }
            topologic_rule_t *topo_rules = coll_rules[i].topologic_rules;

            for( int j = 0 ; j < nb_topo ; j++ ) {
                const int nb_conf = topo_rules[j].nb_rules;
                if( nb_conf <= 0 ) {
                    continue;
                }
                configuration_rule_t *conf_rules = topo_rules[j].configuration_rules;

                for( int k = 0 ; k < nb_conf ; k++ ) {
                    if( conf_rules[k].nb_msg_size > 0 ) {
                        free(conf_rules[k].msg_size_rules);
                    }
                }
                free(conf_rules);
            }
            free(topo_rules);
        }
        free(coll_rules);
    }

    /* Leave the component in a clean, empty state */
    mca_coll_han_component.dynamic_rules.collective_rules = NULL;
    mca_coll_han_component.dynamic_rules.nb_collectives = 0;
}
/*
 * Walk the loaded dynamic rules and report logical inconsistencies.
 *
 * Three conditions are reported through opal_output_verbose at level 5:
 *  - configuration sizes of a topologic level that are not in increasing order,
 *  - message sizes of a configuration that are not in increasing order,
 *  - the HAN component selected on a level other than GLOBAL_COMMUNICATOR.
 * Nothing is modified; the rules are only inspected.
 */
static void check_dynamic_rules(void)
{
    const int coll_count = mca_coll_han_component.dynamic_rules.nb_collectives;
    collective_rule_t *all_coll = mca_coll_han_component.dynamic_rules.collective_rules;

    for( int c = 0; c < coll_count; c++ ) {
        COLLTYPE_T coll_id = all_coll[c].collective_id;
        int topo_count = all_coll[c].nb_topologic_levels;
        topologic_rule_t *all_topo = all_coll[c].topologic_rules;

        for( int t = 0; t < topo_count; t++ ) {
            TOPO_LVL_T topo_lvl = all_topo[t].topologic_level;
            int conf_count = all_topo[t].nb_rules;
            configuration_rule_t *all_conf = all_topo[t].configuration_rules;

            for( int r = 0; r < conf_count; r++ ) {
                int conf_size = all_conf[r].configuration_size;
                int msg_count = all_conf[r].nb_msg_size;
                msg_size_rule_t *all_msg = all_conf[r].msg_size_rules;

                /* Configuration sizes are expected in increasing order */
                if( r > 0 && conf_size < all_conf[r-1].configuration_size ) {
                    opal_output_verbose(5, mca_coll_han_component.han_output,
                                        "coll:han:check_dynamic_rules HAN found an issue on dynamic rules "
                                        "for collective %d on topological level %d: "
                                        "configuration sizes %d and %d are not sorted by increasing value\n",
                                        coll_id, topo_lvl, all_conf[r-1].configuration_size, conf_size);
                }

                for( int m = 0; m < msg_count; m++ ) {
                    size_t msg_size = all_msg[m].msg_size;
                    COMPONENT_T component = all_msg[m].component;

                    /* Message sizes are expected in increasing order */
                    if( m > 0 && msg_size < all_msg[m-1].msg_size ) {
                        opal_output_verbose(5, mca_coll_han_component.han_output,
                                            "coll:han:check_dynamic_rules HAN found an issue on dynamic rules "
                                            "for collective %d on topological level %d with configuration size %d: "
                                            "message sizes %" PRIsize_t " and %" PRIsize_t " are "
                                            "not sorted by increasing value\n",
                                            coll_id, topo_lvl, conf_size, all_msg[m-1].msg_size, msg_size);
                    }

                    /* Nothing more to report unless HAN is selected below the global level */
                    if( (HAN != component) || (GLOBAL_COMMUNICATOR == topo_lvl) ) {
                        continue;
                    }
                    opal_output_verbose(5, mca_coll_han_component.han_output,
                                        "coll:han:check_dynamic_rules HAN found an issue on dynamic rules "
                                        "for collective %d on topological level %d with configuration size %d "
                                        "for message size %" PRIsize_t ": han collective component %d "
                                        "can only be activated for topology level %d\n",
                                        coll_id, topo_lvl, conf_size, msg_size, HAN, GLOBAL_COMMUNICATOR);
                }
            }
        }
    }
}
void mca_coll_han_dump_dynamic_rules(void)
{
int nb_entries = 0;
/* Collective informations */
int nb_coll;
COLLTYPE_T coll_id;
collective_rule_t *coll_rules;
/* Topo informations */
int nb_topo;
TOPO_LVL_T topo_lvl;
topologic_rule_t *topo_rules;
/* Configuration informations */
int nb_rules, conf_size;
configuration_rule_t *conf_rules;
/* Message size informations */
int nb_msg_size, msg_size;
msg_size_rule_t *msg_size_rules;
/* Component informations */
COMPONENT_T component;
nb_coll = mca_coll_han_component.dynamic_rules.nb_collectives;
coll_rules = mca_coll_han_component.dynamic_rules.collective_rules;
for(int i = 0; i < nb_coll; i++ ) {
coll_id = coll_rules[i].collective_id;
nb_topo = coll_rules[i].nb_topologic_levels;
topo_rules = coll_rules[i].topologic_rules;
for(int j = 0; j < nb_topo; j++ ) {
topo_lvl = topo_rules[j].topologic_level;
nb_rules = topo_rules[j].nb_rules;
conf_rules = topo_rules[j].configuration_rules;
for(int k = 0; k < nb_rules; k++ ) {
conf_size = conf_rules[k].configuration_size;
nb_msg_size = conf_rules[k].nb_msg_size;
msg_size_rules = conf_rules[k].msg_size_rules;
for(int l = 0; l < nb_msg_size; l++ ) {
msg_size = msg_size_rules[l].msg_size;
component = msg_size_rules[l].component;
opal_output(mca_coll_han_component.han_output,
"coll:han:dump_dynamic_rules %d collective %d (%s) "
"topology level %d (%s) configuration size %d "
"mesage size %d -> collective component %d (%s)\n",
nb_entries, coll_id, mca_coll_base_colltype_to_str(coll_id),
topo_lvl, mca_coll_han_topo_lvl_to_str(topo_lvl), conf_size,
msg_size, component, available_components[component].component_name);
nb_entries++;
}
}
}
}
}