1
1
openmpi/ompi/mca/coll/han/coll_han_module.c
bsergentm 220b997a58 Coll/han Bull
* first import of Bull specific modifications to HAN

* Cleaning, renaming and compilation fixing Changed all future into han.

* Import BULL specific modifications in coll/tuned and coll/base

* Fixed compilation issues in Han

* Changed han_output to directly point to coll framework output.

* The verbosity MCA parameter was removed as a duplicate of coll verbosity

* Add fallback in han reduce when op cannot commute and ppn are imbalanced

* Added fallback for han bcast when nodes do not have the same number of processes

* Add fallback in han scatter when ppn are imbalanced

+ fixed missing scatter_fn pointer in the module interface

Signed-off-by: Brelle Emmanuel <emmanuel.brelle@atos.net>
Co-authored-by: a700850 <pierre.lemarinier@atos.net>
Co-authored-by: germainf <florent.germain@atos.net>
2020-10-09 14:17:46 -04:00

322 строки
12 KiB
C

/*
* Copyright (c) 2018-2020 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2020 Bull S.A.S. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mpi.h"
#include "coll_han.h"
#include "coll_han_dynamic.h"
/*
* Local functions
*
* Forward declarations of the enable/disable hooks defined later in this
* file. han_module_enable is stored in coll_module_enable by
* mca_coll_han_comm_query, and mca_coll_han_module_disable is stored in
* coll_module_disable by the module constructor.
*/
static int han_module_enable(mca_coll_base_module_t * module,
struct ompi_communicator_t *comm);
static int mca_coll_han_module_disable(mca_coll_base_module_t * module,
struct ompi_communicator_t *comm);
/*
 * Reset every saved "previous collective" entry (function pointer and
 * module pointer) as well as the reproducible reduce/allreduce fallbacks
 * to NULL.  No reference is released here.
 */
static void han_module_clear(mca_coll_han_module_t *han_module)
{
    int coll_id = 0;

    while (coll_id < COLLCOUNT) {
        /*
         * The previous routine function pointers are declared as a union,
         * so initializing the dummy member is enough.
         */
        han_module->previous_routines[coll_id].previous_module = NULL;
        han_module->previous_routines[coll_id].previous_routine.dummy = NULL;
        coll_id++;
    }

    han_module->reproducible_allreduce_module = NULL;
    han_module->reproducible_allreduce = NULL;
    han_module->reproducible_reduce_module = NULL;
    han_module->reproducible_reduce = NULL;
}
/*
 * Module constructor: put a new HAN module in its disabled, empty state
 * (no cached data, no sub-communicators, no stored component modules, no
 * saved previous collectives) and install the disable hook.
 */
static void mca_coll_han_module_construct(mca_coll_han_module_t * module)
{
    int idx;

    module->super.coll_module_disable = mca_coll_han_module_disable;

    /* Scalar state */
    module->enabled = false;
    module->is_mapbycore = false;
    module->storage_initialized = false;
    module->dynamic_errors = 0;

    /* Nothing is cached yet */
    module->cached_comm = NULL;
    module->cached_low_comms = NULL;
    module->cached_up_comms = NULL;
    module->cached_vranks = NULL;
    module->cached_topo = NULL;

    for (idx = 0; idx < NB_TOPO_LVL; ++idx) {
        module->sub_comm[idx] = NULL;
    }

    for (idx = SELF; idx < COMPONENTS_COUNT; ++idx) {
        module->modules_storage.modules[idx].module_handler = NULL;
    }

    /* Saved previous collectives and reproducible fallbacks start out NULL */
    han_module_clear(module);
}
/*
 * Invoke OBJ_RELEASE on obj only when obj is not NULL.
 * obj is evaluated more than once, so it must be free of side effects.
 */
#define OBJ_RELEASE_IF_NOT_NULL(obj)    \
    do {                                \
        if ((obj) != NULL) {            \
            OBJ_RELEASE(obj);           \
        }                               \
    } while (0)
/*
 * Module destructor
 *
 * Frees the cached low/up communicators together with their arrays, the
 * cached vranks and topology buffers and the per-topology-level
 * sub-communicators, then drops the references held on the previously
 * selected collective modules and resets the saved entries to NULL.
 */
static void mca_coll_han_module_destruct(mca_coll_han_module_t * module)
{
    int i;

    module->enabled = false;

    if (NULL != module->cached_low_comms) {
        for (i = 0; i < COLL_HAN_LOW_MODULES; i++) {
            ompi_comm_free(&(module->cached_low_comms[i]));
            module->cached_low_comms[i] = NULL;
        }
        free(module->cached_low_comms);
        module->cached_low_comms = NULL;
    }

    if (NULL != module->cached_up_comms) {
        for (i = 0; i < COLL_HAN_UP_MODULES; i++) {
            ompi_comm_free(&(module->cached_up_comms[i]));
            module->cached_up_comms[i] = NULL;
        }
        free(module->cached_up_comms);
        module->cached_up_comms = NULL;
    }

    /* free(NULL) is a no-op, so these need no guard */
    free(module->cached_vranks);
    module->cached_vranks = NULL;
    free(module->cached_topo);
    module->cached_topo = NULL;

    for (i = 0; i < NB_TOPO_LVL; i++) {
        if (NULL != module->sub_comm[i]) {
            ompi_comm_free(&(module->sub_comm[i]));
            /* Same post-free reset as for the cached communicators above */
            module->sub_comm[i] = NULL;
        }
    }

    /*
     * Drop the reference taken on each module saved by han_module_enable.
     * allgatherv is part of that set (it is saved at enable time and
     * released in mca_coll_han_module_disable), so it must be released here
     * as well or its reference leaks when the module is destroyed without
     * having been disabled.  Entries that are already NULL are skipped.
     */
    OBJ_RELEASE_IF_NOT_NULL(module->previous_allgather_module);
    OBJ_RELEASE_IF_NOT_NULL(module->previous_allgatherv_module);
    OBJ_RELEASE_IF_NOT_NULL(module->previous_allreduce_module);
    OBJ_RELEASE_IF_NOT_NULL(module->previous_bcast_module);
    OBJ_RELEASE_IF_NOT_NULL(module->previous_gather_module);
    OBJ_RELEASE_IF_NOT_NULL(module->previous_reduce_module);
    OBJ_RELEASE_IF_NOT_NULL(module->previous_scatter_module);

    han_module_clear(module);
}
/*
 * Class instance for mca_coll_han_module_t: names mca_coll_base_module_t as
 * the parent type and hooks up the constructor and destructor defined in
 * this file.
 */
OBJ_CLASS_INSTANCE(mca_coll_han_module_t,
mca_coll_base_module_t,
mca_coll_han_module_construct,
mca_coll_han_module_destruct);
/*
 * Initial query function that is invoked during MPI_INIT, allowing
 * this component to disqualify itself if it doesn't support the
 * required level of thread support. This function is invoked exactly
 * once.
 *
 * Neither argument is consulted: the function only logs a message and
 * returns OMPI_SUCCESS.
 */
int mca_coll_han_init_query(bool enable_progress_threads,
                            bool enable_mpi_threads)
{
    (void) enable_progress_threads;
    (void) enable_mpi_threads;

    opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                        "coll:han:init_query: pick me! pick me!");

    return OMPI_SUCCESS;
}
/*
* Invoked when there's a new communicator that has been created.
* Look at the communicator and decide which set of functions and
* priority we want to return.
*/
mca_coll_base_module_t *
mca_coll_han_comm_query(struct ompi_communicator_t * comm, int *priority)
{
mca_coll_han_module_t *han_module;
/*
* If we're intercomm, or if there's only one process in the communicator
*/
if (OMPI_COMM_IS_INTER(comm)) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:han:comm_query (%d/%s): intercomm; disqualifying myself",
comm->c_contextid, comm->c_name);
return NULL;
}
if (1 == ompi_comm_size(comm)) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:han:comm_query (%d/%s): comm is too small; disqualifying myself",
comm->c_contextid, comm->c_name);
return NULL;
}
/* Get the priority level attached to this module. If priority is less
* than or equal to 0, then the module is unavailable. */
*priority = mca_coll_han_component.han_priority;
if (mca_coll_han_component.han_priority <= 0) {
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:han:comm_query (%d/%s): priority too low; disqualifying myself",
comm->c_contextid, comm->c_name);
return NULL;
}
han_module = OBJ_NEW(mca_coll_han_module_t);
if (NULL == han_module) {
return NULL;
}
/* All is good -- return a module */
han_module->topologic_level = mca_coll_han_component.topo_level;
/*
* TODO: When the selector is fully implemented,
* this if will be meaningless
*/
if (GLOBAL_COMMUNICATOR == han_module->topologic_level) {
/* We are on the global communicator, return topological algorithms */
han_module->super.coll_module_enable = han_module_enable;
han_module->super.ft_event = NULL;
han_module->super.coll_allgather = mca_coll_han_allgather_intra_dynamic;
han_module->super.coll_allgatherv = NULL;
han_module->super.coll_allreduce = mca_coll_han_allreduce_intra_dynamic;
han_module->super.coll_alltoall = NULL;
han_module->super.coll_alltoallv = NULL;
han_module->super.coll_alltoallw = NULL;
han_module->super.coll_barrier = NULL;
han_module->super.coll_bcast = mca_coll_han_bcast_intra_dynamic;
han_module->super.coll_exscan = NULL;
han_module->super.coll_gather = mca_coll_han_gather_intra_dynamic;
han_module->super.coll_gatherv = NULL;
han_module->super.coll_reduce = mca_coll_han_reduce_intra_dynamic;
han_module->super.coll_reduce_scatter = NULL;
han_module->super.coll_scan = NULL;
han_module->super.coll_scatter = mca_coll_han_scatter_intra_dynamic;
han_module->super.coll_scatterv = NULL;
} else {
/* We are on a topologic sub-communicator, return only the selector */
han_module->super.coll_module_enable = han_module_enable;
han_module->super.ft_event = NULL;
han_module->super.coll_allgather = mca_coll_han_allgather_intra_dynamic;
han_module->super.coll_allgatherv = mca_coll_han_allgatherv_intra_dynamic;
han_module->super.coll_allreduce = mca_coll_han_allreduce_intra_dynamic;
han_module->super.coll_alltoall = NULL;
han_module->super.coll_alltoallv = NULL;
han_module->super.coll_alltoallw = NULL;
han_module->super.coll_barrier = NULL;
han_module->super.coll_bcast = mca_coll_han_bcast_intra_dynamic;
han_module->super.coll_exscan = NULL;
han_module->super.coll_gather = mca_coll_han_gather_intra_dynamic;
han_module->super.coll_gatherv = NULL;
han_module->super.coll_reduce = mca_coll_han_reduce_intra_dynamic;
han_module->super.coll_reduce_scatter = NULL;
han_module->super.coll_scan = NULL;
han_module->super.coll_scatter = mca_coll_han_scatter_intra_dynamic;
han_module->super.coll_scatterv = NULL;
}
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
"coll:han:comm_query (%d/%s): pick me! pick me!",
comm->c_contextid, comm->c_name);
return &(han_module->super);
}
/*
 * Save the collective currently installed on the communicator for __api
 * (function pointer and module) into the HAN module, and take a reference
 * on that module.
 *
 * In this macro, the following variables are supposed to have been declared
 * in the caller:
 * . ompi_communicator_t *comm
 * . mca_coll_han_module_t *han_module
 *
 * The enclosing function must return int: when either the function pointer
 * or its module is missing on the communicator, the macro logs a message
 * and returns OMPI_ERROR from the caller.  The check is performed before
 * anything is stored, so a failed save never leaves a module pointer in
 * han_module that was not retained (such a pointer would later be passed
 * to OBJ_RELEASE without a matching OBJ_RETAIN).
 *
 * The reference taken here is dropped in mca_coll_han_module_disable.
 * NOTE(review): an earlier FIXME on this macro reported memory corruption
 * when these references were released -- confirm it is resolved.
 */
#define HAN_SAVE_PREV_COLL_API(__api) do { \
    if (!comm->c_coll->coll_ ## __api || !comm->c_coll->coll_ ## __api ## _module) { \
        opal_output_verbose(1, ompi_coll_base_framework.framework_output, \
                            "(%d/%s): no underlying " # __api"; disqualifying myself", \
                            comm->c_contextid, comm->c_name); \
        return OMPI_ERROR; \
    } \
    han_module->previous_ ## __api = comm->c_coll->coll_ ## __api; \
    han_module->previous_ ## __api ## _module = comm->c_coll->coll_ ## __api ## _module;\
    OBJ_RETAIN(han_module->previous_ ## __api ## _module); \
} while(0)
/*
* Init module on the communicator
*
* Saves the function and module currently installed on comm for allgather,
* allgatherv, allreduce, bcast, gather, reduce and scatter, then runs the
* reproducible reduce/allreduce decision routines.
*
* Returns OMPI_SUCCESS, or OMPI_ERROR as soon as one HAN_SAVE_PREV_COLL_API
* use finds no underlying function or module (the macro returns from this
* function).
*/
static int han_module_enable(mca_coll_base_module_t * module,
struct ompi_communicator_t *comm)
{
/* HAN_SAVE_PREV_COLL_API relies on the local names han_module and comm */
mca_coll_han_module_t * han_module = (mca_coll_han_module_t*) module;
HAN_SAVE_PREV_COLL_API(allgather);
HAN_SAVE_PREV_COLL_API(allgatherv);
HAN_SAVE_PREV_COLL_API(allreduce);
HAN_SAVE_PREV_COLL_API(bcast);
HAN_SAVE_PREV_COLL_API(gather);
HAN_SAVE_PREV_COLL_API(reduce);
HAN_SAVE_PREV_COLL_API(scatter);
/* set reproducible algos */
/* NOTE(review): any value returned by these two calls is ignored -- confirm intended */
mca_coll_han_reduce_reproducible_decision(comm, module);
mca_coll_han_allreduce_reproducible_decision(comm, module);
return OMPI_SUCCESS;
}
/*
 * Module disable
 *
 * Drops the reference held on each saved previous collective module
 * (entries that are NULL are skipped), then resets all saved entries.
 * The communicator argument is not used.  Always returns OMPI_SUCCESS.
 */
static int mca_coll_han_module_disable(mca_coll_base_module_t * module,
                                       struct ompi_communicator_t *comm)
{
    mca_coll_han_module_t *self = (mca_coll_han_module_t *) module;

    (void) comm;

    OBJ_RELEASE_IF_NOT_NULL(self->previous_allgather_module);
    OBJ_RELEASE_IF_NOT_NULL(self->previous_allgatherv_module);
    OBJ_RELEASE_IF_NOT_NULL(self->previous_allreduce_module);
    OBJ_RELEASE_IF_NOT_NULL(self->previous_bcast_module);
    OBJ_RELEASE_IF_NOT_NULL(self->previous_gather_module);
    OBJ_RELEASE_IF_NOT_NULL(self->previous_reduce_module);
    OBJ_RELEASE_IF_NOT_NULL(self->previous_scatter_module);

    /* Leave no stale function or module pointer behind */
    han_module_clear(self);

    return OMPI_SUCCESS;
}
/*
 * Free the han request
 *
 * Marks the request invalid, releases it, and replaces the caller's handle
 * with MPI_REQUEST_NULL.  Always returns OMPI_SUCCESS.
 */
int han_request_free(ompi_request_t ** request)
{
    ompi_request_t *req = *request;

    req->req_state = OMPI_REQUEST_INVALID;
    OBJ_RELEASE(req);

    /* The caller's handle is overwritten whatever OBJ_RELEASE did to req */
    *request = MPI_REQUEST_NULL;

    return OMPI_SUCCESS;
}