1
1
openmpi/ompi/mca/coll/tuned/coll_tuned_module.c
Graham Fagg 607bdf51b6 Last Cleanup BEFORE adding last two methods and final cross over points.
- new mca param calls
- move printfs to OPAL_OUTPUT

This commit was SVN r7692.
2005-10-11 18:51:03 +00:00

440 строки
13 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_tuned.h"
#include <stdio.h>
#include "mpi.h"
#include "communicator/communicator.h"
#include "mca/base/mca_base_param.h"
#include "mca/coll/coll.h"
#include "mca/coll/base/base.h"
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
/*
 * Integers defined by the component (a different translation unit).
 * Original author's open question: shouldn't these be cached on the
 * component somehow instead of being pulled in through externs?
 *
 * NOTE(review): the functions in this file read identifiers WITHOUT the
 * "_param" suffix (e.g. mca_coll_tuned_use_dynamic_rules,
 * mca_coll_tuned_init_tree_fanout). Those are not declared here --
 * presumably coll_tuned.h provides them; confirm, and confirm whether
 * these "_param" declarations are still needed in this file.
 */
extern int mca_coll_tuned_use_dynamic_rules_param;
extern int mca_coll_tuned_init_tree_fanout_param;
extern int mca_coll_tuned_init_chain_fanout_param;
extern int mca_coll_tuned_preallocate_memory_comm_size_limit_param;
/*
 * Which set are we using?
 *
 * Points at one of the four static module tables defined below. It is
 * written by mca_coll_tuned_comm_query() and returned by both
 * mca_coll_tuned_comm_query() and mca_coll_tuned_module_init(). Because it
 * is a single file-scope variable, module_init returns whatever the most
 * recent comm_query call selected.
 */
static const mca_coll_base_module_1_0_0_t *to_use = NULL;
/*
 * Intra communicator decision functions
 *
 * Two tables, one for fixed rules (intra_fixed, below) and one for
 * dynamic rules (intra_dynamic).
 *
 * The initializer is positional, so the entries must stay in exactly the
 * order shown by the per-slot comments. A NULL entry means this table
 * supplies no function for that collective; the function name in the
 * comment above a NULL is the place marker for the routine intended to
 * fill that slot. In this table alltoall, barrier, bcast and reduce are
 * provided.
 */
static const mca_coll_base_module_1_0_0_t intra_fixed = {
/* Initialization / finalization functions */
mca_coll_tuned_module_init,
mca_coll_tuned_module_finalize,
/* Collective function pointers */
/* allgather: place marker mca_coll_tuned_allgather_intra_dec_fixed */
NULL,
/* allgatherv: place marker mca_coll_tuned_allgatherv_intra_dec_fixed */
NULL,
/* allreduce: place marker mca_coll_tuned_allreduce_intra_dec_fixed */
NULL,
/* alltoall: provided */
mca_coll_tuned_alltoall_intra_dec_fixed,
/* alltoallv: place marker mca_coll_tuned_alltoallv_intra_dec_fixed */
NULL,
/* alltoallw: place marker mca_coll_tuned_alltoallw_intra_dec_fixed */
NULL,
/* barrier: provided */
mca_coll_tuned_barrier_intra_dec_fixed,
/* bcast: provided */
mca_coll_tuned_bcast_intra_dec_fixed,
/* exscan: place marker mca_coll_tuned_exscan_intra_dec_fixed */
NULL,
/* gather: place marker mca_coll_tuned_gather_intra_dec_fixed */
NULL,
/* gatherv: place marker mca_coll_tuned_gatherv_intra_dec_fixed */
NULL,
/* reduce: provided */
mca_coll_tuned_reduce_intra_dec_fixed,
/* reduce_scatter: place marker mca_coll_tuned_reduce_scatter_intra_dec_fixed */
NULL,
/* scan: place marker mca_coll_tuned_scan_intra_dec_fixed */
NULL,
/* scatter: place marker mca_coll_tuned_scatter_intra_dec_fixed */
NULL,
/* scatterv: place marker mca_coll_tuned_scatterv_intra_dec_fixed */
NULL
};
/*
 * Module table for intra-communicators when dynamic rules are enabled.
 *
 * Positional initializer: keep the entries in the order shown by the
 * per-slot comments. Every collective slot is currently NULL; the name in
 * each comment is the place marker for the dynamic decision function
 * intended to fill that slot.
 */
static const mca_coll_base_module_1_0_0_t intra_dynamic = {
/* Initialization / finalization functions */
mca_coll_tuned_module_init,
mca_coll_tuned_module_finalize,
/* Collective function pointers */
/* allgather: place marker mca_coll_tuned_allgather_intra_dec_dynamic */
NULL,
/* allgatherv: place marker mca_coll_tuned_allgatherv_intra_dec_dynamic */
NULL,
/* allreduce: place marker mca_coll_tuned_allreduce_intra_dec_dynamic */
NULL,
/* alltoall: place marker mca_coll_tuned_alltoall_intra_dec_dynamic */
NULL,
/* alltoallv: place marker mca_coll_tuned_alltoallv_intra_dec_dynamic */
NULL,
/* alltoallw: place marker mca_coll_tuned_alltoallw_intra_dec_dynamic */
NULL,
/* barrier: place marker mca_coll_tuned_barrier_intra_dec_dynamic */
NULL,
/* bcast: place marker mca_coll_tuned_bcast_intra_dec_dynamic */
NULL,
/* exscan: place marker mca_coll_tuned_exscan_intra_dec_dynamic */
NULL,
/* gather: place marker mca_coll_tuned_gather_intra_dec_dynamic */
NULL,
/* gatherv: place marker mca_coll_tuned_gatherv_intra_dec_dynamic */
NULL,
/* reduce: place marker mca_coll_tuned_reduce_intra_dec_dynamic */
NULL,
/* reduce_scatter: place marker mca_coll_tuned_reduce_scatter_intra_dec_dynamic */
NULL,
/* scan: place marker mca_coll_tuned_scan_intra_dec_dynamic */
NULL,
/* scatter: place marker mca_coll_tuned_scatter_intra_dec_dynamic */
NULL,
/* scatterv: place marker mca_coll_tuned_scatterv_intra_dec_dynamic */
NULL
};
/*
 * Collective decision functions for inter-communicators
 *
 * Two tables, one for fixed rules (inter_fixed, below) and one for
 * dynamic rules (inter_dynamic).
 *
 * Positional initializer: keep the entries in the order shown by the
 * per-slot comments. Every collective slot is currently NULL; the name in
 * each comment is the place marker for the fixed decision function
 * intended to fill that slot.
 */
static const mca_coll_base_module_1_0_0_t inter_fixed = {
/* Initialization / finalization functions */
mca_coll_tuned_module_init,
mca_coll_tuned_module_finalize,
/* Collective function pointers */
/* allgather: place marker mca_coll_tuned_allgather_inter_dec_fixed */
NULL,
/* allgatherv: place marker mca_coll_tuned_allgatherv_inter_dec_fixed */
NULL,
/* allreduce: place marker mca_coll_tuned_allreduce_inter_dec_fixed */
NULL,
/* alltoall: place marker mca_coll_tuned_alltoall_inter_dec_fixed */
NULL,
/* alltoallv: place marker mca_coll_tuned_alltoallv_inter_dec_fixed */
NULL,
/* alltoallw: place marker mca_coll_tuned_alltoallw_inter_dec_fixed */
NULL,
/* barrier: place marker mca_coll_tuned_barrier_inter_dec_fixed */
NULL,
/* bcast: place marker mca_coll_tuned_bcast_inter_dec_fixed */
NULL,
/* exscan: place marker mca_coll_tuned_exscan_inter_dec_fixed */
NULL,
/* gather: place marker mca_coll_tuned_gather_inter_dec_fixed */
NULL,
/* gatherv: place marker mca_coll_tuned_gatherv_inter_dec_fixed */
NULL,
/* reduce: place marker mca_coll_tuned_reduce_inter_dec_fixed */
NULL,
/* reduce_scatter: place marker mca_coll_tuned_reduce_scatter_inter_dec_fixed */
NULL,
/* scan: place marker mca_coll_tuned_scan_inter_dec_fixed */
NULL,
/* scatter: place marker mca_coll_tuned_scatter_inter_dec_fixed */
NULL,
/* scatterv: place marker mca_coll_tuned_scatterv_inter_dec_fixed */
NULL
};
/*
 * Module table for inter-communicators when dynamic rules are enabled.
 *
 * Positional initializer: keep the entries in the order shown by the
 * per-slot comments. Every collective slot is currently NULL; the name in
 * each comment is the place marker for the dynamic decision function
 * intended to fill that slot.
 */
static const mca_coll_base_module_1_0_0_t inter_dynamic = {
/* Initialization / finalization functions */
mca_coll_tuned_module_init,
mca_coll_tuned_module_finalize,
/* Collective function pointers */
/* allgather: place marker mca_coll_tuned_allgather_inter_dec_dynamic */
NULL,
/* allgatherv: place marker mca_coll_tuned_allgatherv_inter_dec_dynamic */
NULL,
/* allreduce: place marker mca_coll_tuned_allreduce_inter_dec_dynamic */
NULL,
/* alltoall: place marker mca_coll_tuned_alltoall_inter_dec_dynamic */
NULL,
/* alltoallv: place marker mca_coll_tuned_alltoallv_inter_dec_dynamic */
NULL,
/* alltoallw: place marker mca_coll_tuned_alltoallw_inter_dec_dynamic */
NULL,
/* barrier: place marker mca_coll_tuned_barrier_inter_dec_dynamic */
NULL,
/* bcast: place marker mca_coll_tuned_bcast_inter_dec_dynamic */
NULL,
/* exscan: place marker mca_coll_tuned_exscan_inter_dec_dynamic */
NULL,
/* gather: place marker mca_coll_tuned_gather_inter_dec_dynamic */
NULL,
/* gatherv: place marker mca_coll_tuned_gatherv_inter_dec_dynamic */
NULL,
/* reduce: place marker mca_coll_tuned_reduce_inter_dec_dynamic */
NULL,
/* reduce_scatter: place marker mca_coll_tuned_reduce_scatter_inter_dec_dynamic */
NULL,
/* scan: place marker mca_coll_tuned_scan_inter_dec_dynamic */
NULL,
/* scatter: place marker mca_coll_tuned_scatter_inter_dec_dynamic */
NULL,
/* scatterv: place marker mca_coll_tuned_scatterv_inter_dec_dynamic */
NULL
};
/*
 * Note: the function names kept in the comments of the four module tables
 * above are place markers; they stay there until all of those functions
 * are implemented.
 */
/*
 * Initial query function that is invoked during MPI_INIT, allowing
 * this component to disqualify itself if it doesn't support the
 * required level of thread support.
 *
 * enable_progress_threads / enable_mpi_threads: requested threading
 * modes; neither is consulted.
 *
 * Always returns OMPI_SUCCESS.
 */
int mca_coll_tuned_init_query(bool enable_progress_threads,
                              bool enable_mpi_threads)
{
    /* Neither threading flag influences the outcome. */
    (void) enable_progress_threads;
    (void) enable_mpi_threads;

    return OMPI_SUCCESS;
}
/*
* Invoked when there's a new communicator that has been created.
* Look at the communicator and decide which set of functions and
* priority we want to return.
*/
const mca_coll_base_module_1_0_0_t *
mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
struct mca_coll_base_comm_t **data)
{
OPAL_OUTPUT((mca_coll_tuned_stream, "coll:tuned:module_tuned query called"));
*priority = mca_coll_tuned_priority;
/*
* Choose whether to use [intra|inter] decision functions
* and if using fixed OR dynamic rule sets.
* Right now you cannot mix them, maybe later on it can be changed
* but this would probably add an extra if and funct call to the path
*
*/
if (OMPI_COMM_IS_INTER(comm)) {
if (mca_coll_tuned_use_dynamic_rules) {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic"));
to_use = &inter_dynamic;
} else {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using inter_fixed"));
to_use = &inter_fixed;
}
} else { /* is an intra comm */
if (mca_coll_tuned_use_dynamic_rules) {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic"));
to_use = &intra_dynamic;
} else {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_query using intra_fixed"));
to_use = &intra_fixed;
}
}
return to_use;
}
/*
 * Init module on the communicator.
 *
 * Allocates the per-communicator data block, builds a first guess of the
 * cached topologies (all rooted at rank 0), attaches the block to
 * comm->c_coll_selected_data and returns the module table previously
 * chosen by mca_coll_tuned_comm_query() (the file-scope 'to_use').
 *
 * Returns NULL if the data block cannot be allocated; in that case the
 * communicator is left untouched.
 *
 * This routine will become more complex and might have to be broken into
 * more sections/function calls. Intended order of operations:
 *   - alloc memory for nb reqs (in case we fall through)
 *   - add decision rules if using dynamic rules      (not done here yet)
 *   - compact rules using communicator size info etc (not done here yet)
 *   - build first guess cached topologies (might depend on the rules)
 *   - attach all to the communicator and return base module funct ptrs
 */
const struct mca_coll_base_module_1_0_0_t *
mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
{
    int size;
    struct mca_coll_base_comm_t *data;

    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init called."));

    /* The request array is sized from the remote group for an
       inter-communicator, and from the local group otherwise. */
    if (OMPI_COMM_IS_INTER(comm)) {
        size = ompi_comm_remote_size(comm);
    } else {
        size = ompi_comm_size(comm);
    }

    /*
     * We still malloc the data block as it is used by the TUNED modules;
     * not allocating it and then falling back to a BASIC module routine
     * confuses debuggers. Any special info is placed after the default
     * data.
     *
     * BUT on very large systems we might not be able to allocate all
     * this memory, so an MCA parameter decides whether the request array
     * is preallocated. Its default is set very high.
     */
    if (size <= mca_coll_tuned_preallocate_memory_comm_size_limit) {
        /* Within the limit: one allocation holds the struct followed
           immediately by 2 * size request pointers. */
        data = malloc(sizeof(struct mca_coll_base_comm_t) +
                      (sizeof(ompi_request_t *) * size * 2));
        if (NULL == data) {
            return NULL;
        }
        data->mcct_reqs = (ompi_request_t **) (data + 1);
        data->mcct_num_reqs = size * 2;
    } else {
        /* Over the limit: allocate the bare struct only and record that
           no request array is available. */
        data = malloc(sizeof(struct mca_coll_base_comm_t));
        if (NULL == data) {
            return NULL;
        }
        data->mcct_reqs = (ompi_request_t **) NULL;
        data->mcct_num_reqs = 0;
    }

    /*
     * Now for the cached topologies: guess the initial topologies using
     * rank 0 as root.
     */

    /* general n fan out tree */
    data->cached_ntree = ompi_coll_tuned_topo_build_tree (mca_coll_tuned_init_tree_fanout, comm, 0);
    data->cached_ntree_root = 0;
    data->cached_ntree_fanout = mca_coll_tuned_init_tree_fanout;

    /* binary tree */
    data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0);
    data->cached_bintree_root = 0;

    /* binomial tree */
    data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0);
    data->cached_bmtree_root = 0;

    /*
     * Chains (fanout followed by pipelines) are more difficult, as the
     * fan out really depends on message size [sometimes]: as size gets
     * larger, fan-out gets smaller [usually].
     *
     * Will probably change how we cache this later; for now a midsize.
     * GEF
     */
    data->cached_chain = ompi_coll_tuned_topo_build_chain (mca_coll_tuned_init_chain_fanout, comm, 0);
    data->cached_chain_root = 0;
    data->cached_chain_fanout = mca_coll_tuned_init_chain_fanout;

    /* standard pipeline (a chain with fanout 1) */
    data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0);
    data->cached_pipeline_root = 0;

    /* All done */
    comm->c_coll_selected_data = data;

    OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init Tuned is in use"));
    return to_use;
}
/*
* Finalize module on the communicator
*/
int mca_coll_tuned_module_finalize(struct ompi_communicator_t *comm)
{
if (NULL == comm->c_coll_selected_module) {
return OMPI_SUCCESS;
}
#if OMPI_ENABLE_DEBUG
/* Reset the reqs to NULL/0 -- they'll be freed as part of freeing
the generel c_coll_selected_data */
comm->c_coll_selected_data->mcct_reqs = NULL;
comm->c_coll_selected_data->mcct_num_reqs = 0;
#endif
/* free any cached information that has been allocated */
if (comm->c_coll_selected_data->cached_ntree) { /* destroy general tree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_ntree);
}
if (comm->c_coll_selected_data->cached_bintree) { /* destroy bintree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bintree);
}
if (comm->c_coll_selected_data->cached_bmtree) { /* destroy bmtree if defined */
ompi_coll_tuned_topo_destroy_tree (&comm->c_coll_selected_data->cached_bmtree);
}
if (comm->c_coll_selected_data->cached_chain) { /* destroy general chain if defined */
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_chain);
}
if (comm->c_coll_selected_data->cached_pipeline) { /* destroy pipeline if defined */
ompi_coll_tuned_topo_destroy_chain (&comm->c_coll_selected_data->cached_pipeline);
}
/* if allocated memory free it */
if (comm->c_coll_selected_data) {
free(comm->c_coll_selected_data);
comm->c_coll_selected_data = NULL;
}
return OMPI_SUCCESS;
}