1
1

Start moving stuff out of modules that should be in the component.

This commit was SVN r8874.
Этот коммит содержится в:
Graham Fagg 2006-02-01 20:50:14 +00:00
родитель 23118171d6
Коммит 232bb9534a
8 изменённых файлов: 115 добавлений и 52 удалений

Просмотреть файл

@ -79,7 +79,7 @@ extern "C" {
* Globally exported variable
*/
OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component;
/* OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component; */
OMPI_COMP_EXPORT extern int ompi_coll_tuned_stream;
OMPI_COMP_EXPORT extern int ompi_coll_tuned_priority;
@ -290,9 +290,40 @@ static inline void ompi_coll_tuned_free_reqs(ompi_request_t **reqs, int count)
ompi_request_free(&reqs[i]);
}
/**
 * Tuned coll component structure.
 *
 * Extends the base coll component with state that belongs to the
 * component as a whole rather than to an individual module.
 * The component instance is filled in with a positional initializer,
 * so the member order here must match that initializer.
 */
struct mca_coll_tuned_component_t {
/** Base coll component; MCA parameter registration uses super.collm_version */
mca_coll_base_component_1_0_0_t super;
/** MCA parameter: Priority of this component.
 *  NOTE(review): the visible part of tuned_open registers the "priority"
 *  parameter into the global ompi_coll_tuned_priority, not into this
 *  field -- confirm which of the two is authoritative. */
int tuned_priority;
/* Global state the component has to keep: MCA parameters come first,
 * followed by data that used to live in the module. */
/** Cached decision table (moved here from the MCW module) */
ompi_coll_alg_rule_t *all_base_rules;
};
/**
* Convenience typedef
*/
typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t;
/**
 * Global component instance.
 *
 * Declared with the extended tuned component type; the base coll
 * component is reached through its `super` member.
 * NOTE(review): unlike the commented-out declaration earlier in this
 * header, this one carries no OMPI_COMP_EXPORT -- confirm that is intended.
 */
extern mca_coll_tuned_component_t mca_coll_tuned_component;
/*
* Data structure for hanging data off the communicator
* i.e. per module instance
*/
struct mca_coll_base_comm_t {
/* standard data for requests and PML usage */
@ -339,9 +370,16 @@ struct mca_coll_base_comm_t {
/* extra data required by the decision functions */
ompi_coll_alg_rule_t *all_base_rules; /* stored only on MCW, all other coms ref it */
/* moving to the component */
ompi_coll_com_rule_t *com_rules[COLLCOUNT]; /* the communicator rules for each MPI collective for ONLY my comsize */
};
/**
* Convenience typedef
*/
typedef struct mca_coll_base_comm_t mca_coll_base_comm_t;
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -138,26 +138,26 @@ ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
int ompi_coll_tuned_allreduce_intra_check_forced ( )
{
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm",
"Which allreduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast)",
false, false, ompi_coll_tuned_allreduce_forced_choice,
&ompi_coll_tuned_allreduce_forced_choice);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_segmentsize",
"Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, ompi_coll_tuned_allreduce_forced_segsize,
&ompi_coll_tuned_allreduce_forced_segsize);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_tree_fanout",
"Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
&ompi_coll_tuned_allreduce_forced_tree_fanout);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"allreduce_algorithm_chain_fanout",
"Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false,

Просмотреть файл

@ -443,26 +443,26 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
int ompi_coll_tuned_alltoall_intra_check_forced ( )
{
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm",
"Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: two proc only.",
false, false, ompi_coll_tuned_alltoall_forced_choice,
&ompi_coll_tuned_alltoall_forced_choice);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_segmentsize",
"Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, ompi_coll_tuned_alltoall_forced_segsize,
&ompi_coll_tuned_alltoall_forced_segsize);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_tree_fanout",
"Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
&ompi_coll_tuned_alltoall_forced_tree_fanout);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"alltoall_algorithm_chain_fanout",
"Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false,

Просмотреть файл

@ -328,7 +328,7 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t
int ompi_coll_tuned_barrier_intra_check_forced ( )
{
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"barrier_algorithm",
"Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only, 6: step based bmtree",
false, false, ompi_coll_tuned_barrier_forced_choice,

Просмотреть файл

@ -780,26 +780,26 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
int ompi_coll_tuned_bcast_intra_check_forced ( )
{
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm",
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: BM tree.",
false, false, ompi_coll_tuned_bcast_forced_choice,
&ompi_coll_tuned_bcast_forced_choice);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_segmentsize",
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, ompi_coll_tuned_bcast_forced_segsize,
&ompi_coll_tuned_bcast_forced_segsize);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_tree_fanout",
"Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
&ompi_coll_tuned_bcast_forced_tree_fanout);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"bcast_algorithm_chain_fanout",
"Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false,

Просмотреть файл

@ -76,49 +76,62 @@ int ompi_coll_tuned_reduce_forced_tree_fanout = 0;
* Local function
*/
static int tuned_open(void);
static int tuned_close(void);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
const mca_coll_base_component_1_0_0_t mca_coll_tuned_component = {
mca_coll_tuned_component_t mca_coll_tuned_component = {
/* First, the mca_component_t struct containing meta information
about the component itself */
/* First, fill in the super (mca_coll_base_component_1_0_0_t) */
{
/* Indicate that we are a coll v1.0.0 component (which also implies a
specific MCA version) */
{
MCA_COLL_BASE_VERSION_1_0_0,
/* First, the mca_component_t struct containing meta information
about the component itself */
/* Component name and version */
{
/* Indicate that we are a coll v1.0.0 component (which also implies a
specific MCA version) */
"tuned",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
MCA_COLL_BASE_VERSION_1_0_0,
/* Component open and close functions */
/* Component name and version */
tuned_open,
NULL
},
"tuned",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
/* Next the MCA v1.0.0 component meta data */
/* Component open and close functions */
{
/* Whether the component is checkpointable or not */
tuned_open,
tuned_close
},
true
},
/* Next the MCA v1.0.0 component meta data */
/* Initialization / querying functions */
{
/* Whether the component is checkpointable or not */
ompi_coll_tuned_init_query,
ompi_coll_tuned_comm_query,
NULL
true
},
/* Initialization / querying functions */
ompi_coll_tuned_init_query,
ompi_coll_tuned_comm_query,
NULL
},
/* priority of the module */
0,
/* Tuned component specific information */
/* Note some of this WAS in the module */
NULL /* ompi_coll_alg_rule_t ptr */
};
@ -130,21 +143,21 @@ static int tuned_open(void)
/* Use a low priority, but allow other components to be lower */
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"priority",
"Priority of the tuned coll component",
false, false, ompi_coll_tuned_priority,
&ompi_coll_tuned_priority);
/* parameter for pre-allocated memory requests etc */
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"pre_allocate_memory_comm_size_limit",
"Size of communicator were we stop pre-allocating memory for the fixed internal buffer used for message requests etc that is hung off the communicator data segment. I.e. if you have a 100'000 nodes you might not want to pre-allocate 200'000 request handle slots per communicator instance!",
false, false, ompi_coll_tuned_preallocate_memory_comm_size_limit,
&ompi_coll_tuned_preallocate_memory_comm_size_limit);
/* by default DISABLE dynamic rules and instead use fixed [if based] rules */
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"use_dynamic_rules",
"Switch used to decide if we use static (if statements) or dynamic (built at runtime) decision function rules",
false, false, ompi_coll_tuned_use_dynamic_rules,
@ -155,7 +168,7 @@ static int tuned_open(void)
if (ompi_coll_tuned_use_dynamic_rules) {
/* char *default_name; */
/* asprintf(&default_name, "~/.openmpi/openmpi-coll-tuned-params.conf"); */
mca_base_param_reg_string(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_string(&mca_coll_tuned_component.super.collm_version,
"dynamic_rules_filename",
"Filename of configuration file that contains the dynamic (@runtime) decision function rules",
false, false, ompi_coll_tuned_dynamic_rules_filename,
@ -163,13 +176,13 @@ static int tuned_open(void)
}
/* some initial guesses at topology parameters */
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"init_tree_fanout",
"Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
false, false, ompi_coll_tuned_init_tree_fanout,
&ompi_coll_tuned_init_tree_fanout);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"init_chain_fanout",
"Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
false, false, ompi_coll_tuned_init_chain_fanout,
@ -200,3 +213,17 @@ static int tuned_open(void)
return OMPI_SUCCESS;
}
/*
 * Component close hook.
 *
 * This is where state stored on the component should be cleaned up,
 * i.e. the algorithm table and the dynamically changeable rules, if
 * they were allocated.  Nothing is released yet: the function only
 * emits trace output and reports success.
 */
static int tuned_close(void)
{
    const int status = OMPI_SUCCESS;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: called"));

    /* TODO: dealloc alg table if allocated */
    /* TODO: dealloc dynamically changeable rules if allocated */

    OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: done!"));

    return status;
}

Просмотреть файл

@ -19,8 +19,6 @@
#include "ompi_config.h"
#include "mpi.h"
#include "include/constants.h"
#include "include/types.h"
#include "datatype/datatype.h"
#include "communicator/communicator.h"
#include "mca/coll/coll.h"
@ -221,7 +219,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
MPI_Aint ext;
long lb;
int segsize = 0;
int fanout = 0;
/* int fanout = 0; */
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed"));

Просмотреть файл

@ -447,26 +447,26 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
int ompi_coll_tuned_reduce_intra_check_forced ( )
{
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm",
"Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline",
false, false, ompi_coll_tuned_reduce_forced_choice,
&ompi_coll_tuned_reduce_forced_choice);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_segmentsize",
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
false, false, ompi_coll_tuned_reduce_forced_segsize,
&ompi_coll_tuned_reduce_forced_segsize);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_tree_fanout",
"Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
false, false,
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
&ompi_coll_tuned_reduce_forced_tree_fanout);
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
"reduce_algorithm_chain_fanout",
"Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
false, false,