Start moving stuff out of modules that should be in the component.
This commit was SVN r8874.
Этот коммит содержится в:
родитель
23118171d6
Коммит
232bb9534a
@ -79,7 +79,7 @@ extern "C" {
|
||||
* Globally exported variable
|
||||
*/
|
||||
|
||||
OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component;
|
||||
/* OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component; */
|
||||
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_stream;
|
||||
OMPI_COMP_EXPORT extern int ompi_coll_tuned_priority;
|
||||
@ -290,9 +290,40 @@ static inline void ompi_coll_tuned_free_reqs(ompi_request_t **reqs, int count)
|
||||
ompi_request_free(&reqs[i]);
|
||||
}
|
||||
|
||||
/**
 * Component-level state for the tuned coll component: the base coll
 * component descriptor followed by tuned-specific fields.
 */
struct mca_coll_tuned_component_t {
|
||||
/** Base coll component descriptor; MCA parameter registrations reference it via super.collm_version */
|
||||
mca_coll_base_component_1_0_0_t super;
|
||||
|
||||
/** MCA parameter: priority of this component.
 *  NOTE(review): the visible tuned_open hunk still registers "priority"
 *  into the global ompi_coll_tuned_priority, not this field -- confirm
 *  which one is authoritative. */
|
||||
int tuned_priority;
|
||||
|
||||
/** Global state that the component itself (rather than a module) stores */
|
||||
|
||||
/* MCA parameters first (placeholder: none are declared here yet) */
|
||||
|
||||
/* Cached decision table, moved here from the MCW module
 * (presumably the MPI_COMM_WORLD module -- TODO confirm) */
|
||||
ompi_coll_alg_rule_t *all_base_rules;
|
||||
|
||||
};
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t;
|
||||
|
||||
/**
|
||||
* Global component instance
|
||||
*/
|
||||
extern mca_coll_tuned_component_t mca_coll_tuned_component;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Data structure for hanging data off the communicator
|
||||
* i.e. per module instance
|
||||
*/
|
||||
struct mca_coll_base_comm_t {
|
||||
/* standard data for requests and PML usage */
|
||||
@ -339,9 +370,16 @@ struct mca_coll_base_comm_t {
|
||||
|
||||
/* extra data required by the decision functions */
|
||||
ompi_coll_alg_rule_t *all_base_rules; /* stored only on MCW, all other coms ref it */
|
||||
/* moving to the component */
|
||||
ompi_coll_com_rule_t *com_rules[COLLCOUNT]; /* the communicator rules for each MPI collective for ONLY my comsize */
|
||||
};
|
||||
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct mca_coll_base_comm_t mca_coll_base_comm_t;
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
@ -138,26 +138,26 @@ ompi_coll_tuned_allreduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
int ompi_coll_tuned_allreduce_intra_check_forced ( )
|
||||
{
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"allreduce_algorithm",
|
||||
"Which allreduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 nonoverlapping (tuned reduce + tuned bcast)",
|
||||
false, false, ompi_coll_tuned_allreduce_forced_choice,
|
||||
&ompi_coll_tuned_allreduce_forced_choice);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"allreduce_algorithm_segmentsize",
|
||||
"Segment size in bytes used by default for allreduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
|
||||
false, false, ompi_coll_tuned_allreduce_forced_segsize,
|
||||
&ompi_coll_tuned_allreduce_forced_segsize);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"allreduce_algorithm_tree_fanout",
|
||||
"Fanout for n-tree used for allreduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_allreduce_forced_tree_fanout);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"allreduce_algorithm_chain_fanout",
|
||||
"Fanout for chains used for allreduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
|
||||
false, false,
|
||||
|
@ -443,26 +443,26 @@ int ompi_coll_tuned_alltoall_intra_basic_linear(void *sbuf, int scount,
|
||||
int ompi_coll_tuned_alltoall_intra_check_forced ( )
|
||||
{
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"alltoall_algorithm",
|
||||
"Which alltoall algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 pairwise, 3: modified bruck, 4: two proc only.",
|
||||
false, false, ompi_coll_tuned_alltoall_forced_choice,
|
||||
&ompi_coll_tuned_alltoall_forced_choice);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"alltoall_algorithm_segmentsize",
|
||||
"Segment size in bytes used by default for alltoall algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
|
||||
false, false, ompi_coll_tuned_alltoall_forced_segsize,
|
||||
&ompi_coll_tuned_alltoall_forced_segsize);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"alltoall_algorithm_tree_fanout",
|
||||
"Fanout for n-tree used for alltoall algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_alltoall_forced_tree_fanout);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"alltoall_algorithm_chain_fanout",
|
||||
"Fanout for chains used for alltoall algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
|
||||
false, false,
|
||||
|
@ -328,7 +328,7 @@ static int ompi_coll_tuned_barrier_intra_basic_linear(struct ompi_communicator_t
|
||||
int ompi_coll_tuned_barrier_intra_check_forced ( )
|
||||
{
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"barrier_algorithm",
|
||||
"Which barrier algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 double ring, 3: recursive doubling 4: bruck, 5: two proc only, 6: step based bmtree",
|
||||
false, false, ompi_coll_tuned_barrier_forced_choice,
|
||||
|
@ -780,26 +780,26 @@ ompi_coll_tuned_bcast_intra_basic_linear (void *buff, int count,
|
||||
int ompi_coll_tuned_bcast_intra_check_forced ( )
|
||||
{
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"bcast_algorithm",
|
||||
"Which bcast algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 chain, 3: pipeline, 4: split binary tree, 5: binary tree, 6: BM tree.",
|
||||
false, false, ompi_coll_tuned_bcast_forced_choice,
|
||||
&ompi_coll_tuned_bcast_forced_choice);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"bcast_algorithm_segmentsize",
|
||||
"Segment size in bytes used by default for bcast algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
|
||||
false, false, ompi_coll_tuned_bcast_forced_segsize,
|
||||
&ompi_coll_tuned_bcast_forced_segsize);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"bcast_algorithm_tree_fanout",
|
||||
"Fanout for n-tree used for bcast algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_bcast_forced_tree_fanout);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"bcast_algorithm_chain_fanout",
|
||||
"Fanout for chains used for bcast algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
|
||||
false, false,
|
||||
|
@ -76,49 +76,62 @@ int ompi_coll_tuned_reduce_forced_tree_fanout = 0;
|
||||
* Local function
|
||||
*/
|
||||
static int tuned_open(void);
|
||||
static int tuned_close(void);
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
|
||||
const mca_coll_base_component_1_0_0_t mca_coll_tuned_component = {
|
||||
mca_coll_tuned_component_t mca_coll_tuned_component = {
|
||||
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
/* First, fill in the super (mca_coll_base_component_1_0_0_t) */
|
||||
|
||||
{
|
||||
/* Indicate that we are a coll v1.0.0 component (which also implies a
|
||||
specific MCA version) */
|
||||
{
|
||||
|
||||
MCA_COLL_BASE_VERSION_1_0_0,
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
|
||||
/* Component name and version */
|
||||
{
|
||||
/* Indicate that we are a coll v1.0.0 component (which also implies a
|
||||
specific MCA version) */
|
||||
|
||||
"tuned",
|
||||
OMPI_MAJOR_VERSION,
|
||||
OMPI_MINOR_VERSION,
|
||||
OMPI_RELEASE_VERSION,
|
||||
MCA_COLL_BASE_VERSION_1_0_0,
|
||||
|
||||
/* Component open and close functions */
|
||||
/* Component name and version */
|
||||
|
||||
tuned_open,
|
||||
NULL
|
||||
},
|
||||
"tuned",
|
||||
OMPI_MAJOR_VERSION,
|
||||
OMPI_MINOR_VERSION,
|
||||
OMPI_RELEASE_VERSION,
|
||||
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
/* Component open and close functions */
|
||||
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
tuned_open,
|
||||
tuned_close
|
||||
},
|
||||
|
||||
true
|
||||
},
|
||||
/* Next the MCA v1.0.0 component meta data */
|
||||
|
||||
/* Initialization / querying functions */
|
||||
{
|
||||
/* Whether the component is checkpointable or not */
|
||||
|
||||
ompi_coll_tuned_init_query,
|
||||
ompi_coll_tuned_comm_query,
|
||||
NULL
|
||||
true
|
||||
},
|
||||
|
||||
/* Initialization / querying functions */
|
||||
|
||||
ompi_coll_tuned_init_query,
|
||||
ompi_coll_tuned_comm_query,
|
||||
NULL
|
||||
},
|
||||
|
||||
/* priority of the module */
|
||||
0,
|
||||
|
||||
/* Tuned component specific information */
|
||||
/* Note some of this WAS in the module */
|
||||
NULL /* ompi_coll_alg_rule_t ptr */
|
||||
};
|
||||
|
||||
|
||||
@ -130,21 +143,21 @@ static int tuned_open(void)
|
||||
|
||||
/* Use a low priority, but allow other components to be lower */
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"priority",
|
||||
"Priority of the tuned coll component",
|
||||
false, false, ompi_coll_tuned_priority,
|
||||
&ompi_coll_tuned_priority);
|
||||
|
||||
/* parameter for pre-allocated memory requests etc */
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"pre_allocate_memory_comm_size_limit",
|
||||
"Size of communicator were we stop pre-allocating memory for the fixed internal buffer used for message requests etc that is hung off the communicator data segment. I.e. if you have a 100'000 nodes you might not want to pre-allocate 200'000 request handle slots per communicator instance!",
|
||||
false, false, ompi_coll_tuned_preallocate_memory_comm_size_limit,
|
||||
&ompi_coll_tuned_preallocate_memory_comm_size_limit);
|
||||
|
||||
/* by default DISABLE dynamic rules and instead use fixed [if based] rules */
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"use_dynamic_rules",
|
||||
"Switch used to decide if we use static (if statements) or dynamic (built at runtime) decision function rules",
|
||||
false, false, ompi_coll_tuned_use_dynamic_rules,
|
||||
@ -155,7 +168,7 @@ static int tuned_open(void)
|
||||
if (ompi_coll_tuned_use_dynamic_rules) {
|
||||
/* char *default_name; */
|
||||
/* asprintf(&default_name, "~/.openmpi/openmpi-coll-tuned-params.conf"); */
|
||||
mca_base_param_reg_string(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_string(&mca_coll_tuned_component.super.collm_version,
|
||||
"dynamic_rules_filename",
|
||||
"Filename of configuration file that contains the dynamic (@runtime) decision function rules",
|
||||
false, false, ompi_coll_tuned_dynamic_rules_filename,
|
||||
@ -163,13 +176,13 @@ static int tuned_open(void)
|
||||
}
|
||||
|
||||
/* some initial guesses at topology parameters */
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"init_tree_fanout",
|
||||
"Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
|
||||
false, false, ompi_coll_tuned_init_tree_fanout,
|
||||
&ompi_coll_tuned_init_tree_fanout);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"init_chain_fanout",
|
||||
"Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
|
||||
false, false, ompi_coll_tuned_init_chain_fanout,
|
||||
@ -200,3 +213,17 @@ static int tuned_open(void)
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Component close hook: this is where state stored on the component should be cleaned up, */
|
||||
/* i.e. the algorithm table and the dynamically changeable rules, if allocated.
 * As written it only logs entry/exit and always returns OMPI_SUCCESS;
 * the deallocation steps below are still placeholders. */
|
||||
static int tuned_close(void)
|
||||
{
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: called"));
|
||||
|
||||
/* TODO: deallocate the algorithm table if allocated (not implemented yet) */
|
||||
/* TODO: deallocate the dynamically changeable rules if allocated (not implemented yet) */
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: done!"));
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -19,8 +19,6 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "include/constants.h"
|
||||
#include "include/types.h"
|
||||
#include "datatype/datatype.h"
|
||||
#include "communicator/communicator.h"
|
||||
#include "mca/coll/coll.h"
|
||||
@ -221,7 +219,7 @@ int ompi_coll_tuned_reduce_intra_dec_fixed( void *sendbuf, void *recvbuf,
|
||||
MPI_Aint ext;
|
||||
long lb;
|
||||
int segsize = 0;
|
||||
int fanout = 0;
|
||||
/* int fanout = 0; */
|
||||
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_reduce_intra_dec_fixed"));
|
||||
|
@ -447,26 +447,26 @@ ompi_coll_tuned_reduce_intra_basic_linear(void *sbuf, void *rbuf, int count,
|
||||
int ompi_coll_tuned_reduce_intra_check_forced ( )
|
||||
{
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"reduce_algorithm",
|
||||
"Which reduce algorithm is used. Can be locked down to choice of: 0 ignore, 1 linear, 2 chain, 3 pipeline",
|
||||
false, false, ompi_coll_tuned_reduce_forced_choice,
|
||||
&ompi_coll_tuned_reduce_forced_choice);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"reduce_algorithm_segmentsize",
|
||||
"Segment size in bytes used by default for reduce algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation.",
|
||||
false, false, ompi_coll_tuned_reduce_forced_segsize,
|
||||
&ompi_coll_tuned_reduce_forced_segsize);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"reduce_algorithm_tree_fanout",
|
||||
"Fanout for n-tree used for reduce algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation.",
|
||||
false, false,
|
||||
ompi_coll_tuned_init_tree_fanout, /* get system wide default */
|
||||
&ompi_coll_tuned_reduce_forced_tree_fanout);
|
||||
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.super.collm_version,
|
||||
"reduce_algorithm_chain_fanout",
|
||||
"Fanout for chains used for reduce algorithms. Only has meaning if algorithm is forced and supports chain topo based operation.",
|
||||
false, false,
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user