Snapshot while switching systems,
but dynamic rules from a user-defined config file are almost there now. This commit was SVN r7943.
Этот коммит содержится в:
родитель
e27dfb180d
Коммит
9547a635a9
@ -28,6 +28,29 @@
|
||||
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
|
||||
#include "coll_tuned_topo.h"
|
||||
|
||||
/* also need the dynamic rule structures */
|
||||
#include "coll_tuned_dynamic_rules.h"
|
||||
|
||||
/* some fixed value index vars to simplify certain operations */
|
||||
/*
 * Per-collective index values.
 *
 * Every collective must own a distinct index and the indices must be dense
 * (0 .. COLLCOUNT-1), because they are used to size and index per-collective
 * tables such as rules[COLLCOUNT].  REDUCE and REDUCESCATTER must not share
 * a value, otherwise both collectives would alias the same table slot.
 */
#define ALLGATHER 0
#define ALLGATHERV 1
#define ALLREDUCE 2
#define ALLTOALL 3
#define ALLTOALLV 4
#define ALLTOALLW 5
#define BARRIER 6
#define BCAST 7
#define EXSCAN 8
#define GATHER 9
#define GATHERV 10
#define REDUCE 11
#define REDUCESCATTER 12
#define SCAN 13
#define SCATTER 14
#define SCATTERV 15
/* total number of collectives indexed above; keep equal to last index + 1 */
#define COLLCOUNT 16
|
||||
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -38,35 +61,36 @@ extern "C" {
|
||||
|
||||
OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component;
|
||||
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_stream;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_priority;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_preallocate_memory_comm_size_limit;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_use_dynamic_rules;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_chain_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_stream;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_priority;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_preallocate_memory_comm_size_limit;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_use_dynamic_rules;
|
||||
OMPI_COMP_EXPORT extern char* mca_coll_tuned_dynamic_rules_filename;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_chain_fanout;
|
||||
|
||||
/* forced algorithm choices */
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_chain_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_chain_fanout;
|
||||
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_chain_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_chain_fanout;
|
||||
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_barrier_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_barrier_forced_choice;
|
||||
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_chain_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_chain_fanout;
|
||||
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_choice;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_segsize;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_tree_fanout;
|
||||
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
|
||||
|
||||
/*
|
||||
* coll API functions
|
||||
@ -544,10 +568,6 @@ static inline void mca_coll_tuned_free_reqs(ompi_request_t **reqs, int count)
|
||||
ompi_request_free(&reqs[i]);
|
||||
}
|
||||
|
||||
/* decision table declaration */
|
||||
/* currently a place holder */
|
||||
typedef struct rule_s {
|
||||
} rule_t;
|
||||
|
||||
/*
|
||||
* Data structure for hanging data off the communicator
|
||||
@ -596,7 +616,7 @@ struct mca_coll_base_comm_t {
|
||||
int cached_pipeline_root;
|
||||
|
||||
/* extra data required by the decision functions */
|
||||
rule_t* decision_table;
|
||||
rule_t rules[COLLCOUNT];
|
||||
};
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
|
@ -36,12 +36,13 @@ const char *mca_coll_tuned_component_version_string =
|
||||
/*
|
||||
* Global variable
|
||||
*/
|
||||
int mca_coll_tuned_stream = -1;
|
||||
int mca_coll_tuned_priority = 30;
|
||||
int mca_coll_tuned_preallocate_memory_comm_size_limit = (32*1024);
|
||||
int mca_coll_tuned_use_dynamic_rules = 0;
|
||||
int mca_coll_tuned_init_tree_fanout = 4;
|
||||
int mca_coll_tuned_init_chain_fanout = 4;
|
||||
int mca_coll_tuned_stream = -1;
|
||||
int mca_coll_tuned_priority = 30;
|
||||
int mca_coll_tuned_preallocate_memory_comm_size_limit = (32*1024);
|
||||
int mca_coll_tuned_use_dynamic_rules = 0;
|
||||
char* mca_coll_tuned_dynamic_rules_filename = (char*) NULL;
|
||||
int mca_coll_tuned_init_tree_fanout = 4;
|
||||
int mca_coll_tuned_init_chain_fanout = 4;
|
||||
|
||||
/* forced algorithm variables */
|
||||
int mca_coll_tuned_allreduce_forced_choice = 0;
|
||||
@ -145,6 +146,17 @@ static int tuned_open(void)
|
||||
false, false, mca_coll_tuned_use_dynamic_rules,
|
||||
&mca_coll_tuned_use_dynamic_rules);
|
||||
|
||||
/* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */
|
||||
if (mca_coll_tuned_use_dynamic_rules) {
|
||||
/* char *default_name; */
|
||||
/* asprintf(&default_name, "~/.openmpi/openmpi-coll-tuned-params.conf"); */
|
||||
mca_base_param_reg_string(&mca_coll_tuned_component.collm_version,
|
||||
"dynamic_rules_filename",
|
||||
"Filename of configuration file that contains the dynamic (@runtime) decision function rules",
|
||||
false, false, mca_coll_tuned_dynamic_rules_filename,
|
||||
&mca_coll_tuned_dynamic_rules_filename);
|
||||
}
|
||||
|
||||
/* some initial guesses at topology parameters */
|
||||
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
|
||||
"init_tree_fanout",
|
||||
|
@ -18,7 +18,7 @@
|
||||
#define MCA_COLL_TUNED_DYNAMIC_RULES_EXPORT_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "coll_tuned.h"
|
||||
/* #include "coll_tuned.h" */
|
||||
|
||||
#include "mpi.h"
|
||||
#include "ompi/include/constants.h"
|
||||
@ -97,7 +97,7 @@ typedef struct rule_s {
|
||||
|
||||
|
||||
|
||||
rule_t* mk_rule ();
|
||||
rule_t* mk_rule (void);
|
||||
|
||||
int mk_and_add_condition_to_rule (rule_t* rule, param_index_t param,
|
||||
condition_op_t op, int target);
|
||||
|
@ -27,13 +27,6 @@
|
||||
#include "coll_tuned.h"
|
||||
#include "coll_tuned_topo.h"
|
||||
|
||||
/* from component.. shouldn't it be cached on the component somehow */
|
||||
extern int mca_coll_tuned_use_dynamic_rules_param;
|
||||
extern int mca_coll_tuned_init_tree_fanout_param;
|
||||
extern int mca_coll_tuned_init_chain_fanout_param;
|
||||
extern int mca_coll_tuned_preallocate_memory_comm_size_limit_param;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Which set are we using?
|
||||
@ -292,7 +285,7 @@ mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
|
||||
const struct mca_coll_base_module_1_0_0_t *
|
||||
mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||
{
|
||||
int size;
|
||||
int size, rank;
|
||||
struct mca_coll_base_comm_t *data;
|
||||
/* fanout parameters */
|
||||
int pre_allocate = 1;
|
||||
@ -320,6 +313,41 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||
size = ompi_comm_size(comm);
|
||||
}
|
||||
|
||||
/*
|
||||
* If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file..
|
||||
* then this affects how much storage space you need
|
||||
* (This is a basic version of what will go into V2)
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
rank = ompi_comm_rank(comm); /* find rank as only MCW:0 opens any tuned conf files */
|
||||
|
||||
|
||||
if (!rank) {
|
||||
if (&ompi_mpi_comm_world==comm) {
|
||||
if (mca_coll_tuned_use_dynamic_rules) {
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic"));
|
||||
if (mca_coll_tuned_dynamic_rules_filename) {
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init Opening [%s]",
|
||||
mca_coll_tuned_dynamic_rules_filename));
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init MCW & NOT Dynamic"));
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (mca_coll_tuned_use_dynamic_rules) {
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic"));
|
||||
}
|
||||
else {
|
||||
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init NOT MCW & NOT Dynamic"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* we still malloc data as it is used by the TUNED modules
|
||||
* if we don't allocate it and fall back to a BASIC module routine then it confuses debuggers
|
||||
@ -347,9 +375,10 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
|
||||
data = malloc(sizeof(struct mca_coll_base_comm_t));
|
||||
|
||||
if (NULL == data) {
|
||||
data->mcct_reqs = (ompi_request_t **) NULL;
|
||||
data->mcct_num_reqs = 0;
|
||||
return NULL;
|
||||
}
|
||||
data->mcct_reqs = (ompi_request_t **) NULL;
|
||||
data->mcct_num_reqs = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -437,3 +466,4 @@ int mca_coll_tuned_module_finalize(struct ompi_communicator_t *comm)
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user