1
1

snapshot while switching systems

but dynamic rules from a user-defined config file are almost there now

This commit was SVN r7943.
Этот коммит содержится в:
Graham Fagg 2005-11-01 00:19:05 +00:00
родитель e27dfb180d
Коммит 9547a635a9
4 изменённых файлов: 108 добавлений и 46 удалений

Просмотреть файл

@ -28,6 +28,29 @@
/* need to include our own topo prototypes so we can malloc data on the comm correctly */
#include "coll_tuned_topo.h"
/* also need the dynamic rule structures */
#include "coll_tuned_dynamic_rules.h"
/*
 * Fixed index values, one per collective operation, used to simplify
 * certain operations (e.g. indexing per-collective tables such as the
 * rules[COLLCOUNT] array hung off the communicator data).
 *
 * Every collective must have a unique index, the indices must be dense
 * starting at 0, and COLLCOUNT must equal the number of collectives
 * listed, so that an array of COLLCOUNT entries has exactly one slot per
 * collective.
 */
#define ALLGATHER 0
#define ALLGATHERV 1
#define ALLREDUCE 2
#define ALLTOALL 3
#define ALLTOALLV 4
#define ALLTOALLW 5
#define BARRIER 6
#define BCAST 7
#define EXSCAN 8
#define GATHER 9
#define GATHERV 10
#define REDUCE 11
#define REDUCESCATTER 12
#define SCAN 13
#define SCATTER 14
#define SCATTERV 15
/* total number of collectives above; keep in sync when adding an entry */
#define COLLCOUNT 16
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
@ -38,35 +61,36 @@ extern "C" {
/*
 * Exported globals of the tuned collective component.
 * Several of the declarations below appear twice; repeating a compatible
 * extern declaration is legal C and does not create a second object.
 */
/* the component descriptor itself */
OMPI_COMP_EXPORT extern const mca_coll_base_component_1_0_0_t mca_coll_tuned_component;
/* component-wide settings (the stream value is what gets passed to OPAL_OUTPUT) */
OMPI_COMP_EXPORT extern int mca_coll_tuned_stream;
OMPI_COMP_EXPORT extern int mca_coll_tuned_priority;
OMPI_COMP_EXPORT extern int mca_coll_tuned_preallocate_memory_comm_size_limit;
OMPI_COMP_EXPORT extern int mca_coll_tuned_use_dynamic_rules;
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_chain_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_stream;
OMPI_COMP_EXPORT extern int mca_coll_tuned_priority;
OMPI_COMP_EXPORT extern int mca_coll_tuned_preallocate_memory_comm_size_limit;
OMPI_COMP_EXPORT extern int mca_coll_tuned_use_dynamic_rules;
/* name of the dynamic rules configuration file; NULL when no filename has been set */
OMPI_COMP_EXPORT extern char* mca_coll_tuned_dynamic_rules_filename;
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_init_chain_fanout;
/* forced algorithm choices, grouped per collective: choice, segsize, tree_fanout, chain_fanout
 * (barrier only has a forced choice) */
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_choice;
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_segsize;
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_chain_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_choice;
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_segsize;
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_allreduce_forced_chain_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_choice;
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_segsize;
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_chain_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_choice;
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_segsize;
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_alltoall_forced_chain_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_barrier_forced_choice;
OMPI_COMP_EXPORT extern int mca_coll_tuned_barrier_forced_choice;
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_choice;
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_segsize;
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_chain_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_choice;
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_segsize;
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_bcast_forced_chain_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_choice;
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_segsize;
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_choice;
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_segsize;
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_tree_fanout;
OMPI_COMP_EXPORT extern int mca_coll_tuned_reduce_forced_chain_fanout;
/*
* coll API functions
@ -544,10 +568,6 @@ static inline void mca_coll_tuned_free_reqs(ompi_request_t **reqs, int count)
ompi_request_free(&reqs[i]);
}
/* decision table declaration */
/* currently a placeholder: the struct has no members yet */
typedef struct rule_s {
} rule_t;
/*
* Data structure for hanging data off the communicator
@ -596,7 +616,7 @@ struct mca_coll_base_comm_t {
int cached_pipeline_root;
/* extra data required by the decision functions */
rule_t* decision_table;
rule_t rules[COLLCOUNT];
};
#if defined(c_plusplus) || defined(__cplusplus)

Просмотреть файл

@ -36,12 +36,13 @@ const char *mca_coll_tuned_component_version_string =
/*
* Global variables: component-wide settings and their default values.
*/
/* value passed as the stream argument of OPAL_OUTPUT; starts at -1 */
int mca_coll_tuned_stream = -1;
int mca_coll_tuned_priority = 30;
int mca_coll_tuned_preallocate_memory_comm_size_limit = (32*1024);
/* non-zero enables dynamic rules; off by default */
int mca_coll_tuned_use_dynamic_rules = 0;
int mca_coll_tuned_init_tree_fanout = 4;
int mca_coll_tuned_init_chain_fanout = 4;
int mca_coll_tuned_stream = -1;
int mca_coll_tuned_priority = 30;
int mca_coll_tuned_preallocate_memory_comm_size_limit = (32*1024);
int mca_coll_tuned_use_dynamic_rules = 0;
/* dynamic rules configuration filename; NULL means no filename has been set */
char* mca_coll_tuned_dynamic_rules_filename = (char*) NULL;
/* initial guesses at the topology fanout parameters */
int mca_coll_tuned_init_tree_fanout = 4;
int mca_coll_tuned_init_chain_fanout = 4;
/* forced algorithm variables */
int mca_coll_tuned_allreduce_forced_choice = 0;
@ -145,6 +146,17 @@ static int tuned_open(void)
false, false, mca_coll_tuned_use_dynamic_rules,
&mca_coll_tuned_use_dynamic_rules);
/* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */
if (mca_coll_tuned_use_dynamic_rules) {
/* char *default_name; */
/* asprintf(&default_name, "~/.openmpi/openmpi-coll-tuned-params.conf"); */
mca_base_param_reg_string(&mca_coll_tuned_component.collm_version,
"dynamic_rules_filename",
"Filename of configuration file that contains the dynamic (@runtime) decision function rules",
false, false, mca_coll_tuned_dynamic_rules_filename,
&mca_coll_tuned_dynamic_rules_filename);
}
/* some initial guesses at topology parameters */
mca_base_param_reg_int(&mca_coll_tuned_component.collm_version,
"init_tree_fanout",

Просмотреть файл

@ -18,7 +18,7 @@
#define MCA_COLL_TUNED_DYNAMIC_RULES_EXPORT_H
#include "ompi_config.h"
#include "coll_tuned.h"
/* #include "coll_tuned.h" */
#include "mpi.h"
#include "ompi/include/constants.h"
@ -97,7 +97,7 @@ typedef struct rule_s {
/* Declaration with an empty parameter list: in C this leaves the parameters unspecified (no prototype). */
rule_t* mk_rule ();
/* Prototype form: mk_rule takes no arguments and returns a pointer to a rule_t. */
rule_t* mk_rule (void);
/* NOTE(review): judging by the name, this builds a condition from (param, op, target) and
 * attaches it to rule; the meaning of the int return value is not visible here -- confirm
 * against the definition. */
int mk_and_add_condition_to_rule (rule_t* rule, param_index_t param,
condition_op_t op, int target);

Просмотреть файл

@ -27,13 +27,6 @@
#include "coll_tuned.h"
#include "coll_tuned_topo.h"
/* These come from the component. Open question from the original author: */
/* shouldn't they be cached on the component somehow? */
extern int mca_coll_tuned_use_dynamic_rules_param;
extern int mca_coll_tuned_init_tree_fanout_param;
extern int mca_coll_tuned_init_chain_fanout_param;
extern int mca_coll_tuned_preallocate_memory_comm_size_limit_param;
/*
* Which set are we using?
@ -292,7 +285,7 @@ mca_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
const struct mca_coll_base_module_1_0_0_t *
mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
{
int size;
int size, rank;
struct mca_coll_base_comm_t *data;
/* fanout parameters */
int pre_allocate = 1;
@ -320,6 +313,41 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
size = ompi_comm_size(comm);
}
/*
* If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file..
* then this affects how much storage space you need
* (This is a basic version of what will go into V2)
*
*/
rank = ompi_comm_rank(comm); /* find rank as only MCW:0 opens any tuned conf files */
if (!rank) {
if (&ompi_mpi_comm_world==comm) {
if (mca_coll_tuned_use_dynamic_rules) {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic"));
if (mca_coll_tuned_dynamic_rules_filename) {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init Opening [%s]",
mca_coll_tuned_dynamic_rules_filename));
}
}
else {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init MCW & NOT Dynamic"));
}
}
else {
if (mca_coll_tuned_use_dynamic_rules) {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic"));
}
else {
OPAL_OUTPUT((mca_coll_tuned_stream,"coll:tuned:module_init NOT MCW & NOT Dynamic"));
}
}
}
/*
* we still malloc data as it is used by the TUNED modules
* if we don't allocate it and fall back to a BASIC module routine then it confuses debuggers
@ -347,9 +375,10 @@ mca_coll_tuned_module_init(struct ompi_communicator_t *comm)
data = malloc(sizeof(struct mca_coll_base_comm_t));
if (NULL == data) {
data->mcct_reqs = (ompi_request_t **) NULL;
data->mcct_num_reqs = 0;
return NULL;
}
data->mcct_reqs = (ompi_request_t **) NULL;
data->mcct_num_reqs = 0;
}
/*
@ -437,3 +466,4 @@ int mca_coll_tuned_module_finalize(struct ompi_communicator_t *comm)
}
return OMPI_SUCCESS;
}