1
1
The call to tm_optimize_topology is commented out for now, until the bug is resolved in TreeMatch (TM)

Signed-off-by: Guillaume Mercier <guillaume.mercier@bordeaux-inp.fr>
Этот коммит содержится в:
Guillaume Mercier 2018-11-28 12:08:20 +01:00 коммит произвёл George Bosilca
родитель c6f73e8883
Коммит 27aa34e53f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 09C926752C9F09B1
9 изменённых файлов: 115 добавлений и 80 удалений

Просмотреть файл

@ -3,8 +3,8 @@
* Copyright (c) 2011-2017 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2011-2016 INRIA. All rights reserved.
* Copyright (c) 2012-2017 Bordeaux Polytechnic Institute
* Copyright (c) 2011-2018 Inria. All rights reserved.
* Copyright (c) 2011-2018 Bordeaux Polytechnic Institute
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2017 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
@ -36,7 +36,7 @@
#include "opal/mca/pmix/pmix.h"
/* #define __DEBUG__ 1 */
/* #define __DEBUG__ 1 */
/**
* This function is a allreduce between all processes to detect for oversubscription.
@ -320,7 +320,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
}
reqs = (MPI_Request *)calloc(num_procs_in_node-1, sizeof(MPI_Request));
if( rank == lindex_to_grank[0] ) { /* local leader clean the hierarchy */
if( rank == lindex_to_grank[0] ) { /* local leader cleans the hierarchy */
int array_size = effective_depth + 1;
int *myhierarchy = (int *)calloc(array_size, sizeof(int));
@ -449,7 +449,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
for(i = 0; i < num_nodes; i++)
num_objs_total += objs_per_node[i];
obj_mapping = (int *)calloc(num_objs_total,sizeof(int));
obj_mapping = (int *)malloc(num_objs_total*sizeof(int));
for(i = 0; i < num_objs_total; i++)
obj_mapping[i] = -1;
memcpy(obj_mapping, obj_to_rank_in_comm, objs_per_node[0]*sizeof(int));
displ = objs_per_node[0];
@ -508,8 +510,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
for(i = 0 ; i < hierarchies[0]; i++)
hierarchies[i+1] = tracker[i]->arity;
for(; i < (TM_MAX_LEVELS+1); i++) /* fill up everything else with -1 */
hierarchies[i] = -1;
for(; i < (TM_MAX_LEVELS+1); i++) /* fill up everything else with 0 */
hierarchies[i] = 0;
/* gather hierarchies iff more than 1 node! */
if ( num_nodes > 1 ) {
@ -592,32 +594,24 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
for(i = 1 ; i < tm_topology->nb_levels; i++)
tm_topology->nb_nodes[i] = tm_topology->nb_nodes[i-1] * tm_topology->arity[i-1];
#ifdef __DEBUG__
assert(num_objs_total == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
#endif
/* Build process id tab */
tm_topology->node_id = (int **)calloc(tm_topology->nb_levels, sizeof(int*));
tm_topology->node_rank = (int **)malloc(sizeof(int *) * tm_topology->nb_levels);
for(i = 0; i < tm_topology->nb_levels; i++) {
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int));
tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int));
/*note : we make the hypothesis that logical indexes in hwloc range from
0 to N, are contiguous and crescent. */
for( j = 0 ; j < (int)tm_topology->nb_nodes[i] ; j++ ) {
tm_topology->node_id[i][j] = j;
tm_topology->node_rank[i][j] = j;
/* Should use object->logical_index */
/* obj = hwloc_get_obj_by_depth(topo,i,j%num_objs_in_node);
id = obj->logical_index + (num_objs_in_node)*(j/num_obj_in_node)*/
/*
int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
topology->node_id[i][j] = id;
topology->node_rank[i][id] = j;
*/
}
tm_topology->node_id = (int *)malloc(num_objs_total*sizeof(int));
tm_topology->node_rank = (int *)malloc(num_objs_total*sizeof(int));
for( i = 0 ; i < num_objs_total ; i++ )
tm_topology->node_id[i] = tm_topology->node_rank[i] = -1;
/*note : we make the hypothesis that logical indexes in hwloc range from
0 to N, are contiguous and crescent. */
for( i = 0 ; i < num_objs_total ; i++ ) {
tm_topology->node_id[i] = obj_mapping[i]; /* use process ranks instead of core numbers */
if (obj_mapping[i] != -1) /* so that k[i] is the new rank of process i */
tm_topology->node_rank[obj_mapping[i]] = i; /* after computation by TreeMatch */
}
/* unused for now*/
tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
tm_topology->nb_proc_units = num_objs_total;
tm_topology->nb_constraints = 0;
@ -627,22 +621,23 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
for(idx = 0, i = 0; i < tm_topology->nb_proc_units ; i++)
if (obj_mapping[i] != -1)
tm_topology->constraints[idx++] = obj_mapping[i];
tm_topology->constraints[idx++] = obj_mapping[i]; /* use process ranks instead of core numbers */
#ifdef __DEBUG__
assert(idx == tm_topology->nb_constraints);
#endif
tm_topology->oversub_fact = 1;
#ifdef __DEBUG__
assert(num_objs_total == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
/*
for(i = 0; i < tm_topology->nb_levels ; i++) {
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"tm topo node_id for level [%i] : ",i);
dump_int_array(10, ompi_topo_base_framework.framework_output,
"", "", obj_mapping, tm_topology->nb_nodes[i]);
}
*/
tm_display_topology(tm_topology);
#endif
comm_pattern = (double **)malloc(size*sizeof(double *));
for(i = 0 ; i < size ; i++)
comm_pattern[i] = local_pattern + i * size;
@ -660,7 +655,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
"", "", comm_pattern[i], size);
}
#endif
tm_optimize_topology(&tm_topology);
//tm_optimize_topology(&tm_topology);
aff_mat = tm_build_affinity_mat(comm_pattern,size);
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
sol = tm_compute_mapping(tm_topology, comm_tree);
@ -668,7 +663,6 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
k = (int *)calloc(sol->k_length, sizeof(int));
for(idx = 0 ; idx < (int)sol->k_length ; idx++)
k[idx] = sol->k[idx][0];
#ifdef __DEBUG__
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
"====> nb levels : %i\n",tm_topology->nb_levels);
@ -690,6 +684,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
/* Todo : Bcast + group creation */
/* scatter the ranks */
/* don't need to convert k from local rank to global rank */
if (OMPI_SUCCESS != (err = comm_old->c_coll->coll_scatter(k, 1, MPI_INT,
&newrank, 1, MPI_INT,
0, comm_old,
@ -770,6 +765,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
tm_solution_t *sol = NULL;
tm_affinity_mat_t *aff_mat = NULL;
double **comm_pattern = NULL;
int *obj_to_rank_in_lcomm = NULL;
comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *));
for( i = 0; i < num_procs_in_node; i++ ) {
@ -800,35 +796,57 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
tm_topology->nb_levels = numlevels;
tm_topology->arity = (int *)calloc(tm_topology->nb_levels, sizeof(int));
tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t));
tm_topology->node_id = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
tm_topology->node_rank = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
for(i = 0 ; i < tm_topology->nb_levels ; i++){
int nb_objs = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, tracker[i]->depth);
tm_topology->nb_nodes[i] = nb_objs;
tm_topology->arity[i] = tracker[i]->arity;
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int));
tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int));
for(j = 0; j < (int)tm_topology->nb_nodes[i] ; j++){
tm_topology->node_id[i][j] = j;
tm_topology->node_rank[i][j] = j;
}
}
#ifdef __DEBUG__
assert(num_objs_in_node == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
#endif
/* create a table that derives the rank in local (node) comm from the object number */
obj_to_rank_in_lcomm = (int *)malloc(num_objs_in_node*sizeof(int));
for(i = 0 ; i < num_objs_in_node ; i++) {
obj_to_rank_in_lcomm[i] = -1;
object = hwloc_get_obj_by_depth(opal_hwloc_topology, effective_depth, i);
for( j = 0; j < num_procs_in_node ; j++ )
if(localrank_to_objnum[j] == (int)(object->logical_index)) {
obj_to_rank_in_lcomm[i] = j;
break;
}
}
/* Build process id tab */
tm_topology->node_id = (int *)malloc(num_objs_in_node*sizeof(int));
tm_topology->node_rank = (int *)malloc(num_objs_in_node*sizeof(int));
for(i = 1 ; i < num_objs_in_node; i++)
tm_topology->node_id[i] = tm_topology->node_rank[i] = -1;
for( i = 0 ; i < num_objs_in_node ; i++ ) {
/*note : we make the hypothesis that logical indexes in hwloc range from
0 to N, are contiguous and crescent. */
tm_topology->node_id[i] = obj_to_rank_in_lcomm[i];
if( obj_to_rank_in_lcomm[i] != -1)
tm_topology->node_rank[obj_to_rank_in_lcomm[i]] = i;
}
/* unused for now*/
tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
tm_topology->nb_proc_units = num_objs_in_node;
//tm_topology->nb_proc_units = num_procs_in_node;
tm_topology->nb_constraints = 0;
for(i = 0; i < num_procs_in_node ; i++)
if (localrank_to_objnum[i] != -1)
for(i = 0; i < num_objs_in_node ; i++)
if (obj_to_rank_in_lcomm[i] != -1)
tm_topology->nb_constraints++;
tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
for(idx = 0,i = 0; i < num_procs_in_node ; i++)
if (localrank_to_objnum[i] != -1)
tm_topology->constraints[idx++] = localrank_to_objnum[i];
for(idx = 0,i = 0; i < num_objs_in_node ; i++)
if (obj_to_rank_in_lcomm[i] != -1)
tm_topology->constraints[idx++] = obj_to_rank_in_lcomm[i];
tm_topology->oversub_fact = 1;
@ -841,12 +859,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
"Nb objs for level %i : %lu | arity %i\n ",
i, tm_topology->nb_nodes[i],tm_topology->arity[i]));
dump_int_array(10, ompi_topo_base_framework.framework_output,
"", "Obj id ", tm_topology->node_id[i], tm_topology->nb_nodes[i]);
}
dump_int_array(10, ompi_topo_base_framework.framework_output,
"", "Obj id ", tm_topology->node_id, tm_topology->nb_nodes[tm_topology->nb_levels-1]);
tm_display_topology(tm_topology);
#endif
tm_optimize_topology(&tm_topology);
//tm_optimize_topology(&tm_topology);
aff_mat = tm_build_affinity_mat(comm_pattern,num_procs_in_node);
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
sol = tm_compute_mapping(tm_topology, comm_tree);
@ -866,7 +884,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
dump_int_array(10, ompi_topo_base_framework.framework_output,
"Matching : ", "", sol->sigma, sol->sigma_length);
#endif
free(obj_to_rank_in_lcomm);
free(aff_mat->sum_row);
free(aff_mat);
free(comm_pattern);
@ -874,7 +892,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
tm_free_tree(comm_tree);
tm_free_topology(tm_topology);
}
/* Todo : Bcast + group creation */
/* scatter the ranks */
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_scatter(k, 1, MPI_INT,

Просмотреть файл

@ -4,7 +4,11 @@
/*
This comparison function is used to sort elements in key descending order.
*/
static int compFunc(const FiboNode * const node1, const FiboNode * const node2)
int compfunc(const FiboNode * const, const FiboNode * const);
int compFunc(const FiboNode * const node1, const FiboNode * const node2)
{
return
( ( ((QueueElement*)(node1))->key > ((QueueElement*)(node2))->key ) ? -1 : 1);

Просмотреть файл

@ -31,7 +31,7 @@ static int ilog2(int val)
static int verbose_level = ERROR;
static bucket_list_t global_bl;
bucket_list_t global_bl;
int tab_cmp(const void*,const void*);
int old_bucket_id(int,int,bucket_list_t);

Просмотреть файл

@ -1,11 +1,9 @@
typedef struct _com_mat_t{
double **comm;
double **comm;
int n; /*comm is of size n by n the other element are zeroes*/
} com_mat_t;
int *kpartition(int, com_mat_t*, int, int *, int);
tm_tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed);
#define HAVE_LIBSCOTCH 0 // missing configure setup?

Просмотреть файл

@ -47,7 +47,23 @@ typedef struct {
} hash2_t;
static tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order);
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order, long int nnz);
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology);
void depth_first(tm_tree_t *comm_tree, int *proc_list,int *i);
int fill_tab(int **new_tab,int *tab, int n, int start, int max_val, int shift);
long int init_mat(char *filename,int N, double **mat, double *sum_row);
void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level,
int *sigma, int nb_processes, int **k, int nb_compute_units);
int nb_leaves(tm_tree_t *comm_tree);
int nb_lines(char *filename);
void print_1D_tab(int *tab,int N);
tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree);
void tm_finalize();
void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
tm_affinity_mat_t *tm_load_aff_mat(char *filename);
void update_comm_speed(double **comm_speed,int old_size,int new_size);
tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order);
/* compute the number of leaves of any subtree starting froma node of depth depth*/
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology)
@ -60,7 +76,7 @@ int compute_nb_leaves_from_level(int depth,tm_topology_t *topology)
return res;
}
void tm_finalize(void){
void tm_finalize(){
terminate_thread_pool();
tm_mem_check();
}

Просмотреть файл

@ -19,9 +19,8 @@ int nb_lines(char *filename);
int nb_processing_units(tm_topology_t *topology);
void print_1D_tab(int *tab,int N);
tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree);
void tm_finalize(void);
void tm_finalize();
void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
/* load affinity matrix */
tm_affinity_mat_t *tm_load_aff_mat(char *filename);
void update_comm_speed(double **comm_speed,int old_size,int new_size);

Просмотреть файл

@ -162,14 +162,7 @@ double ** topology_to_arch(hwloc_topology_t topology)
double **arch = NULL;
nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
if( nb_proc <= 0 ) { /* if multiple levels with PUs */
return NULL;
}
arch = (double**)MALLOC(sizeof(double*)*nb_proc);
if( NULL == arch ) {
return NULL;
}
for( i = 0 ; i < nb_proc ; i++ ){
obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc);

Просмотреть файл

@ -934,7 +934,7 @@ void partial_exhaustive_search(int nb_args, void **args, int thread_id){
work_unit_t *work = (work_unit_t *) args[7];
pthread_mutex_t *lock = (pthread_mutex_t *) args[8];
int *tab_i;
int id = -1, id1, id2;
int id=-1, id1, id2;
int total_work = work->nb_work;
int cur_work = 0;
@ -1768,6 +1768,7 @@ void group_nodes(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new
int mat_order = aff_mat -> order;
tm_tree_t **cur_group = NULL;
int j, l;
unsigned long int list_size;
unsigned long int i;
group_list_t list, **best_selection = NULL, **tab_group = NULL;
double best_val, last_best;
@ -1827,7 +1828,8 @@ void group_nodes(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new
best_selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
list_all_possible_groups(cost_mat, tab_node, 0, arity, 0, cur_group, &list);
assert( nb_groups == (unsigned long int)list.val );
list_size = (int)list.val;
assert( list_size == nb_groups);
tab_group = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups);
list_to_tab(list.next, tab_group, nb_groups);
if(verbose_level>=INFO)

Просмотреть файл

@ -58,13 +58,15 @@ typedef struct {
int *arity; /* Arity of the nodes of each level*/
int nb_levels; /* Number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
size_t *nb_nodes; /* Number of nodes of each level*/
int physical_num; /* Flag set to !=0 if se use physical numberig and set to 0 is we use logical numbering */
int physical_num; /* Flag set to !=0 if se use physical numberig and set to 0 is we use logical numbering */
int *node_id; /* ID of the nodes of the tree of the last level*/
int *node_rank ; /* Rank of the nodes of the tree for the last level given its ID: this is the inverse tab of node_id*/
size_t *nb_free_nodes; /* Nb of available nodes of each level*/
int **free_nodes; /* array of node that are free: useful to simulate batch scheduler*/
double *cost; /* Cost of the communication depending on the distance:
cost[i] is the cost for communicating at distance nb_levels-i*/
int *constraints; /* Array of constraints: id of the nodes where it is possible to map processes */
int nb_constraints; /* Size of the above array */
int oversub_fact; /* Maximum number of processes to be mapped on a given node */
@ -135,6 +137,8 @@ tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_ty
*/
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
/* load affinity matrix */
tm_affinity_mat_t *tm_load_aff_mat(char *com_filename);
/*
Alternativelly, build the affinity matrix from a array of array of matrix of size order by order
For performance reason mat is not copied.
@ -175,6 +179,7 @@ void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
void tm_set_verbose_level(unsigned int level);
unsigned int tm_get_verbose_level(void);
/* finalize treematch :check memory if necessary, and free internal variables (thread pool)*/
void tm_finalize();
/*
Ask for exhaustive search: may be very long
@ -182,7 +187,7 @@ Ask for exhaustive search: may be very long
new_val != 0 : exhuative search
*/
void tm_set_exhaustive_search_flag(int new_val);
int tm_get_exhaustive_search_flag(void);
int tm_get_exhaustive_search_flag();
/*
Ask for greedy k-partitionning even if scotch is available
@ -190,7 +195,7 @@ Ask for greedy k-partitionning even if scotch is available
new_val != 0 : greedy k-partitionning
*/
void tm_set_greedy_flag(int new_val);
int tm_get_greedy_flag(void);
int tm_get_greedy_flag();
/* Setting the maximum number of threads you want to use in parallel parts of TreeMatch */
@ -198,7 +203,7 @@ void tm_set_max_nb_threads(unsigned int val);
/* managing the usage of physical vs. logical core numbering when using hwloc/xml files */
void tm_set_numbering(tm_numbering_t new_val); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
tm_numbering_t tm_get_numbering(void); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
tm_numbering_t tm_get_numbering(); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
#include "tm_malloc.h"