New version based on TM 1.3
Optimize_topology is commented out for now until the bug is resolved in TM. Signed-off-by: Guillaume Mercier <guillaume.mercier@bordeaux-inp.fr>
Этот коммит содержится в:
родитель
c6f73e8883
Коммит
27aa34e53f
@ -3,8 +3,8 @@
|
||||
* Copyright (c) 2011-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011-2016 INRIA. All rights reserved.
|
||||
* Copyright (c) 2012-2017 Bordeaux Polytechnic Institute
|
||||
* Copyright (c) 2011-2018 Inria. All rights reserved.
|
||||
* Copyright (c) 2011-2018 Bordeaux Polytechnic Institute
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -36,7 +36,7 @@
|
||||
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
/* #define __DEBUG__ 1 */
|
||||
/* #define __DEBUG__ 1 */
|
||||
|
||||
/**
|
||||
* This function performs an allreduce across all processes to detect oversubscription.
|
||||
@ -320,7 +320,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
}
|
||||
|
||||
reqs = (MPI_Request *)calloc(num_procs_in_node-1, sizeof(MPI_Request));
|
||||
if( rank == lindex_to_grank[0] ) { /* local leader clean the hierarchy */
|
||||
if( rank == lindex_to_grank[0] ) { /* local leader cleans the hierarchy */
|
||||
int array_size = effective_depth + 1;
|
||||
int *myhierarchy = (int *)calloc(array_size, sizeof(int));
|
||||
|
||||
@ -449,7 +449,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
|
||||
for(i = 0; i < num_nodes; i++)
|
||||
num_objs_total += objs_per_node[i];
|
||||
obj_mapping = (int *)calloc(num_objs_total,sizeof(int));
|
||||
obj_mapping = (int *)malloc(num_objs_total*sizeof(int));
|
||||
for(i = 0; i < num_objs_total; i++)
|
||||
obj_mapping[i] = -1;
|
||||
|
||||
memcpy(obj_mapping, obj_to_rank_in_comm, objs_per_node[0]*sizeof(int));
|
||||
displ = objs_per_node[0];
|
||||
@ -508,8 +510,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
|
||||
for(i = 0 ; i < hierarchies[0]; i++)
|
||||
hierarchies[i+1] = tracker[i]->arity;
|
||||
for(; i < (TM_MAX_LEVELS+1); i++) /* fill up everything else with -1 */
|
||||
hierarchies[i] = -1;
|
||||
for(; i < (TM_MAX_LEVELS+1); i++) /* fill up everything else with 0 */
|
||||
hierarchies[i] = 0;
|
||||
|
||||
/* gather hierarchies iff more than 1 node! */
|
||||
if ( num_nodes > 1 ) {
|
||||
@ -592,32 +594,24 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
for(i = 1 ; i < tm_topology->nb_levels; i++)
|
||||
tm_topology->nb_nodes[i] = tm_topology->nb_nodes[i-1] * tm_topology->arity[i-1];
|
||||
|
||||
#ifdef __DEBUG__
|
||||
assert(num_objs_total == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
|
||||
#endif
|
||||
/* Build process id tab */
|
||||
tm_topology->node_id = (int **)calloc(tm_topology->nb_levels, sizeof(int*));
|
||||
tm_topology->node_rank = (int **)malloc(sizeof(int *) * tm_topology->nb_levels);
|
||||
for(i = 0; i < tm_topology->nb_levels; i++) {
|
||||
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int));
|
||||
tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int));
|
||||
/*note : we make the hypothesis that logical indexes in hwloc range from
|
||||
0 to N, are contiguous and crescent. */
|
||||
|
||||
for( j = 0 ; j < (int)tm_topology->nb_nodes[i] ; j++ ) {
|
||||
tm_topology->node_id[i][j] = j;
|
||||
tm_topology->node_rank[i][j] = j;
|
||||
|
||||
/* Should use object->logical_index */
|
||||
/* obj = hwloc_get_obj_by_depth(topo,i,j%num_objs_in_node);
|
||||
id = obj->logical_index + (num_objs_in_node)*(j/num_obj_in_node)*/
|
||||
/*
|
||||
int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
|
||||
topology->node_id[i][j] = id;
|
||||
topology->node_rank[i][id] = j;
|
||||
*/
|
||||
}
|
||||
tm_topology->node_id = (int *)malloc(num_objs_total*sizeof(int));
|
||||
tm_topology->node_rank = (int *)malloc(num_objs_total*sizeof(int));
|
||||
for( i = 0 ; i < num_objs_total ; i++ )
|
||||
tm_topology->node_id[i] = tm_topology->node_rank[i] = -1;
|
||||
/*note : we make the hypothesis that logical indexes in hwloc range from
|
||||
0 to N, are contiguous and crescent. */
|
||||
for( i = 0 ; i < num_objs_total ; i++ ) {
|
||||
tm_topology->node_id[i] = obj_mapping[i]; /* use process ranks instead of core numbers */
|
||||
if (obj_mapping[i] != -1) /* so that k[i] is the new rank of process i */
|
||||
tm_topology->node_rank[obj_mapping[i]] = i; /* after computation by TreeMatch */
|
||||
}
|
||||
|
||||
/* unused for now*/
|
||||
tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
|
||||
|
||||
tm_topology->nb_proc_units = num_objs_total;
|
||||
|
||||
tm_topology->nb_constraints = 0;
|
||||
@ -627,22 +621,23 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
|
||||
for(idx = 0, i = 0; i < tm_topology->nb_proc_units ; i++)
|
||||
if (obj_mapping[i] != -1)
|
||||
tm_topology->constraints[idx++] = obj_mapping[i];
|
||||
|
||||
tm_topology->constraints[idx++] = obj_mapping[i]; /* use process ranks instead of core numbers */
|
||||
#ifdef __DEBUG__
|
||||
assert(idx == tm_topology->nb_constraints);
|
||||
#endif
|
||||
tm_topology->oversub_fact = 1;
|
||||
|
||||
#ifdef __DEBUG__
|
||||
assert(num_objs_total == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
|
||||
|
||||
/*
|
||||
for(i = 0; i < tm_topology->nb_levels ; i++) {
|
||||
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
|
||||
"tm topo node_id for level [%i] : ",i);
|
||||
dump_int_array(10, ompi_topo_base_framework.framework_output,
|
||||
"", "", obj_mapping, tm_topology->nb_nodes[i]);
|
||||
}
|
||||
*/
|
||||
tm_display_topology(tm_topology);
|
||||
#endif
|
||||
|
||||
comm_pattern = (double **)malloc(size*sizeof(double *));
|
||||
for(i = 0 ; i < size ; i++)
|
||||
comm_pattern[i] = local_pattern + i * size;
|
||||
@ -660,7 +655,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
"", "", comm_pattern[i], size);
|
||||
}
|
||||
#endif
|
||||
tm_optimize_topology(&tm_topology);
|
||||
//tm_optimize_topology(&tm_topology);
|
||||
aff_mat = tm_build_affinity_mat(comm_pattern,size);
|
||||
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
|
||||
sol = tm_compute_mapping(tm_topology, comm_tree);
|
||||
@ -668,7 +663,6 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
k = (int *)calloc(sol->k_length, sizeof(int));
|
||||
for(idx = 0 ; idx < (int)sol->k_length ; idx++)
|
||||
k[idx] = sol->k[idx][0];
|
||||
|
||||
#ifdef __DEBUG__
|
||||
opal_output_verbose(10, ompi_topo_base_framework.framework_output,
|
||||
"====> nb levels : %i\n",tm_topology->nb_levels);
|
||||
@ -690,6 +684,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
|
||||
/* Todo : Bcast + group creation */
|
||||
/* scatter the ranks */
|
||||
/* don't need to convert k from local rank to global rank */
|
||||
if (OMPI_SUCCESS != (err = comm_old->c_coll->coll_scatter(k, 1, MPI_INT,
|
||||
&newrank, 1, MPI_INT,
|
||||
0, comm_old,
|
||||
@ -770,6 +765,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
tm_solution_t *sol = NULL;
|
||||
tm_affinity_mat_t *aff_mat = NULL;
|
||||
double **comm_pattern = NULL;
|
||||
int *obj_to_rank_in_lcomm = NULL;
|
||||
|
||||
comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *));
|
||||
for( i = 0; i < num_procs_in_node; i++ ) {
|
||||
@ -800,35 +796,57 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
tm_topology->nb_levels = numlevels;
|
||||
tm_topology->arity = (int *)calloc(tm_topology->nb_levels, sizeof(int));
|
||||
tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t));
|
||||
tm_topology->node_id = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
|
||||
tm_topology->node_rank = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
|
||||
|
||||
|
||||
for(i = 0 ; i < tm_topology->nb_levels ; i++){
|
||||
int nb_objs = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, tracker[i]->depth);
|
||||
tm_topology->nb_nodes[i] = nb_objs;
|
||||
tm_topology->arity[i] = tracker[i]->arity;
|
||||
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int));
|
||||
tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int));
|
||||
for(j = 0; j < (int)tm_topology->nb_nodes[i] ; j++){
|
||||
tm_topology->node_id[i][j] = j;
|
||||
tm_topology->node_rank[i][j] = j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef __DEBUG__
|
||||
assert(num_objs_in_node == (int)tm_topology->nb_nodes[tm_topology->nb_levels-1]);
|
||||
#endif
|
||||
/* create a table that derives the rank in local (node) comm from the object number */
|
||||
obj_to_rank_in_lcomm = (int *)malloc(num_objs_in_node*sizeof(int));
|
||||
for(i = 0 ; i < num_objs_in_node ; i++) {
|
||||
obj_to_rank_in_lcomm[i] = -1;
|
||||
object = hwloc_get_obj_by_depth(opal_hwloc_topology, effective_depth, i);
|
||||
for( j = 0; j < num_procs_in_node ; j++ )
|
||||
if(localrank_to_objnum[j] == (int)(object->logical_index)) {
|
||||
obj_to_rank_in_lcomm[i] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Build process id tab */
|
||||
tm_topology->node_id = (int *)malloc(num_objs_in_node*sizeof(int));
|
||||
tm_topology->node_rank = (int *)malloc(num_objs_in_node*sizeof(int));
|
||||
for(i = 1 ; i < num_objs_in_node; i++)
|
||||
tm_topology->node_id[i] = tm_topology->node_rank[i] = -1;
|
||||
|
||||
for( i = 0 ; i < num_objs_in_node ; i++ ) {
|
||||
/*note : we make the hypothesis that logical indexes in hwloc range from
|
||||
0 to N, are contiguous and crescent. */
|
||||
tm_topology->node_id[i] = obj_to_rank_in_lcomm[i];
|
||||
if( obj_to_rank_in_lcomm[i] != -1)
|
||||
tm_topology->node_rank[obj_to_rank_in_lcomm[i]] = i;
|
||||
}
|
||||
|
||||
/* unused for now*/
|
||||
tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
|
||||
|
||||
tm_topology->nb_proc_units = num_objs_in_node;
|
||||
//tm_topology->nb_proc_units = num_procs_in_node;
|
||||
tm_topology->nb_constraints = 0;
|
||||
for(i = 0; i < num_procs_in_node ; i++)
|
||||
if (localrank_to_objnum[i] != -1)
|
||||
|
||||
for(i = 0; i < num_objs_in_node ; i++)
|
||||
if (obj_to_rank_in_lcomm[i] != -1)
|
||||
tm_topology->nb_constraints++;
|
||||
|
||||
|
||||
tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
|
||||
for(idx = 0,i = 0; i < num_procs_in_node ; i++)
|
||||
if (localrank_to_objnum[i] != -1)
|
||||
tm_topology->constraints[idx++] = localrank_to_objnum[i];
|
||||
for(idx = 0,i = 0; i < num_objs_in_node ; i++)
|
||||
if (obj_to_rank_in_lcomm[i] != -1)
|
||||
tm_topology->constraints[idx++] = obj_to_rank_in_lcomm[i];
|
||||
|
||||
tm_topology->oversub_fact = 1;
|
||||
|
||||
@ -841,12 +859,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
OPAL_OUTPUT_VERBOSE((10, ompi_topo_base_framework.framework_output,
|
||||
"Nb objs for level %i : %lu | arity %i\n ",
|
||||
i, tm_topology->nb_nodes[i],tm_topology->arity[i]));
|
||||
dump_int_array(10, ompi_topo_base_framework.framework_output,
|
||||
"", "Obj id ", tm_topology->node_id[i], tm_topology->nb_nodes[i]);
|
||||
}
|
||||
dump_int_array(10, ompi_topo_base_framework.framework_output,
|
||||
"", "Obj id ", tm_topology->node_id, tm_topology->nb_nodes[tm_topology->nb_levels-1]);
|
||||
tm_display_topology(tm_topology);
|
||||
#endif
|
||||
tm_optimize_topology(&tm_topology);
|
||||
//tm_optimize_topology(&tm_topology);
|
||||
aff_mat = tm_build_affinity_mat(comm_pattern,num_procs_in_node);
|
||||
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
|
||||
sol = tm_compute_mapping(tm_topology, comm_tree);
|
||||
@ -866,7 +884,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
dump_int_array(10, ompi_topo_base_framework.framework_output,
|
||||
"Matching : ", "", sol->sigma, sol->sigma_length);
|
||||
#endif
|
||||
|
||||
free(obj_to_rank_in_lcomm);
|
||||
free(aff_mat->sum_row);
|
||||
free(aff_mat);
|
||||
free(comm_pattern);
|
||||
@ -874,7 +892,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
tm_free_tree(comm_tree);
|
||||
tm_free_topology(tm_topology);
|
||||
}
|
||||
|
||||
|
||||
/* Todo : Bcast + group creation */
|
||||
/* scatter the ranks */
|
||||
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_scatter(k, 1, MPI_INT,
|
||||
|
@ -4,7 +4,11 @@
|
||||
/*
|
||||
This comparison function is used to sort elements in key descending order.
|
||||
*/
|
||||
static int compFunc(const FiboNode * const node1, const FiboNode * const node2)
|
||||
int compfunc(const FiboNode * const, const FiboNode * const);
|
||||
|
||||
|
||||
|
||||
int compFunc(const FiboNode * const node1, const FiboNode * const node2)
|
||||
{
|
||||
return
|
||||
( ( ((QueueElement*)(node1))->key > ((QueueElement*)(node2))->key ) ? -1 : 1);
|
||||
|
@ -31,7 +31,7 @@ static int ilog2(int val)
|
||||
|
||||
static int verbose_level = ERROR;
|
||||
|
||||
static bucket_list_t global_bl;
|
||||
bucket_list_t global_bl;
|
||||
|
||||
int tab_cmp(const void*,const void*);
|
||||
int old_bucket_id(int,int,bucket_list_t);
|
||||
|
@ -1,11 +1,9 @@
|
||||
typedef struct _com_mat_t{
|
||||
double **comm;
|
||||
double **comm;
|
||||
int n; /*comm is of size n by n the other element are zeroes*/
|
||||
|
||||
|
||||
} com_mat_t;
|
||||
|
||||
|
||||
int *kpartition(int, com_mat_t*, int, int *, int);
|
||||
tm_tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed);
|
||||
|
||||
#define HAVE_LIBSCOTCH 0 // missing configure setup?
|
||||
|
@ -47,7 +47,23 @@ typedef struct {
|
||||
} hash2_t;
|
||||
|
||||
|
||||
static tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order);
|
||||
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order, long int nnz);
|
||||
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology);
|
||||
void depth_first(tm_tree_t *comm_tree, int *proc_list,int *i);
|
||||
int fill_tab(int **new_tab,int *tab, int n, int start, int max_val, int shift);
|
||||
long int init_mat(char *filename,int N, double **mat, double *sum_row);
|
||||
void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level,
|
||||
int *sigma, int nb_processes, int **k, int nb_compute_units);
|
||||
int nb_leaves(tm_tree_t *comm_tree);
|
||||
int nb_lines(char *filename);
|
||||
void print_1D_tab(int *tab,int N);
|
||||
tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree);
|
||||
void tm_finalize();
|
||||
void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
|
||||
tm_affinity_mat_t *tm_load_aff_mat(char *filename);
|
||||
void update_comm_speed(double **comm_speed,int old_size,int new_size);
|
||||
tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order);
|
||||
|
||||
|
||||
/* compute the number of leaves of any subtree starting from a node at the given depth */
|
||||
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology)
|
||||
@ -60,7 +76,7 @@ int compute_nb_leaves_from_level(int depth,tm_topology_t *topology)
|
||||
return res;
|
||||
}
|
||||
|
||||
void tm_finalize(void){
|
||||
void tm_finalize(){
|
||||
terminate_thread_pool();
|
||||
tm_mem_check();
|
||||
}
|
||||
|
@ -19,9 +19,8 @@ int nb_lines(char *filename);
|
||||
int nb_processing_units(tm_topology_t *topology);
|
||||
void print_1D_tab(int *tab,int N);
|
||||
tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree);
|
||||
void tm_finalize(void);
|
||||
void tm_finalize();
|
||||
void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
|
||||
/* load affinity matrix */
|
||||
tm_affinity_mat_t *tm_load_aff_mat(char *filename);
|
||||
void update_comm_speed(double **comm_speed,int old_size,int new_size);
|
||||
|
||||
|
@ -162,14 +162,7 @@ double ** topology_to_arch(hwloc_topology_t topology)
|
||||
double **arch = NULL;
|
||||
|
||||
nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
|
||||
if( nb_proc <= 0 ) { /* if multiple levels with PUs */
|
||||
return NULL;
|
||||
}
|
||||
arch = (double**)MALLOC(sizeof(double*)*nb_proc);
|
||||
if( NULL == arch ) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for( i = 0 ; i < nb_proc ; i++ ){
|
||||
obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
|
||||
arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc);
|
||||
|
@ -934,7 +934,7 @@ void partial_exhaustive_search(int nb_args, void **args, int thread_id){
|
||||
work_unit_t *work = (work_unit_t *) args[7];
|
||||
pthread_mutex_t *lock = (pthread_mutex_t *) args[8];
|
||||
int *tab_i;
|
||||
int id = -1, id1, id2;
|
||||
int id=-1, id1, id2;
|
||||
int total_work = work->nb_work;
|
||||
int cur_work = 0;
|
||||
|
||||
@ -1768,6 +1768,7 @@ void group_nodes(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new
|
||||
int mat_order = aff_mat -> order;
|
||||
tm_tree_t **cur_group = NULL;
|
||||
int j, l;
|
||||
unsigned long int list_size;
|
||||
unsigned long int i;
|
||||
group_list_t list, **best_selection = NULL, **tab_group = NULL;
|
||||
double best_val, last_best;
|
||||
@ -1827,7 +1828,8 @@ void group_nodes(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new
|
||||
best_selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
|
||||
|
||||
list_all_possible_groups(cost_mat, tab_node, 0, arity, 0, cur_group, &list);
|
||||
assert( nb_groups == (unsigned long int)list.val );
|
||||
list_size = (int)list.val;
|
||||
assert( list_size == nb_groups);
|
||||
tab_group = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups);
|
||||
list_to_tab(list.next, tab_group, nb_groups);
|
||||
if(verbose_level>=INFO)
|
||||
|
@ -58,13 +58,15 @@ typedef struct {
|
||||
int *arity; /* Arity of the nodes of each level*/
|
||||
int nb_levels; /* Number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
|
||||
size_t *nb_nodes; /* Number of nodes of each level*/
|
||||
int physical_num; /* Flag set to !=0 if se use physical numberig and set to 0 is we use logical numbering */
|
||||
int physical_num; /* Flag set to !=0 if se use physical numberig and set to 0 is we use logical numbering */
|
||||
int *node_id; /* ID of the nodes of the tree of the last level*/
|
||||
int *node_rank ; /* Rank of the nodes of the tree for the last level given its ID: this is the inverse tab of node_id*/
|
||||
|
||||
size_t *nb_free_nodes; /* Nb of available nodes of each level*/
|
||||
int **free_nodes; /* array of node that are free: useful to simulate batch scheduler*/
|
||||
double *cost; /* Cost of the communication depending on the distance:
|
||||
cost[i] is the cost for communicating at distance nb_levels-i*/
|
||||
|
||||
int *constraints; /* Array of constraints: id of the nodes where it is possible to map processes */
|
||||
int nb_constraints; /* Size of the above array */
|
||||
int oversub_fact; /* Maximum number of processes to be mapped on a given node */
|
||||
@ -135,6 +137,8 @@ tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_ty
|
||||
|
||||
*/
|
||||
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
|
||||
/* load affinity matrix */
|
||||
tm_affinity_mat_t *tm_load_aff_mat(char *com_filename);
|
||||
/*
|
||||
Alternatively, build the affinity matrix from an array of arrays (a matrix) of size order by order
|
||||
For performance reason mat is not copied.
|
||||
@ -175,6 +179,7 @@ void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
|
||||
void tm_set_verbose_level(unsigned int level);
|
||||
unsigned int tm_get_verbose_level(void);
|
||||
/* finalize treematch :check memory if necessary, and free internal variables (thread pool)*/
|
||||
void tm_finalize();
|
||||
|
||||
/*
|
||||
Ask for exhaustive search: may be very long
|
||||
@ -182,7 +187,7 @@ Ask for exhaustive search: may be very long
|
||||
new_val != 0 : exhaustive search
|
||||
*/
|
||||
void tm_set_exhaustive_search_flag(int new_val);
|
||||
int tm_get_exhaustive_search_flag(void);
|
||||
int tm_get_exhaustive_search_flag();
|
||||
|
||||
/*
|
||||
Ask for greedy k-partitioning even if scotch is available
|
||||
@ -190,7 +195,7 @@ Ask for greedy k-partitionning even if scotch is available
|
||||
new_val != 0 : greedy k-partitionning
|
||||
*/
|
||||
void tm_set_greedy_flag(int new_val);
|
||||
int tm_get_greedy_flag(void);
|
||||
int tm_get_greedy_flag();
|
||||
|
||||
|
||||
/* Setting the maximum number of threads you want to use in parallel parts of TreeMatch */
|
||||
@ -198,7 +203,7 @@ void tm_set_max_nb_threads(unsigned int val);
|
||||
|
||||
/* managing the usage of physical vs. logical core numbering when using hwloc/xml files */
|
||||
void tm_set_numbering(tm_numbering_t new_val); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
|
||||
tm_numbering_t tm_get_numbering(void); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
|
||||
tm_numbering_t tm_get_numbering(); /* TM_NUMBERING_LOGICAL or TM_NUMBERING_PHYSICAL */
|
||||
|
||||
#include "tm_malloc.h"
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user