Update to the latest version provided by Guillaume.
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Этот коммит содержится в:
родитель
fc21ffadc9
Коммит
2c00c4209a
@ -13,20 +13,25 @@
|
||||
|
||||
if topo_treematch_local
|
||||
extra_treematch_files = treematch/tm_bucket.h \
|
||||
treematch/tm_hwloc.h treematch/tm_mapping.h \
|
||||
treematch/tm_mapping.h \
|
||||
treematch/tm_timings.h treematch/tm_tree.h \
|
||||
treematch/tm_kpartitioning.h treematch/uthash.h\
|
||||
treematch/IntConstantInitializedVector.h \
|
||||
treematch/tm_mt.h \
|
||||
treematch/tm_mt.h treematch/fibo.h \
|
||||
treematch/tm_thread_pool.h treematch/tm_verbose.h \
|
||||
treematch/tm_malloc.h \
|
||||
treematch/tm_malloc.h treematch/k-partitioning.h\
|
||||
treematch/tm_solution.h treematch/tm_topology.h\
|
||||
treematch/PriorityQueue.h \
|
||||
treematch/IntConstantInitializedVector.c \
|
||||
treematch/tm_mt.c \
|
||||
treematch/tm_mt.c treematch/fibo.c \
|
||||
treematch/tm_thread_pool.c treematch/tm_verbose.c \
|
||||
treematch/tm_malloc.c \
|
||||
treematch/tm_malloc.c treematch/treematch.h \
|
||||
treematch/tm_mapping.c treematch/tm_timings.c \
|
||||
treematch/tm_bucket.c treematch/tm_tree.c \
|
||||
treematch/tm_hwloc.c treematch/tm_kpartitioning.c
|
||||
treematch/tm_topology.c treematch/tm_kpartitioning.c \
|
||||
treematch/tm_solution.c treematch/k-partitioning.c \
|
||||
treematch/PriorityQueue.c
|
||||
EXTRA_DIST = treematch/COPYING treematch/LICENSE
|
||||
endif
|
||||
|
||||
sources = \
|
||||
|
@ -70,7 +70,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* module,
|
||||
int n, const int nodes[],
|
||||
const int degrees[], const int targets[],
|
||||
const int weights[],
|
||||
struct opal_info_t *info, int reorder,
|
||||
struct ompi_info_t *info, int reorder,
|
||||
ompi_communicator_t **newcomm);
|
||||
/*
|
||||
* ******************************************************************
|
||||
|
@ -62,6 +62,9 @@ mca_topo_treematch_component_2_2_0_t mca_topo_treematch_component =
|
||||
|
||||
static int init_query(bool enable_progress_threads, bool enable_mpi_threads)
|
||||
{
|
||||
if(NULL == opal_hwloc_topology) {
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
@ -95,3 +98,4 @@ static int mca_topo_treematch_component_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &mca_topo_treematch_component.reorder_mode);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -3,8 +3,8 @@
|
||||
* Copyright (c) 2011-2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011-2015 INRIA. All rights reserved.
|
||||
* Copyright (c) 2012-2015 Bordeaux Poytechnic Institute
|
||||
* Copyright (c) 2011-2016 INRIA. All rights reserved.
|
||||
* Copyright (c) 2012-2017 Bordeaux Poytechnic Institute
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -25,6 +25,7 @@
|
||||
#include "opal/mca/hwloc/hwloc-internal.h"
|
||||
|
||||
#include "ompi/mca/topo/treematch/topo_treematch.h"
|
||||
#include "ompi/mca/topo/treematch/treematch/treematch.h"
|
||||
#include "ompi/mca/topo/treematch/treematch/tm_mapping.h"
|
||||
#include "ompi/mca/topo/base/base.h"
|
||||
|
||||
@ -46,6 +47,7 @@
|
||||
|
||||
#define FALLBACK() \
|
||||
do { free(nodes_roots); \
|
||||
free(lindex_to_grank); \
|
||||
if( NULL != set) hwloc_bitmap_free(set); \
|
||||
goto fallback; } \
|
||||
while(0);
|
||||
@ -92,8 +94,8 @@ static void dump_int_array( char* prolog, char* line_prolog, int* array, size_t
|
||||
size_t i;
|
||||
|
||||
fprintf(stdout,"%s : ", prolog);
|
||||
for(i = 0; i < num_procs_in_node ; i++)
|
||||
fprintf(stdout,"[$s%i:%i] ", line_prolog, i, array[i]);
|
||||
for(i = 0; i < length ; i++)
|
||||
fprintf(stdout,"%s [%lu:%i] ", line_prolog, i, array[i]);
|
||||
fprintf(stdout,"\n");
|
||||
}
|
||||
static void dump_double_array( char* prolog, char* line_prolog, double* array, size_t length )
|
||||
@ -101,8 +103,8 @@ static void dump_double_array( char* prolog, char* line_prolog, double* array, s
|
||||
size_t i;
|
||||
|
||||
fprintf(stdout,"%s : ", prolog);
|
||||
for(i = 0; i < num_procs_in_node ; i++)
|
||||
fprintf(stdout,"%s [%i:%i] ", line_prolog, i, array[i]);
|
||||
for(i = 0; i < length ; i++)
|
||||
fprintf(stdout,"%s [%lu:%lf] ", line_prolog, i, array[i]);
|
||||
fprintf(stdout,"\n");
|
||||
}
|
||||
#endif
|
||||
@ -112,7 +114,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
int n, const int nodes[],
|
||||
const int degrees[], const int targets[],
|
||||
const int weights[],
|
||||
struct opal_info_t *info, int reorder,
|
||||
struct ompi_info_t *info, int reorder,
|
||||
ompi_communicator_t **newcomm)
|
||||
{
|
||||
int err;
|
||||
@ -155,6 +157,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
int num_nodes = 0;
|
||||
int num_procs_in_node = 0;
|
||||
int rank, size;
|
||||
int *k = NULL;
|
||||
int newrank = -1;
|
||||
int hwloc_err;
|
||||
int oversubscribing_objs = 0, oversubscribed_pus = 0;
|
||||
int i, j, idx;
|
||||
@ -250,6 +254,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
* all the calls that involve collective communications, so we have to lay the logic
|
||||
* accordingly.
|
||||
*/
|
||||
|
||||
if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ /* processes are not bound on the machine */
|
||||
#ifdef __DEBUG__
|
||||
if (0 == rank)
|
||||
@ -291,6 +296,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
num_objs_in_node,num_procs_in_node,
|
||||
nodes_roots,lindex_to_grank,comm_old);
|
||||
}
|
||||
|
||||
if (!oversubscribed_pus) {
|
||||
/* Update the data used to compute the correct binding */
|
||||
if(hwloc_bitmap_isincluded(root_obj->cpuset,set)){ /* processes are not bound on the machine */
|
||||
@ -306,17 +312,17 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
|
||||
if( !oversubscribing_objs && !oversubscribed_pus ) {
|
||||
if( hwloc_bitmap_isincluded(root_obj->cpuset,set) ) { /* processes are not bound on the machine */
|
||||
obj_rank = ompi_process_info.my_local_rank%num_objs_in_node;
|
||||
effective_depth = depth;
|
||||
object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank);
|
||||
if( NULL == object) FALLBACK();
|
||||
obj_rank = ompi_process_info.my_local_rank%num_objs_in_node;
|
||||
effective_depth = depth;
|
||||
object = hwloc_get_obj_by_depth(opal_hwloc_topology,effective_depth,obj_rank);
|
||||
if( NULL == object) FALLBACK();
|
||||
|
||||
hwloc_bitmap_copy(set,object->cpuset);
|
||||
hwloc_bitmap_singlify(set); /* we don't want the process to move */
|
||||
hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0);
|
||||
if( -1 == hwloc_err) FALLBACK();
|
||||
hwloc_bitmap_copy(set,object->cpuset);
|
||||
hwloc_bitmap_singlify(set); /* we don't want the process to move */
|
||||
hwloc_err = hwloc_set_cpubind(opal_hwloc_topology,set,0);
|
||||
if( -1 == hwloc_err) FALLBACK();
|
||||
#ifdef __DEBUG__
|
||||
fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank);
|
||||
fprintf(stdout,"Process not bound : binding on OBJ#%i \n",obj_rank);
|
||||
#endif
|
||||
} else {
|
||||
#ifdef __DEBUG__
|
||||
@ -385,7 +391,6 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
if (0 == mca_topo_treematch_component.reorder_mode) {
|
||||
int *k = NULL;
|
||||
int *obj_mapping = NULL;
|
||||
int newrank = -1;
|
||||
int num_objs_total = 0;
|
||||
|
||||
/* Gather comm pattern
|
||||
@ -419,7 +424,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
tm_topology_t *tm_opt_topology = NULL;
|
||||
int *obj_to_rank_in_comm = NULL;
|
||||
int *hierarchies = NULL;
|
||||
int hierarchy[MAX_LEVELS+1];
|
||||
int hierarchy[TM_MAX_LEVELS+1];
|
||||
int min;
|
||||
|
||||
/* create a table that derives the rank in comm_old from the object number */
|
||||
@ -489,27 +494,27 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
free(obj_to_rank_in_comm);
|
||||
|
||||
hierarchy[0] = numlevels;
|
||||
assert(numlevels < MAX_LEVELS);
|
||||
assert(numlevels < TM_MAX_LEVELS);
|
||||
|
||||
for(i = 0 ; i < hierarchy[0]; i++)
|
||||
hierarchy[i+1] = tracker[i]->arity;
|
||||
for(; i < (MAX_LEVELS+1); i++) /* fill up everything else with -1 */
|
||||
for(; i < (TM_MAX_LEVELS+1); i++) /* fill up everything else with -1 */
|
||||
hierarchy[i] = -1;
|
||||
|
||||
if( 0 == rank ) {
|
||||
hierarchies = (int *)malloc(num_nodes*(MAX_LEVELS+1)*sizeof(int));
|
||||
memcpy(hierarchies, hierarchy, (MAX_LEVELS+1)*sizeof(int));
|
||||
hierarchies = (int *)malloc(num_nodes*(TM_MAX_LEVELS+1)*sizeof(int));
|
||||
memcpy(hierarchies, hierarchy, (TM_MAX_LEVELS+1)*sizeof(int));
|
||||
}
|
||||
|
||||
/* gather hierarchies iff more than 1 node! */
|
||||
if ( num_nodes > 1 ) {
|
||||
if( rank != 0 ) {
|
||||
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(hierarchy,(MAX_LEVELS+1), MPI_INT, 0,
|
||||
if (OMPI_SUCCESS != (err = MCA_PML_CALL(send(hierarchy,(TM_MAX_LEVELS+1), MPI_INT, 0,
|
||||
111, MCA_PML_BASE_SEND_STANDARD, comm_old))))
|
||||
ERR_EXIT(err);
|
||||
} else {
|
||||
for(i = 1; i < num_nodes ; i++)
|
||||
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(hierarchies+i*(MAX_LEVELS+1), (MAX_LEVELS+1), MPI_INT,
|
||||
if (OMPI_SUCCESS != ( err = MCA_PML_CALL(irecv(hierarchies+i*(TM_MAX_LEVELS+1), (TM_MAX_LEVELS+1), MPI_INT,
|
||||
nodes_roots[i], 111, comm_old, &reqs[i-1])))){
|
||||
free(hierarchies);
|
||||
ERR_EXIT(err);
|
||||
@ -524,23 +529,25 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
}
|
||||
|
||||
if ( 0 == rank ) {
|
||||
tree_t *comm_tree = NULL;
|
||||
tm_tree_t *comm_tree = NULL;
|
||||
tm_solution_t *sol = NULL;
|
||||
tm_affinity_mat_t *aff_mat = NULL;
|
||||
double **comm_pattern = NULL;
|
||||
int *matching = NULL;
|
||||
|
||||
#ifdef __DEBUG__
|
||||
dump_int_array("hierarchies : ", "", hierarchies, num_nodes*(MAX_LEVELS+1));
|
||||
dump_int_array("hierarchies : ", "", hierarchies, num_nodes*(TM_MAX_LEVELS+1));
|
||||
#endif
|
||||
tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
|
||||
tm_topology->nb_levels = hierarchies[0];
|
||||
|
||||
/* extract min depth */
|
||||
for(i = 1 ; i < num_nodes ; i++)
|
||||
if (hierarchies[i*(MAX_LEVELS+1)] < tm_topology->nb_levels)
|
||||
tm_topology->nb_levels = hierarchies[i*(MAX_LEVELS+1)];
|
||||
if (hierarchies[i*(TM_MAX_LEVELS+1)] < tm_topology->nb_levels)
|
||||
tm_topology->nb_levels = hierarchies[i*(TM_MAX_LEVELS+1)];
|
||||
|
||||
/* Crush levels in hierarchies too long (ie > tm_topology->nb_levels)*/
|
||||
for(i = 0; i < num_nodes ; i++) {
|
||||
int *base_ptr = hierarchies + i*(MAX_LEVELS+1);
|
||||
int *base_ptr = hierarchies + i*(TM_MAX_LEVELS+1);
|
||||
int suppl = *base_ptr - tm_topology->nb_levels;
|
||||
for(j = 1 ; j <= suppl ; j++)
|
||||
*(base_ptr + tm_topology->nb_levels) *= *(base_ptr + tm_topology->nb_levels + j);
|
||||
@ -553,8 +560,8 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
for(i = 1; i < tm_topology->nb_levels; i++) { /* compute the minimum for each level */
|
||||
min = hierarchies[i];
|
||||
for(j = 1; j < num_nodes ; j++)
|
||||
if( hierarchies[j*(MAX_LEVELS+1) + i] < min)
|
||||
min = hierarchies[j*(MAX_LEVELS+1) + i];
|
||||
if( hierarchies[j*(TM_MAX_LEVELS+1) + i] < min)
|
||||
min = hierarchies[j*(TM_MAX_LEVELS+1) + i];
|
||||
tm_topology->arity[i] = min;
|
||||
}
|
||||
} else {
|
||||
@ -568,24 +575,58 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
fprintf(stdout,"topo_arity[%i] = %i\n", i, tm_topology->arity[i]);
|
||||
#endif
|
||||
/* compute the number of processing elements */
|
||||
tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels, sizeof(int));
|
||||
tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t));
|
||||
tm_topology->nb_nodes[0] = 1;
|
||||
for(i = 1 ; i < tm_topology->nb_levels; i++)
|
||||
tm_topology->nb_nodes[i] = tm_topology->nb_nodes[i-1] * tm_topology->arity[i-1];
|
||||
|
||||
/* Build process id tab */
|
||||
tm_topology->node_id = (int **)calloc(tm_topology->nb_levels, sizeof(int*));
|
||||
tm_topology->node_rank = (int **)malloc(sizeof(int *) * tm_topology->nb_levels);
|
||||
for(i = 0; i < tm_topology->nb_levels; i++) {
|
||||
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int));
|
||||
for (j = 0; j < tm_topology->nb_nodes[i]; j++)
|
||||
tm_topology->node_id[i][j] = obj_mapping[j];
|
||||
tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int));
|
||||
/*note : we make the hypothesis that logical indexes in hwloc range from
|
||||
0 to N, are contiguous and crescent. */
|
||||
|
||||
for( j = 0 ; j < tm_topology->nb_nodes[i] ; j++ ){
|
||||
tm_topology->node_id[i][j] = j;
|
||||
tm_topology->node_rank[i][j] = j;
|
||||
|
||||
/* Should use object->logical_index */
|
||||
/* obj = hwloc_get_obj_by_depth(topo,i,j%num_objs_in_node);
|
||||
id = obj->logical_index + (num_objs_in_node)*(j/num_obj_in_node)*/
|
||||
/*
|
||||
int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
|
||||
topology->node_id[i][j] = id;
|
||||
topology->node_rank[i][id] = j;
|
||||
*/
|
||||
}
|
||||
}
|
||||
/* unused for now*/
|
||||
tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
|
||||
|
||||
tm_topology->nb_proc_units = num_objs_total;
|
||||
|
||||
tm_topology->nb_constraints = 0;
|
||||
for(i = 0; i < tm_topology->nb_proc_units ; i++)
|
||||
if (obj_mapping[i] != -1)
|
||||
tm_topology->nb_constraints++;
|
||||
tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
|
||||
for(idx = 0,i = 0; i < tm_topology->nb_proc_units ; i++)
|
||||
if (obj_mapping[i] != -1)
|
||||
tm_topology->constraints[idx++] = obj_mapping[i];
|
||||
|
||||
tm_topology->oversub_fact = 1;
|
||||
|
||||
#ifdef __DEBUG__
|
||||
assert(num_objs_total == tm_topology->nb_nodes[tm_topology->nb_levels-1]);
|
||||
|
||||
for(i = 0; i < tm_topology->nb_levels ; i++) {
|
||||
fprintf(stdout,"tm topo node_id for level [%i] : ",i);
|
||||
dump_int_array("", "", obj_mapping, tm_topology->nb_nodes[i]);
|
||||
}
|
||||
display_topology(tm_topology);
|
||||
tm_display_topology(tm_topology);
|
||||
#endif
|
||||
|
||||
comm_pattern = (double **)malloc(size*sizeof(double *));
|
||||
@ -600,32 +641,31 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
#ifdef __DEBUG__
|
||||
fprintf(stdout,"==== COMM PATTERN ====\n");
|
||||
for( i = 0 ; i < size ; i++) {
|
||||
dump_double_array("", "", comm_pattern, size);
|
||||
dump_double_array("", "", comm_pattern[i], size);
|
||||
}
|
||||
#endif
|
||||
k = (int *)calloc(num_objs_total, sizeof(int));
|
||||
matching = (int *)calloc(size, sizeof(int));
|
||||
tm_optimize_topology(&tm_topology);
|
||||
aff_mat = tm_build_affinity_mat(comm_pattern,size);
|
||||
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
|
||||
sol = tm_compute_mapping(tm_topology, comm_tree);
|
||||
|
||||
k = (int *)calloc(sol->k_length, sizeof(int));
|
||||
for(idx = 0 ; idx < sol->k_length ; idx++)
|
||||
k[idx] = sol->k[idx][0];
|
||||
|
||||
tm_opt_topology = optimize_topology(tm_topology);
|
||||
comm_tree = build_tree_from_topology(tm_opt_topology, comm_pattern, size, NULL, NULL);
|
||||
map_topology_simple(tm_opt_topology, comm_tree, matching, size, k);
|
||||
#ifdef __DEBUG__
|
||||
|
||||
fprintf(stdout,"====> nb levels : %i\n",tm_topology->nb_levels);
|
||||
dump_int_array("Rank permutation sigma/k : ", "", k, num_objs_total);
|
||||
dump_int_array("Matching : ", "", matching, size);
|
||||
assert(size == sol->sigma_length);
|
||||
dump_int_array("Matching : ", "",sol->sigma, sol->sigma_length);
|
||||
#endif
|
||||
free(comm_pattern);
|
||||
free(comm_tree);
|
||||
free(matching);
|
||||
free(obj_mapping);
|
||||
for(i = 0 ; i < tm_topology->nb_levels ; i++)
|
||||
free(tm_topology->node_id[i]);
|
||||
free(tm_topology->node_id);
|
||||
free(tm_topology->nb_nodes);
|
||||
free(tm_topology->arity);
|
||||
free(tm_topology);
|
||||
FREE_topology(tm_opt_topology);
|
||||
free(comm_pattern);
|
||||
free(aff_mat->sum_row);
|
||||
free(aff_mat);
|
||||
tm_free_solution(sol);
|
||||
tm_free_tree(comm_tree);
|
||||
tm_free_topology(tm_topology);
|
||||
}
|
||||
}
|
||||
|
||||
@ -648,15 +688,12 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
|
||||
(*newcomm)->c_topo = topo_module;
|
||||
(*newcomm)->c_topo->reorder = reorder;
|
||||
|
||||
} else { /* partially distributed reordering */
|
||||
ompi_communicator_t *localcomm = NULL;
|
||||
int *matching = (int *)calloc(num_procs_in_node,sizeof(int));
|
||||
int *lrank_to_grank = (int *)calloc(num_procs_in_node,sizeof(int));
|
||||
int *grank_to_lrank = (int *)calloc(size,sizeof(int));
|
||||
hwloc_obj_t object;
|
||||
opal_hwloc_locality_t locality;
|
||||
char set_as_string[64];
|
||||
opal_value_t kv;
|
||||
|
||||
if (OMPI_SUCCESS != (err = ompi_comm_split(comm_old, colors[rank], rank,
|
||||
&localcomm, false)))
|
||||
@ -696,8 +733,9 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
/* The root has now the entire information, so let's crunch it */
|
||||
if (rank == lindex_to_grank[0]) {
|
||||
tm_topology_t *tm_topology = NULL;
|
||||
tm_topology_t *tm_opt_topology = NULL;
|
||||
tree_t *comm_tree = NULL;
|
||||
tm_tree_t *comm_tree = NULL;
|
||||
tm_solution_t *sol = NULL;
|
||||
tm_affinity_mat_t *aff_mat = NULL;
|
||||
double **comm_pattern = NULL;
|
||||
|
||||
comm_pattern = (double **)malloc(num_procs_in_node*sizeof(double *));
|
||||
@ -717,7 +755,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
fprintf(stdout,"========== COMM PATTERN ============= \n");
|
||||
for(i = 0 ; i < num_procs_in_node ; i++){
|
||||
fprintf(stdout," %i : ",i);
|
||||
dump_double_array("", "", comm_pattern, num_procs_in_node);
|
||||
dump_double_array("", "", comm_pattern[i], num_procs_in_node);
|
||||
}
|
||||
fprintf(stdout,"======================= \n");
|
||||
#endif
|
||||
@ -725,92 +763,92 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module,
|
||||
tm_topology = (tm_topology_t *)malloc(sizeof(tm_topology_t));
|
||||
tm_topology->nb_levels = numlevels;
|
||||
tm_topology->arity = (int *)calloc(tm_topology->nb_levels, sizeof(int));
|
||||
tm_topology->nb_nodes = (int *)calloc(tm_topology->nb_levels, sizeof(int));
|
||||
tm_topology->nb_nodes = (size_t *)calloc(tm_topology->nb_levels, sizeof(size_t));
|
||||
tm_topology->node_id = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
|
||||
tm_topology->node_rank = (int **)malloc(tm_topology->nb_levels*sizeof(int *));
|
||||
|
||||
for(i = 0 ; i < tm_topology->nb_levels ; i++){
|
||||
int nb_objs = hwloc_get_nbobjs_by_depth(opal_hwloc_topology, tracker[i]->depth);
|
||||
tm_topology->nb_nodes[i] = nb_objs;
|
||||
tm_topology->arity[i] = tracker[i]->arity;
|
||||
tm_topology->node_id[i] = (int*)malloc(sizeof(int)*nb_objs);
|
||||
for(j = 0; j < num_procs_in_node; j++)
|
||||
tm_topology->node_id[i][j] = localrank_to_objnum[j];
|
||||
for(; j < nb_objs; tm_topology->node_id[i][j] = -1, j++); /* complete with empty */
|
||||
tm_topology->node_id[i] = (int *)calloc(tm_topology->nb_nodes[i], sizeof(int));
|
||||
tm_topology->node_rank[i] = (int * )calloc(tm_topology->nb_nodes[i], sizeof(int));
|
||||
for(j = 0; j < tm_topology->nb_nodes[i] ; j++){
|
||||
tm_topology->node_id[i][j] = j;
|
||||
tm_topology->node_rank[i][j] = j;
|
||||
}
|
||||
}
|
||||
|
||||
/* unused for now*/
|
||||
tm_topology->cost = (double*)calloc(tm_topology->nb_levels,sizeof(double));
|
||||
|
||||
tm_topology->nb_proc_units = num_objs_in_node;
|
||||
//tm_topology->nb_proc_units = num_procs_in_node;
|
||||
tm_topology->nb_constraints = 0;
|
||||
for(i = 0; i < num_procs_in_node ; i++)
|
||||
if (localrank_to_objnum[i] != -1)
|
||||
tm_topology->nb_constraints++;
|
||||
|
||||
tm_topology->constraints = (int *)calloc(tm_topology->nb_constraints,sizeof(int));
|
||||
for(idx = 0,i = 0; i < num_procs_in_node ; i++)
|
||||
if (localrank_to_objnum[i] != -1)
|
||||
tm_topology->constraints[idx++] = localrank_to_objnum[i];
|
||||
|
||||
tm_topology->oversub_fact = 1;
|
||||
|
||||
#ifdef __DEBUG__
|
||||
assert(num_objs_in_node == tm_topology->nb_nodes[tm_topology->nb_levels-1]);
|
||||
fprintf(stdout,"Levels in topo : %i | num procs in node : %i\n",tm_topology->nb_levels,num_procs_in_node);
|
||||
for(i = 0; i < tm_topology->nb_levels ; i++){
|
||||
fprintf(stdout,"Nb objs for level %i : %i | arity %i\n ",i,tm_topology->nb_nodes[i],tm_topology->arity[i]);
|
||||
dump_int_array("", "Obj id ", tm_topology->node_id[i], tm_topology->nb_nodes[i]);
|
||||
}
|
||||
display_topology(tm_topology);
|
||||
tm_display_topology(tm_topology);
|
||||
#endif
|
||||
tm_optimize_topology(&tm_topology);
|
||||
aff_mat = tm_build_affinity_mat(comm_pattern,num_procs_in_node);
|
||||
comm_tree = tm_build_tree_from_topology(tm_topology,aff_mat, NULL, NULL);
|
||||
sol = tm_compute_mapping(tm_topology, comm_tree);
|
||||
|
||||
tm_opt_topology = optimize_topology(tm_topology);
|
||||
comm_tree = build_tree_from_topology(tm_opt_topology, comm_pattern, num_procs_in_node, NULL, NULL);
|
||||
map_topology_simple(tm_opt_topology, comm_tree, matching, num_procs_in_node, NULL);
|
||||
k = (int *)calloc(sol->k_length, sizeof(int));
|
||||
for(idx = 0 ; idx < sol->k_length ; idx++)
|
||||
k[idx] = sol->k[idx][0];
|
||||
|
||||
#ifdef __DEBUG__
|
||||
dump_int_array("Matching:", "", matching, num_procs_in_node);
|
||||
fprintf(stdout,"====> nb levels : %i\n",tm_topology->nb_levels);
|
||||
dump_int_array("Rank permutation sigma/k : ", "", k, num_procs_in_node);
|
||||
assert(num_procs_in_node == sol->sigma_length);
|
||||
dump_int_array("Matching : ", "",sol->sigma, sol->sigma_length);
|
||||
#endif
|
||||
|
||||
free(aff_mat->sum_row);
|
||||
free(aff_mat);
|
||||
free(comm_pattern);
|
||||
for(i = 0; i < tm_topology->nb_levels; i++)
|
||||
free(tm_topology->node_id[i]);
|
||||
free(tm_topology->node_id);
|
||||
free(tm_topology->nb_nodes);
|
||||
free(tm_topology->arity);
|
||||
free(tm_topology);
|
||||
FREE_topology(tm_opt_topology);
|
||||
tm_free_solution(sol);
|
||||
tm_free_tree(comm_tree);
|
||||
tm_free_topology(tm_topology);
|
||||
}
|
||||
|
||||
/* Todo : Bcast + group creation */
|
||||
/* scatter the ranks */
|
||||
if (OMPI_SUCCESS != (err = localcomm->c_coll->coll_bcast(matching, num_procs_in_node,
|
||||
MPI_INT,0,localcomm,
|
||||
localcomm->c_coll->coll_bcast_module)))
|
||||
MPI_INT,0,localcomm,
|
||||
localcomm->c_coll->coll_bcast_module)))
|
||||
ERR_EXIT(err);
|
||||
|
||||
object = hwloc_get_obj_by_depth(opal_hwloc_topology,
|
||||
effective_depth, matching[ompi_process_info.my_local_rank]);
|
||||
if( NULL == object) goto fallback;
|
||||
hwloc_bitmap_copy(set, object->cpuset);
|
||||
hwloc_bitmap_singlify(set);
|
||||
err = hwloc_set_cpubind(opal_hwloc_topology,set,0);
|
||||
if( -1 == err) goto fallback;
|
||||
if ( 0 == rank )
|
||||
free(k);
|
||||
|
||||
/* Report new binding to ORTE/OPAL */
|
||||
/* hwloc_bitmap_list_asprintf(&orte_process_info.cpuset,set); */
|
||||
err = hwloc_bitmap_snprintf(set_as_string, 64, set);
|
||||
|
||||
#ifdef __DEBUG__
|
||||
fprintf(stdout,"Bitmap str size : %i\n", err);
|
||||
#endif
|
||||
|
||||
OBJ_CONSTRUCT(&kv, opal_value_t);
|
||||
kv.key = strdup(OPAL_PMIX_CPUSET);
|
||||
kv.type = OPAL_STRING;
|
||||
kv.data.string = strdup(set_as_string);
|
||||
|
||||
(void)opal_pmix.store_local((opal_process_name_t*)OMPI_PROC_MY_NAME, &kv);
|
||||
OBJ_DESTRUCT(&kv);
|
||||
|
||||
locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
|
||||
ompi_process_info.cpuset,set_as_string);
|
||||
OBJ_CONSTRUCT(&kv, opal_value_t);
|
||||
kv.key = strdup(OPAL_PMIX_LOCALITY);
|
||||
kv.type = OPAL_UINT16;
|
||||
kv.data.uint16 = locality;
|
||||
(void)opal_pmix.store_local((opal_process_name_t*)OMPI_PROC_MY_NAME, &kv);
|
||||
OBJ_DESTRUCT(&kv);
|
||||
|
||||
if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old,
|
||||
comm_old->c_local_group,
|
||||
newcomm))) {
|
||||
/* this needs to be optimized but will do for now */
|
||||
if (OMPI_SUCCESS != (err = ompi_comm_split(localcomm, 0, newrank, newcomm, false)))
|
||||
ERR_EXIT(err);
|
||||
} else {
|
||||
/* Attach the dist_graph to the newly created communicator */
|
||||
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
|
||||
(*newcomm)->c_topo = topo_module;
|
||||
(*newcomm)->c_topo->reorder = reorder;
|
||||
}
|
||||
/* end of TODO */
|
||||
|
||||
/* Attach the dist_graph to the newly created communicator */
|
||||
(*newcomm)->c_flags |= OMPI_COMM_DIST_GRAPH;
|
||||
(*newcomm)->c_topo = topo_module;
|
||||
(*newcomm)->c_topo->reorder = reorder;
|
||||
|
||||
free(matching);
|
||||
free(grank_to_lrank);
|
||||
free(lrank_to_grank);
|
||||
|
@ -2,13 +2,12 @@
|
||||
#include <stdio.h>
|
||||
#include "IntConstantInitializedVector.h"
|
||||
|
||||
|
||||
int intCIV_isInitialized(int_CIVector * v, int i)
|
||||
{
|
||||
if(v->top == 0)
|
||||
return 0;
|
||||
if(v->from[i] >= 0)
|
||||
if(v->from[i] < v->top && v->to[v->from[i]] == i)
|
||||
if(v->from[i] < v->top && v->to[v->from[i]] == i)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
@ -45,7 +44,7 @@ int intCIV_set(int_CIVector * v, int i, int val)
|
||||
v->top++;
|
||||
}
|
||||
v->vec[i] = val;
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int intCIV_get(int_CIVector * v, int i)
|
||||
|
@ -12,5 +12,4 @@ void intCIV_exit(int_CIVector * v);
|
||||
int intCIV_set(int_CIVector * v, int i, int val);
|
||||
int intCIV_get(int_CIVector * v, int i);
|
||||
|
||||
|
||||
#endif /*INTEGER_CONSTANT_INITIALIZED_VECTOR*/
|
||||
|
174
ompi/mca/topo/treematch/treematch/PriorityQueue.c
Обычный файл
174
ompi/mca/topo/treematch/treematch/PriorityQueue.c
Обычный файл
@ -0,0 +1,174 @@
|
||||
#include <stdlib.h>
|
||||
#include "PriorityQueue.h"
|
||||
|
||||
/*
|
||||
This comparison function is used to sort elements in key descending order.
|
||||
*/
|
||||
int compfunc(const FiboNode * const, const FiboNode * const);
|
||||
|
||||
|
||||
|
||||
int compFunc(const FiboNode * const node1, const FiboNode * const node2)
|
||||
{
|
||||
return
|
||||
( ( ((QueueElement*)(node1))->key > ((QueueElement*)(node2))->key ) ? -1 : 1);
|
||||
}
|
||||
|
||||
int PQ_init(PriorityQueue * const q, int size)
|
||||
{
|
||||
int i;
|
||||
q->size = size;
|
||||
q->elements = malloc(sizeof(QueueElement *) * size);
|
||||
for(i=0; i < size; i++)
|
||||
q->elements[i]=NULL;
|
||||
return fiboTreeInit((FiboTree *)q, compFunc);
|
||||
}
|
||||
|
||||
void PQ_exit(PriorityQueue * const q)
|
||||
{
|
||||
|
||||
int i;
|
||||
for(i = 0; i < q->size; i++)
|
||||
{
|
||||
if(q->elements[i] != NULL)
|
||||
free(q->elements[i]);
|
||||
}
|
||||
if(q->elements != NULL)
|
||||
free(q->elements);
|
||||
fiboTreeExit((FiboTree *)q);
|
||||
}
|
||||
void PQ_free(PriorityQueue * const q)
|
||||
{
|
||||
int i;
|
||||
for(i = 0; i < q->size; i++)
|
||||
{
|
||||
if(q->elements[i] != NULL)
|
||||
free(q->elements[i]);
|
||||
}
|
||||
fiboTreeFree((FiboTree *)q);
|
||||
}
|
||||
|
||||
int PQ_isEmpty(PriorityQueue * const q)
|
||||
{
|
||||
FiboTree * tree = (FiboTree *)q;
|
||||
/* if the tree root is linked to itself then the tree is empty */
|
||||
if(&(tree->rootdat) == (tree->rootdat.linkdat.nextptr))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void PQ_insertElement(PriorityQueue * const q, QueueElement * const e)
|
||||
{
|
||||
if(e->value >= 0 && e->value < q->size)
|
||||
{
|
||||
fiboTreeAdd((FiboTree *)q, (FiboNode *)(e));
|
||||
q->elements[e->value] = e;
|
||||
e->isInQueue = 1;
|
||||
}
|
||||
}
|
||||
void PQ_deleteElement(PriorityQueue * const q, QueueElement * const e)
|
||||
{
|
||||
fiboTreeDel((FiboTree *)q, (FiboNode *)(e));
|
||||
q->elements[e->value] = NULL;
|
||||
e->isInQueue = 0;
|
||||
}
|
||||
|
||||
void PQ_insert(PriorityQueue * const q, int val, double key)
|
||||
{
|
||||
if( val >= 0 && val < q->size)
|
||||
{
|
||||
QueueElement * e = malloc(sizeof(QueueElement));
|
||||
e->value = val;
|
||||
e->key = key;
|
||||
PQ_insertElement(q, e);
|
||||
}
|
||||
}
|
||||
|
||||
void PQ_delete(PriorityQueue * const q, int val)
|
||||
{
|
||||
QueueElement * e = q->elements[val];
|
||||
PQ_deleteElement(q, e);
|
||||
free(e);
|
||||
}
|
||||
|
||||
QueueElement * PQ_findMaxElement(PriorityQueue * const q)
|
||||
{
|
||||
QueueElement * e = (QueueElement *)(fiboTreeMin((FiboTree *)q));
|
||||
return e;
|
||||
}
|
||||
QueueElement * PQ_deleteMaxElement(PriorityQueue * const q)
|
||||
{
|
||||
QueueElement * e = (QueueElement *)(fiboTreeMin((FiboTree *)q));
|
||||
if(e != NULL)
|
||||
{
|
||||
PQ_deleteElement(q, e);
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
double PQ_findMaxKey(PriorityQueue * const q)
|
||||
{
|
||||
QueueElement * e = PQ_findMaxElement(q);
|
||||
if(e!=NULL)
|
||||
return e->key;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int PQ_deleteMax(PriorityQueue * const q)
|
||||
{
|
||||
QueueElement * e = PQ_deleteMaxElement(q);
|
||||
int res = -1;
|
||||
if(e != NULL)
|
||||
res = e->value;
|
||||
free(e);
|
||||
return res;
|
||||
}
|
||||
|
||||
void PQ_increaseElementKey(PriorityQueue * const q, QueueElement * const e, double i)
|
||||
{
|
||||
if(e->isInQueue)
|
||||
{
|
||||
PQ_deleteElement(q, e);
|
||||
e->key += i;
|
||||
PQ_insertElement(q, e);
|
||||
}
|
||||
}
|
||||
void PQ_decreaseElementKey(PriorityQueue * const q, QueueElement * const e, double i)
|
||||
{
|
||||
if(e->isInQueue)
|
||||
{
|
||||
PQ_deleteElement(q, e);
|
||||
e->key -= i;
|
||||
PQ_insertElement(q, e);
|
||||
}
|
||||
}
|
||||
void PQ_adjustElementKey(PriorityQueue * const q, QueueElement * const e, double i)
|
||||
{
|
||||
if(e->isInQueue)
|
||||
{
|
||||
PQ_deleteElement(q, e);
|
||||
e->key = i;
|
||||
PQ_insertElement(q, e);
|
||||
}
|
||||
}
|
||||
|
||||
void PQ_increaseKey(PriorityQueue * const q, int val, double i)
|
||||
{
|
||||
QueueElement * e = q->elements[val];
|
||||
if(e != NULL)
|
||||
PQ_increaseElementKey(q, e, i);
|
||||
}
|
||||
|
||||
void PQ_decreaseKey(PriorityQueue * const q, int val, double i)
|
||||
{
|
||||
QueueElement * e = q->elements[val];
|
||||
if(e != NULL)
|
||||
PQ_decreaseElementKey(q, e, i);
|
||||
}
|
||||
|
||||
void PQ_adjustKey(PriorityQueue * const q, int val, double i)
|
||||
{
|
||||
QueueElement * e = q->elements[val];
|
||||
if(e != NULL)
|
||||
PQ_adjustElementKey(q, e, i);
|
||||
}
|
108
ompi/mca/topo/treematch/treematch/PriorityQueue.h
Обычный файл
108
ompi/mca/topo/treematch/treematch/PriorityQueue.h
Обычный файл
@ -0,0 +1,108 @@
|
||||
#ifndef PRIORITY_QUEUE
|
||||
#define PRIORITY_QUEUE
|
||||
|
||||
#include "fibo.h"
|
||||
|
||||
/*
|
||||
This is the struct for our elements in a PriorityQueue.
|
||||
The node is at first place so we only have to use a cast to switch between QueueElement's pointer and Fibonode's pointer.
|
||||
*/
|
||||
typedef struct QueueElement_
|
||||
{
|
||||
FiboNode node; /*the node used to insert the element in a FiboTree*/
|
||||
double key; /*the key of the element, elements are sorted in a descending order according to their key*/
|
||||
int value;
|
||||
int isInQueue;
|
||||
} QueueElement;
|
||||
|
||||
typedef struct PriorityQueue_
|
||||
{
|
||||
FiboTree tree;
|
||||
QueueElement ** elements; /*a vector of element with their value as key so we can easily retreive an element from its value */
|
||||
int size; /*the size allocated to the elements vector*/
|
||||
} PriorityQueue;
|
||||
|
||||
|
||||
/*
|
||||
PQ_init initiates a PriorityQueue with a size given in argument and sets compFunc as comparison function. Note that you have to allocate memory to the PriorityQueue pointer before calling this function.
|
||||
Returns :
|
||||
0 if success
|
||||
!0 if failed
|
||||
|
||||
PQ_free simply empties the PriorityQueue but does not free the memory used by its elements.
|
||||
PQ_exit destroys the PriorityQueue without freeing elements. The PriorityQueue is no longer usable without using PQ_init again.
|
||||
Note that the PriorityQueue pointer is not deallocated.
|
||||
*/
|
||||
int PQ_init(PriorityQueue * const, int size);
|
||||
void PQ_free(PriorityQueue * const);
|
||||
void PQ_exit(PriorityQueue * const);
|
||||
|
||||
/*
|
||||
PQ_isEmpty returns 1 if the PriorityQueue is empty, 0 otherwise.
|
||||
*/
|
||||
int PQ_isEmpty(PriorityQueue * const);
|
||||
|
||||
/*
|
||||
PQ_insertElement inserts the given QueueElement in the given PriorityQueue
|
||||
*/
|
||||
void PQ_insertElement(PriorityQueue * const, QueueElement * const);
|
||||
/*
|
||||
PQ_deleteElement delete the element given in argument from the PriorityQueue.
|
||||
*/
|
||||
void PQ_deleteElement(PriorityQueue * const, QueueElement * const);
|
||||
|
||||
/*
|
||||
PQ_insert inserts an element in the PriorityQueue with the value and key given in argument.
|
||||
*/
|
||||
void PQ_insert(PriorityQueue * const, int val, double key);
|
||||
/*
|
||||
PQ_delete removes the first element found with the value given in argument and frees it.
|
||||
*/
|
||||
void PQ_delete(PriorityQueue * const, int val);
|
||||
|
||||
|
||||
/*
|
||||
PQ_findMaxElement returns the QueueElement with the greatest key in the given PriorityQueue
|
||||
*/
|
||||
QueueElement * PQ_findMaxElement(PriorityQueue * const);
|
||||
/*
|
||||
PQ_deleteMaxElement returns the QueueElement with the geatest key in the given PriorityQueue and removes it from the queue.
|
||||
*/
|
||||
QueueElement * PQ_deleteMaxElement(PriorityQueue * const);
|
||||
|
||||
/*
|
||||
PQ_findMax returns the key of the element with the geatest key in the given PriorityQueue
|
||||
*/
|
||||
double PQ_findMaxKey(PriorityQueue * const);
|
||||
/*
|
||||
PQ_deleteMax returns the value of the element with the greatest key in the given PriorityQueue and removes it from the queue.
|
||||
*/
|
||||
int PQ_deleteMax(PriorityQueue * const);
|
||||
|
||||
/*
|
||||
PQ_increaseElementKey adds the value of i to the key of the given QueueElement
|
||||
*/
|
||||
void PQ_increaseElementKey(PriorityQueue * const, QueueElement * const, double i);
|
||||
/*
|
||||
PQ_decreaseElementKey substracts the value of i from the key of the given QueueElement
|
||||
*/
|
||||
void PQ_decreaseElementKey(PriorityQueue * const, QueueElement * const, double i);
|
||||
/*
|
||||
PQ_adjustElementKey sets to i the key of the given QueueElement.
|
||||
*/
|
||||
void PQ_adjustElementKey(PriorityQueue * const, QueueElement * const, double i);
|
||||
|
||||
/*
|
||||
PQ_increaseKey adds i to the key of the first element found with a value equal to val in the PriorityQueue.
|
||||
*/
|
||||
void PQ_increaseKey(PriorityQueue * const, int val, double i);
|
||||
/*
|
||||
PQ_decreaseKey substracts i from the key of the first element found with a value equal to val in the PriorityQueue.
|
||||
*/
|
||||
void PQ_decreaseKey(PriorityQueue * const, int val, double i);
|
||||
/*
|
||||
PQ_adjustKey sets to i the key of the first element found with a value equal to val in the PriorityQueue.
|
||||
*/
|
||||
void PQ_adjustKey(PriorityQueue * const, int val, double i);
|
||||
|
||||
#endif /*PRIORITY_QUEUE*/
|
372
ompi/mca/topo/treematch/treematch/fibo.c
Обычный файл
372
ompi/mca/topo/treematch/treematch/fibo.c
Обычный файл
@ -0,0 +1,372 @@
|
||||
/* Copyright 2010 IPB, INRIA & CNRS
|
||||
**
|
||||
** This file originally comes from the Scotch software package for
|
||||
** static mapping, graph partitioning and sparse matrix ordering.
|
||||
**
|
||||
** This software is governed by the CeCILL-B license under French law
|
||||
** and abiding by the rules of distribution of free software. You can
|
||||
** use, modify and/or redistribute the software under the terms of the
|
||||
** CeCILL-B license as circulated by CEA, CNRS and INRIA at the following
|
||||
** URL: "http://www.cecill.info".
|
||||
**
|
||||
** As a counterpart to the access to the source code and rights to copy,
|
||||
** modify and redistribute granted by the license, users are provided
|
||||
** only with a limited warranty and the software's author, the holder of
|
||||
** the economic rights, and the successive licensors have only limited
|
||||
** liability.
|
||||
**
|
||||
** In this respect, the user's attention is drawn to the risks associated
|
||||
** with loading, using, modifying and/or developing or reproducing the
|
||||
** software by the user in light of its specific status of free software,
|
||||
** that may mean that it is complicated to manipulate, and that also
|
||||
** therefore means that it is reserved for developers and experienced
|
||||
** professionals having in-depth computer knowledge. Users are therefore
|
||||
** encouraged to load and test the software's suitability as regards
|
||||
** their requirements in conditions enabling the security of their
|
||||
** systems and/or data to be ensured and, more generally, to use and
|
||||
** operate it in the same conditions as regards security.
|
||||
**
|
||||
** The fact that you are presently reading this means that you have had
|
||||
** knowledge of the CeCILL-B license and that you accept its terms.
|
||||
*/
|
||||
/************************************************************/
|
||||
/** **/
|
||||
/** NAME : fibo.c **/
|
||||
/** **/
|
||||
/** AUTHOR : Francois PELLEGRINI **/
|
||||
/** **/
|
||||
/** FUNCTION : This module handles Fibonacci trees. **/
|
||||
/** **/
|
||||
/** DATES : # Version 1.0 : from : 01 may 2010 **/
|
||||
/** to 12 may 2010 **/
|
||||
/** **/
|
||||
/************************************************************/
|
||||
|
||||
/*
|
||||
** The defines and includes.
|
||||
*/
|
||||
|
||||
#define FIBO
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
#include <stdio.h>
|
||||
#include "fibo.h"
|
||||
|
||||
/* Helper macros which can be redefined at compile time. */
|
||||
|
||||
#ifndef INT
|
||||
#define INT int /* "long long" can be used on 64-bit systems */
|
||||
#endif /* INT */
|
||||
|
||||
#ifndef errorPrint
|
||||
#define errorPrint(s) fprintf (stderr, s)
|
||||
#endif /* errorPrint */
|
||||
|
||||
#ifndef memAlloc
|
||||
#define memAlloc malloc
|
||||
#define memSet memset
|
||||
#define memFree free
|
||||
#endif /* memAlloc */
|
||||
|
||||
/*********************************************/
|
||||
/* */
|
||||
/* These routines deal with Fibonacci trees. */
|
||||
/* */
|
||||
/*********************************************/
|
||||
|
||||
/* This routine initializes a Fibonacci
|
||||
** tree structure.
|
||||
** It returns:
|
||||
** - 0 : in case of success.
|
||||
** - !0 : on error.
|
||||
*/
|
||||
|
||||
int
|
||||
fiboTreeInit (
|
||||
FiboTree * const treeptr,
|
||||
int (* cmpfptr) (const FiboNode * const, const FiboNode * const))
|
||||
{
|
||||
if ((treeptr->degrtab = (FiboNode **) memAlloc ((sizeof (INT) << 3) * sizeof (FiboNode *))) == NULL) /* As many cells as there are bits in an INT */
|
||||
return (1);
|
||||
|
||||
memSet (treeptr->degrtab, 0, (sizeof (INT) << 3) * sizeof (FiboNode *)); /* Make degree array ready for consolidation: all cells set to NULL */
|
||||
|
||||
treeptr->rootdat.linkdat.prevptr = /* Link root node to itself */
|
||||
treeptr->rootdat.linkdat.nextptr = &treeptr->rootdat;
|
||||
treeptr->cmpfptr = cmpfptr;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* This routine flushes the contents of
|
||||
** the given Fibonacci tree.
|
||||
** It returns:
|
||||
** - VOID : in all cases.
|
||||
*/
|
||||
|
||||
void
|
||||
fiboTreeExit (
|
||||
FiboTree * const treeptr)
|
||||
{
|
||||
if (treeptr->degrtab != NULL)
|
||||
memFree (treeptr->degrtab);
|
||||
}
|
||||
|
||||
/* This routine flushes the contents of
|
||||
** the given Fibonacci tree. It does not
|
||||
** free any of its contents, but instead
|
||||
** makes the tree structure look empty again.
|
||||
** It returns:
|
||||
** - VOID : in all cases.
|
||||
*/
|
||||
|
||||
void
|
||||
fiboTreeFree (
|
||||
FiboTree * const treeptr)
|
||||
{
|
||||
treeptr->rootdat.linkdat.prevptr = /* Link root node to itself */
|
||||
treeptr->rootdat.linkdat.nextptr = &treeptr->rootdat;
|
||||
}
|
||||
|
||||
/* This routine perform the consolidation
|
||||
** of roots per degree. It returns the best
|
||||
** element found because this element is not
|
||||
** recorded in the data structure itself.
|
||||
** It returns:
|
||||
** - !NULL : pointer to best element found.
|
||||
** - NULL : Fibonacci tree is empty.
|
||||
*/
|
||||
|
||||
FiboNode *
|
||||
fiboTreeConsolidate (
|
||||
FiboTree * const treeptr)
|
||||
{
|
||||
FiboNode ** restrict degrtab;
|
||||
int degrmax;
|
||||
int degrval;
|
||||
FiboNode * rootptr;
|
||||
FiboNode * nextptr;
|
||||
FiboNode * bestptr;
|
||||
|
||||
degrtab = treeptr->degrtab;
|
||||
|
||||
for (rootptr = treeptr->rootdat.linkdat.nextptr, nextptr = rootptr->linkdat.nextptr, degrmax = 0; /* For all roots in root list */
|
||||
rootptr != &treeptr->rootdat; ) {
|
||||
degrval = rootptr->deflval >> 1; /* Get degree, getting rid of flag part */
|
||||
#ifdef FIBO_DEBUG
|
||||
if (degrval >= (sizeof (INT) << 3))
|
||||
errorPrint ("fiboTreeConsolidate: invalid node degree");
|
||||
#endif /* FIBO_DEBUG */
|
||||
if (degrtab[degrval] == NULL) { /* If no tree with same degree already found */
|
||||
if (degrval > degrmax) /* Record highest degree found */
|
||||
degrmax = degrval;
|
||||
|
||||
degrtab[degrval] = rootptr; /* Record tree as first tree with this degree */
|
||||
rootptr = nextptr; /* Process next root in list during next iteration */
|
||||
nextptr = rootptr->linkdat.nextptr;
|
||||
}
|
||||
else {
|
||||
FiboNode * oldrptr; /* Root which will no longer be a root */
|
||||
FiboNode * chldptr;
|
||||
|
||||
oldrptr = degrtab[degrval]; /* Assume old root is worse */
|
||||
if (treeptr->cmpfptr (oldrptr, rootptr) <= 0) { /* If old root is still better */
|
||||
oldrptr = rootptr; /* This root will be be linked to it */
|
||||
rootptr = degrtab[degrval]; /* We will go on processing this root */
|
||||
}
|
||||
|
||||
degrtab[degrval] = NULL; /* Remaining root changes degree so leaves this cell */
|
||||
fiboTreeUnlink (oldrptr); /* Old root is no longer a root */
|
||||
oldrptr->deflval &= ~1; /* Whatever old root flag was, it is reset to 0 */
|
||||
oldrptr->pareptr = rootptr; /* Remaining root is now father of old root */
|
||||
|
||||
chldptr = rootptr->chldptr; /* Get first child of remaining root */
|
||||
if (chldptr != NULL) { /* If remaining root had already some children, link old root with them */
|
||||
rootptr->deflval += 2; /* Increase degree by 1, that is, by 2 with left shift in deflval */
|
||||
fiboTreeLinkAfter (chldptr, oldrptr);
|
||||
}
|
||||
else { /* Old root becomes first child of remaining root */
|
||||
rootptr->deflval = 2; /* Real degree set to 1, and flag set to 0 */
|
||||
rootptr->chldptr = oldrptr;
|
||||
oldrptr->linkdat.prevptr = /* Chain old root to oneself as only child */
|
||||
oldrptr->linkdat.nextptr = oldrptr;
|
||||
}
|
||||
} /* Process again remaining root as its degree has changed */
|
||||
}
|
||||
|
||||
bestptr = NULL;
|
||||
for (degrval = 0; degrval <= degrmax; degrval ++) {
|
||||
if (degrtab[degrval] != NULL) { /* If some tree is found */
|
||||
bestptr = degrtab[degrval]; /* Record it as potential best */
|
||||
degrtab[degrval] = NULL; /* Clean-up used part of array */
|
||||
degrval ++; /* Go on at next cell in next loop */
|
||||
break;
|
||||
}
|
||||
}
|
||||
for ( ; degrval <= degrmax; degrval ++) { /* For remaining roots once a potential best root has been found */
|
||||
if (degrtab[degrval] != NULL) {
|
||||
if (treeptr->cmpfptr (degrtab[degrval], bestptr) < 0) /* If new root is better */
|
||||
bestptr = degrtab[degrval]; /* Record new root as best root */
|
||||
degrtab[degrval] = NULL; /* Clean-up used part of array */
|
||||
}
|
||||
}
|
||||
|
||||
return (bestptr);
|
||||
}
|
||||
|
||||
/* This routine returns the node of minimum
|
||||
** key in the given tree. The node is searched
|
||||
** for each time this routine is called, so this
|
||||
** information should be recorded if needed.
|
||||
** This is the non-macro version, for testing
|
||||
** and setting up breakpoints.
|
||||
** It returns:
|
||||
** - !NULL : pointer to best element found.
|
||||
** - NULL : Fibonacci tree is empty.
|
||||
*/
|
||||
|
||||
#ifndef fiboTreeMin
|
||||
|
||||
FiboNode *
|
||||
fiboTreeMin (
|
||||
FiboTree * const treeptr)
|
||||
{
|
||||
FiboNode * bestptr;
|
||||
|
||||
bestptr = fiboTreeMinMacro (treeptr);
|
||||
|
||||
#ifdef FIBO_DEBUG
|
||||
fiboTreeCheck (treeptr);
|
||||
#endif /* FIBO_DEBUG */
|
||||
|
||||
return (bestptr);
|
||||
}
|
||||
|
||||
#endif /* fiboTreeMin */
|
||||
|
||||
/* This routine adds the given node to the
|
||||
** given tree. This is the non-macro version,
|
||||
** for testing and setting up breakpoints.
|
||||
** It returns:
|
||||
** - void : in all cases.
|
||||
*/
|
||||
|
||||
#ifndef fiboTreeAdd
|
||||
|
||||
void
|
||||
fiboTreeAdd (
|
||||
FiboTree * const treeptr,
|
||||
FiboNode * const nodeptr)
|
||||
{
|
||||
fiboTreeAddMacro (treeptr, nodeptr);
|
||||
|
||||
#ifdef FIBO_DEBUG
|
||||
fiboTreeCheck (treeptr);
|
||||
#endif /* FIBO_DEBUG */
|
||||
}
|
||||
|
||||
#endif /* fiboTreeAdd */
|
||||
|
||||
/* This routine deletes the given node from
|
||||
** the given tree, whatever ths node is (root
|
||||
** or non root). This is the non-macro version,
|
||||
** for testing and setting up breakpoints.
|
||||
** It returns:
|
||||
** - void : in all cases.
|
||||
*/
|
||||
|
||||
#ifndef fiboTreeDel
|
||||
|
||||
void
|
||||
fiboTreeDel (
|
||||
FiboTree * const treeptr,
|
||||
FiboNode * const nodeptr)
|
||||
{
|
||||
fiboTreeDelMacro (treeptr, nodeptr);
|
||||
|
||||
#ifdef FIBO_DEBUG
|
||||
nodeptr->pareptr =
|
||||
nodeptr->chldptr =
|
||||
nodeptr->linkdat.prevptr =
|
||||
nodeptr->linkdat.nextptr = NULL;
|
||||
|
||||
fiboTreeCheck (treeptr);
|
||||
#endif /* FIBO_DEBUG */
|
||||
}
|
||||
|
||||
#endif /* fiboTreeDel */
|
||||
|
||||
/* This routine checks the consistency of the
|
||||
** given linked list.
|
||||
** It returns:
|
||||
** - !NULL : pointer to the vertex.
|
||||
** - NULL : if no such vertex available.
|
||||
*/
|
||||
|
||||
#ifdef FIBO_DEBUG
|
||||
|
||||
static
|
||||
int
|
||||
fiboTreeCheck2 (
|
||||
const FiboNode * const nodeptr)
|
||||
{
|
||||
FiboNode * chldptr;
|
||||
int degrval;
|
||||
|
||||
degrval = 0;
|
||||
chldptr = nodeptr->chldptr;
|
||||
if (chldptr != NULL) {
|
||||
do {
|
||||
if (chldptr->linkdat.nextptr->linkdat.prevptr != chldptr) {
|
||||
errorPrint ("fiboTreeCheck: bad child linked list");
|
||||
return (1);
|
||||
}
|
||||
|
||||
if (chldptr->pareptr != nodeptr) {
|
||||
errorPrint ("fiboTreeCheck: bad child parent");
|
||||
return (1);
|
||||
}
|
||||
|
||||
if (fiboTreeCheck2 (chldptr) != 0)
|
||||
return (1);
|
||||
|
||||
degrval ++;
|
||||
chldptr = chldptr->linkdat.nextptr;
|
||||
} while (chldptr != nodeptr->chldptr);
|
||||
}
|
||||
|
||||
if (degrval != (nodeptr->deflval >> 1)) { /* Real node degree is obtained by discarding lowest bit */
|
||||
errorPrint ("fiboTreeCheck2: invalid child information");
|
||||
return (1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
fiboTreeCheck (
|
||||
const FiboTree * const treeptr)
|
||||
{
|
||||
FiboNode * nodeptr;
|
||||
|
||||
for (nodeptr = treeptr->rootdat.linkdat.nextptr;
|
||||
nodeptr != &treeptr->rootdat; nodeptr = nodeptr->linkdat.nextptr) {
|
||||
if (nodeptr->linkdat.nextptr->linkdat.prevptr != nodeptr) {
|
||||
errorPrint ("fiboTreeCheck: bad root linked list");
|
||||
return (1);
|
||||
}
|
||||
|
||||
if (nodeptr->pareptr != NULL) {
|
||||
errorPrint ("fiboTreeCheck: bad root parent");
|
||||
return (1);
|
||||
}
|
||||
|
||||
if (fiboTreeCheck2 (nodeptr) != 0)
|
||||
return (1);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#endif /* FIBO_DEBUG */
|
205
ompi/mca/topo/treematch/treematch/fibo.h
Обычный файл
205
ompi/mca/topo/treematch/treematch/fibo.h
Обычный файл
@ -0,0 +1,205 @@
|
||||
/* Copyright 2010 IPB, INRIA & CNRS
|
||||
**
|
||||
** This file originally comes from the Scotch software package for
|
||||
** static mapping, graph partitioning and sparse matrix ordering.
|
||||
**
|
||||
** This software is governed by the CeCILL-B license under French law
|
||||
** and abiding by the rules of distribution of free software. You can
|
||||
** use, modify and/or redistribute the software under the terms of the
|
||||
** CeCILL-B license as circulated by CEA, CNRS and INRIA at the following
|
||||
** URL: "http://www.cecill.info".
|
||||
**
|
||||
** As a counterpart to the access to the source code and rights to copy,
|
||||
** modify and redistribute granted by the license, users are provided
|
||||
** only with a limited warranty and the software's author, the holder of
|
||||
** the economic rights, and the successive licensors have only limited
|
||||
** liability.
|
||||
**
|
||||
** In this respect, the user's attention is drawn to the risks associated
|
||||
** with loading, using, modifying and/or developing or reproducing the
|
||||
** software by the user in light of its specific status of free software,
|
||||
** that may mean that it is complicated to manipulate, and that also
|
||||
** therefore means that it is reserved for developers and experienced
|
||||
** professionals having in-depth computer knowledge. Users are therefore
|
||||
** encouraged to load and test the software's suitability as regards
|
||||
** their requirements in conditions enabling the security of their
|
||||
** systems and/or data to be ensured and, more generally, to use and
|
||||
** operate it in the same conditions as regards security.
|
||||
**
|
||||
** The fact that you are presently reading this means that you have had
|
||||
** knowledge of the CeCILL-B license and that you accept its terms.
|
||||
*/
|
||||
/************************************************************/
|
||||
/** **/
|
||||
/** NAME : fibo.h **/
|
||||
/** **/
|
||||
/** AUTHOR : Francois PELLEGRINI **/
|
||||
/** **/
|
||||
/** FUNCTION : This module contains the definitions of **/
|
||||
/** the generic Fibonacci trees. **/
|
||||
/** **/
|
||||
/** DATES : # Version 1.0 : from : 01 may 2010 **/
|
||||
/** to 12 may 2010 **/
|
||||
/** **/
|
||||
/** NOTES : # Since this module has originally been **/
|
||||
/** designed as a gain keeping data **/
|
||||
/** structure for local optimization **/
|
||||
/** algorithms, the computation of the **/
|
||||
/** best node is only done when actually **/
|
||||
/** searching for it. **/
|
||||
/** This is most useful when many **/
|
||||
/** insertions and deletions can take **/
|
||||
/** place in the mean time. This is why **/
|
||||
/** this data structure does not keep **/
|
||||
/** track of the best node, unlike most **/
|
||||
/** implementations do. **/
|
||||
/** **/
|
||||
/************************************************************/
|
||||
|
||||
/*
|
||||
** The type and structure definitions.
|
||||
*/
|
||||
|
||||
/* The doubly linked list structure. */
|
||||
|
||||
typedef struct FiboLink_ {
|
||||
struct FiboNode_ * prevptr; /*+ Pointer to previous sibling element +*/
|
||||
struct FiboNode_ * nextptr; /*+ Pointer to next sibling element +*/
|
||||
} FiboLink;
|
||||
|
||||
/* The tree node data structure. The deflval
|
||||
variable merges degree and flag variables.
|
||||
The degree of a node is smaller than
|
||||
"bitsizeof (INT)", so it can hold on an
|
||||
"int". The flag value is stored in the
|
||||
lowest bit of the value. */
|
||||
|
||||
|
||||
typedef struct FiboNode_ {
|
||||
struct FiboNode_ * pareptr; /*+ Pointer to parent element, if any +*/
|
||||
struct FiboNode_ * chldptr; /*+ Pointer to first child element, if any +*/
|
||||
FiboLink linkdat; /*+ Pointers to sibling elements +*/
|
||||
int deflval; /*+ Lowest bit: flag value; other bits: degree value +*/
|
||||
} FiboNode;
|
||||
|
||||
/* The tree data structure. The fake dummy node aims
|
||||
at handling root node insertion without any test.
|
||||
This is important as many insertions have to be
|
||||
performed. */
|
||||
|
||||
typedef struct FiboTree_ {
|
||||
FiboNode rootdat; /*+ Dummy node for fast root insertion +*/
|
||||
FiboNode ** restrict degrtab; /*+ Consolidation array of size "bitsizeof (INT)" +*/
|
||||
int (* cmpfptr) (const FiboNode * const, const FiboNode * const); /*+ Comparison routine +*/
|
||||
} FiboTree;
|
||||
|
||||
/*
|
||||
** The marco definitions.
|
||||
*/
|
||||
|
||||
/* This is the core of the module. All of
|
||||
the algorithms have been de-recursived
|
||||
and written as macros. */
|
||||
|
||||
#define fiboTreeLinkAfter(o,n) do { \
|
||||
FiboNode * nextptr; \
|
||||
nextptr = (o)->linkdat.nextptr; \
|
||||
(n)->linkdat.nextptr = nextptr; \
|
||||
(n)->linkdat.prevptr = (o); \
|
||||
nextptr->linkdat.prevptr = (n); \
|
||||
(o)->linkdat.nextptr = (n); \
|
||||
} while (0)
|
||||
|
||||
#define fiboTreeUnlink(n) do { \
|
||||
(n)->linkdat.prevptr->linkdat.nextptr = (n)->linkdat.nextptr; \
|
||||
(n)->linkdat.nextptr->linkdat.prevptr = (n)->linkdat.prevptr; \
|
||||
} while (0)
|
||||
|
||||
#define fiboTreeAddMacro(t,n) do { \
|
||||
(n)->pareptr = NULL; \
|
||||
(n)->chldptr = NULL; \
|
||||
(n)->deflval = 0; \
|
||||
fiboTreeLinkAfter (&((t)->rootdat), (n)); \
|
||||
} while (0)
|
||||
|
||||
#define fiboTreeMinMacro(t) (fiboTreeConsolidate (t))
|
||||
|
||||
#define fiboTreeCutChildren(t,n) do { \
|
||||
FiboNode * chldptr; \
|
||||
chldptr = (n)->chldptr; \
|
||||
if (chldptr != NULL) { \
|
||||
FiboNode * cendptr; \
|
||||
cendptr = chldptr; \
|
||||
do { \
|
||||
FiboNode * nextptr; \
|
||||
nextptr = chldptr->linkdat.nextptr; \
|
||||
chldptr->pareptr = NULL; \
|
||||
fiboTreeLinkAfter (&((t)->rootdat), chldptr); \
|
||||
chldptr = nextptr; \
|
||||
} while (chldptr != cendptr); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define fiboTreeDelMacro(t,n) do { \
|
||||
FiboNode * pareptr; \
|
||||
FiboNode * rghtptr; \
|
||||
pareptr = (n)->pareptr; \
|
||||
fiboTreeUnlink (n); \
|
||||
fiboTreeCutChildren ((t), (n)); \
|
||||
if (pareptr == NULL) \
|
||||
break; \
|
||||
rghtptr = (n)->linkdat.nextptr; \
|
||||
while (1) { \
|
||||
FiboNode * gdpaptr; \
|
||||
int deflval; \
|
||||
deflval = pareptr->deflval - 2; \
|
||||
pareptr->deflval = deflval | 1; \
|
||||
gdpaptr = pareptr->pareptr; \
|
||||
pareptr->chldptr = (deflval <= 1) ? NULL : rghtptr; \
|
||||
if (((deflval & 1) == 0) || (gdpaptr == NULL)) \
|
||||
break; \
|
||||
rghtptr = pareptr->linkdat.nextptr; \
|
||||
fiboTreeUnlink (pareptr); \
|
||||
pareptr->pareptr = NULL; \
|
||||
fiboTreeLinkAfter (&((t)->rootdat), pareptr); \
|
||||
pareptr = gdpaptr; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
** The function prototypes.
|
||||
*/
|
||||
|
||||
/* This set of definitions allows the user
|
||||
to specify whether he prefers to use
|
||||
the fibonacci routines as macros or as
|
||||
regular functions, for instance for
|
||||
debugging. */
|
||||
|
||||
#define fiboTreeAdd fiboTreeAddMacro
|
||||
/* #define fiboTreeDel fiboTreeDelMacro */
|
||||
/* #define fiboTreeMin fiboTreeMinMacro */
|
||||
|
||||
#ifndef FIBO
|
||||
#define static
|
||||
#endif
|
||||
|
||||
int fiboTreeInit (FiboTree * const, int (*) (const FiboNode * const, const FiboNode * const));
|
||||
void fiboTreeExit (FiboTree * const);
|
||||
void fiboTreeFree (FiboTree * const);
|
||||
FiboNode * fiboTreeConsolidate (FiboTree * const);
|
||||
#ifndef fiboTreeAdd
|
||||
void fiboTreeAdd (FiboTree * const, FiboNode * const);
|
||||
#endif /* fiboTreeAdd */
|
||||
#ifndef fiboTreeDel
|
||||
void fiboTreeDel (FiboTree * const, FiboNode * const);
|
||||
#endif /* fiboTreeDel */
|
||||
#ifndef fiboTreeMin
|
||||
FiboNode * fiboTreeMin (FiboTree * const);
|
||||
#endif /* fiboTreeMin */
|
||||
#ifdef FIBO_DEBUG
|
||||
int fiboTreeCheck (const FiboTree * const);
|
||||
static int fiboTreeCheck2 (const FiboNode * const);
|
||||
#endif /* FIBO_DEBUG */
|
||||
|
||||
#undef static
|
339
ompi/mca/topo/treematch/treematch/k-partitioning.c
Обычный файл
339
ompi/mca/topo/treematch/treematch/k-partitioning.c
Обычный файл
@ -0,0 +1,339 @@
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "k-partitioning.h"
|
||||
#include "tm_mt.h"
|
||||
#include "tm_verbose.h"
|
||||
|
||||
void memory_allocation(PriorityQueue ** Q, PriorityQueue ** Qinst, double *** D, int n, int k);
|
||||
void initialization(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int k, int * const deficit, int * const surplus);
|
||||
void algo(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int * const deficit, int * const surplus);
|
||||
double nextGain(PriorityQueue * const Qpart, PriorityQueue * const Q, int * const deficit, int * const surplus);
|
||||
void balancing(int n, int deficit, int surplus, double ** const D, int * const part);
|
||||
void destruction(PriorityQueue * Qpart, PriorityQueue * Q, PriorityQueue * Qinst, double ** D, int n, int k);
|
||||
|
||||
void allocate_vertex2(int u, int *res, double **comm, int n, int *size, int max_size);
|
||||
double eval_cost2(int *,int,double **);
|
||||
int *kpartition_greedy2(int k, double **comm, int n, int nb_try_max, int *constraints, int nb_constraints);
|
||||
int* build_p_vector(double **comm, int n, int k, int greedy_trials, int * constraints, int nb_constraints);
|
||||
|
||||
int* kPartitioning(double ** comm, int n, int k, int * constraints, int nb_constraints, int greedy_trials)
|
||||
{
|
||||
/* ##### declarations & allocations ##### */
|
||||
|
||||
PriorityQueue Qpart, *Q = NULL, *Qinst = NULL;
|
||||
double **D = NULL;
|
||||
int deficit, surplus, *part = NULL;
|
||||
int real_n = n-nb_constraints;
|
||||
|
||||
part = build_p_vector(comm, n, k, greedy_trials, constraints, nb_constraints);
|
||||
|
||||
memory_allocation(&Q, &Qinst, &D, real_n, k);
|
||||
|
||||
/* ##### Initialization ##### */
|
||||
|
||||
initialization(part, comm, &Qpart, Q, Qinst, D, real_n, k, &deficit, &surplus);
|
||||
|
||||
/* ##### Main loop ##### */
|
||||
while((nextGain(&Qpart, Q, &deficit, &surplus))>0)
|
||||
{
|
||||
algo(part, comm, &Qpart, Q, Qinst, D, real_n, &deficit, &surplus);
|
||||
}
|
||||
|
||||
/* ##### Balancing the partition ##### */
|
||||
balancing(real_n, deficit, surplus, D, part); /*if partition isn't balanced we have to make one last move*/
|
||||
|
||||
/* ##### Memory deallocation ##### */
|
||||
destruction(&Qpart, Q, Qinst, D, real_n, k);
|
||||
|
||||
return part;
|
||||
}
|
||||
|
||||
void memory_allocation(PriorityQueue ** Q, PriorityQueue ** Qinst, double *** D, int n, int k)
|
||||
{
|
||||
int i;
|
||||
*Q = calloc(k, sizeof(PriorityQueue)); /*one Q for each partition*/
|
||||
*Qinst = calloc(n, sizeof(PriorityQueue)); /*one Qinst for each vertex*/
|
||||
*D = malloc(sizeof(double *) * n); /*D's size is n * k*/
|
||||
for(i=0; i < n; ++i)
|
||||
(*D)[i] = calloc(k, sizeof(double));
|
||||
}
|
||||
|
||||
void initialization(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int k, int * const deficit, int * const surplus)
|
||||
{
|
||||
int i,j;
|
||||
|
||||
/* ##### PriorityQueue initializations ##### */
|
||||
/* We initialize Qpart with a size of k because it contains the subsets's indexes. */
|
||||
PQ_init(Qpart, k);
|
||||
|
||||
/* We initialize each Q[i] with a size of n because each vertex is in one of these queue at any time. */
|
||||
/* However we could set a size of (n/k)+1 as this is the maximum size of a subset when the partition is not balanced. */
|
||||
for(i=0; i<k; ++i)
|
||||
PQ_init(&Q[i], n);
|
||||
|
||||
/* We initialize each Qinst[i] with a size of k because fo each vertex i, Qinst[i] contains the D(i,j) values for j = 0...(k-1) */
|
||||
for(i=0; i<n; ++i)
|
||||
PQ_init(&Qinst[i], k);
|
||||
|
||||
/* ##### Computing the D(i,j) values ##### */
|
||||
for(i=0; i < n; ++i) /*for each vertex i*/
|
||||
{
|
||||
for(j=0; j < n; ++j) /*and for each vertex j*/
|
||||
{
|
||||
D[i][part[j]] += matrice[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
/* ##### Filling up the queues ##### */
|
||||
/* ### Qinst ### */
|
||||
for(i=0; i < n; ++i) /*for each vertex i*/
|
||||
for(j=0; j < k; ++j) /*and for each subset j*/
|
||||
PQ_insert(&Qinst[i], j, D[i][j]); /*we insert the corresponding D(i,j) value in Qinst[i]*/
|
||||
|
||||
/* ### Q ### */
|
||||
for(i=0; i<n; ++i) /*for each vertex i*/
|
||||
PQ_insert(&Q[part[i]], i, PQ_findMaxKey(&Qinst[i])-D[i][part[i]]); /*we insert in Q[part[i]] the vertex i with its highest possible gain*/
|
||||
|
||||
/* ### Qpart ### */
|
||||
for(i=0; i < k; ++i) /*for each subset i*/
|
||||
PQ_insert(Qpart, i, PQ_findMaxKey(&Q[i])); /*we insert it in Qpart with the highest possible gain by one of its vertex as key*/
|
||||
|
||||
|
||||
/* ##### Initialization of deficit/surplus ##### */
|
||||
*surplus = *deficit = 0;
|
||||
}
|
||||
|
||||
void algo(int * const part, double ** const matrice, PriorityQueue * const Qpart, PriorityQueue * const Q, PriorityQueue * const Qinst, double ** const D, int n, int * const deficit, int * const surplus)
|
||||
{
|
||||
int p,u,v,j;
|
||||
double d;
|
||||
if(*deficit == *surplus) /*if the current partition is balanced*/
|
||||
{
|
||||
p = PQ_deleteMax(Qpart); /*we get the subset with the highest possible gain in p and remove it from Qpart*/
|
||||
u = PQ_deleteMax(&Q[p]); /*then we get the vertex with this highest possible gain in u and remove it from Q[p] */
|
||||
*deficit = part[u]; /*p becomes the deficit */
|
||||
}
|
||||
else /*the current partition is not balanced*/
|
||||
{
|
||||
u = PQ_deleteMax(&Q[*surplus]); /*we get the vertex with the highest possible gain in surplus and remove it from Q[surplus] */
|
||||
PQ_delete(Qpart, part[u]); /*then we remove surplus from Qpart (note that u is from surplus so part[u] is surplus) */
|
||||
}
|
||||
d = PQ_findMaxKey(&Q[part[u]]); /*we get the next highest possible gain in part[u] (without taking u in account as we already removed it from Q[part[u])*/
|
||||
PQ_insert(Qpart, part[u], d); /*we put part[u] back in Qpart with its new highest possible gain*/
|
||||
j = PQ_deleteMax(&Qinst[u]); /*we get from Qinst[u] the subset in which we have to move u to get the highest gain.*/
|
||||
if ( j < 0){
|
||||
if(tm_get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Error Max element in priority queue negative!\n");
|
||||
exit(-1);
|
||||
}
|
||||
*surplus = j; /*this subset becomes surplus*/
|
||||
|
||||
for(v=0; v < n; ++v) /*we scan though all edges (u,v) */
|
||||
{
|
||||
j = part[u]; /*we set j to the starting subset */
|
||||
D[v][j]= D[v][j] - matrice[u][v]; /*we compute the new D[v, i] (here j has the value of the starting subset of u, that's why we say i) */
|
||||
PQ_adjustKey(&Qinst[v], j, D[v][j]); /*we update this gain in Qinst[v]*/
|
||||
j = *surplus; /*we put back the arrival subset in j*/
|
||||
D[v][j] = D[v][j] + matrice[u][v]; /*matrice[u][v]; we compute the new D[v, j]*/
|
||||
PQ_adjustKey(&Qinst[v], j, D[v][j]);/*we update this gain in Qinst[v]*/
|
||||
d = PQ_findMaxKey(&Qinst[v]) - D[v][part[v]]; /*we compute v's new highest possible gain*/
|
||||
PQ_adjustKey(&Q[part[v]], v, d); /*we update it in Q[p[v]]*/
|
||||
d = PQ_findMaxKey(&Q[part[v]]); /*we get the highest possible gain in v's subset*/
|
||||
PQ_adjustKey(Qpart, part[v], d); /*we update it in Qpart*/
|
||||
}
|
||||
part[u] = *surplus; /*we move u from i to j (here surplus has the value of j the arrival subset)*/
|
||||
|
||||
d = PQ_findMaxKey(&Qinst[u]) - D[u][part[u]]; /*we compute the new u's highest possible gain*/
|
||||
if(!PQ_isEmpty(&Qinst[u])) /*if at least one more move of u is possible*/
|
||||
PQ_insert(&Q[part[u]], u, d); /*we insert u in the Q queue of its new subset*/
|
||||
PQ_adjustKey(Qpart, part[u], d); /*we update the new highest possible gain in u's subset*/
|
||||
}
|
||||
|
||||
double nextGain(PriorityQueue * const Qpart, PriorityQueue * const Q, int * const deficit, int * const surplus)
|
||||
{
|
||||
double res;
|
||||
if(*deficit == *surplus) /*if the current partition is balanced*/
|
||||
res = PQ_findMaxKey(Qpart); /*we get the highest possible gain*/
|
||||
else /*the current partition is not balanced*/
|
||||
res = PQ_findMaxKey(&Q[*surplus]); /*we get the highest possible gain from surplus*/
|
||||
return res;
|
||||
}
|
||||
|
||||
void balancing(int n, int deficit, int surplus, double ** const D, int * const part)
|
||||
{
|
||||
if(surplus != deficit) /*if the current partition is not balanced*/
|
||||
{
|
||||
int i;
|
||||
PriorityQueue moves; /*we use a queue to store the possible moves from surplus to deficit*/
|
||||
PQ_init(&moves, n);
|
||||
for(i=0; i<n; ++i) /*for each vertex*/
|
||||
{
|
||||
if(part[i] == surplus) /*if i is from surplus*/
|
||||
PQ_insert(&moves, i, D[i][deficit]-D[i][surplus]); /*we insert i in moves with the gain we get from moving i from surplus to deficit as key */
|
||||
}
|
||||
part[PQ_deleteMax(&moves)] = deficit; /*we put the i from moves with the highest gain in deficit*/
|
||||
PQ_exit(&moves);
|
||||
}
|
||||
}
|
||||
|
||||
void destruction(PriorityQueue * Qpart, PriorityQueue * Q, PriorityQueue * Qinst, double ** D, int n, int k)
|
||||
{
|
||||
int i;
|
||||
PQ_exit(Qpart);
|
||||
for(i=0; i<k; ++i)
|
||||
PQ_exit(&Q[i]);
|
||||
free(Q);
|
||||
for(i=0; i<n; ++i)
|
||||
{
|
||||
PQ_exit(&Qinst[i]);
|
||||
}
|
||||
free(Qinst);
|
||||
|
||||
for(i=0; i<n; ++i)
|
||||
free(D[i]);
|
||||
free(D);
|
||||
}
|
||||
|
||||
|
||||
int *kpartition_greedy2(int k, double **comm, int n, int nb_try_max, int *constraints, int nb_constraints)
|
||||
{
|
||||
int *res = NULL, *best_res=NULL, *size = NULL;
|
||||
int i,j,nb_trials;
|
||||
int max_size;
|
||||
double cost, best_cost = -1;
|
||||
|
||||
for( nb_trials = 0 ; nb_trials < nb_try_max ; nb_trials++ ){
|
||||
res = (int *)malloc(sizeof(int)*n);
|
||||
for ( i = 0 ; i < n ; ++i )
|
||||
res[i] = -1;
|
||||
|
||||
size = (int *)calloc(k,sizeof(int));
|
||||
max_size = n/k;
|
||||
|
||||
/* put "dumb" vertices in the correct partition if there are any*/
|
||||
if (nb_constraints){ /*if there are at least one constraint*/
|
||||
int nb_real_nodes = n-nb_constraints; /*this is the number of "real" nodes by opposition to the dumb ones*/
|
||||
for(i=0; i<nb_constraints; ++i) /*for each constraint*/
|
||||
{
|
||||
int i_part = constraints[i]/max_size; /*we compute its partition*/
|
||||
res[nb_real_nodes+i] = i_part; /*and we set it in partition vector*/
|
||||
size[i_part]++; /*we update the partition's size*/
|
||||
}
|
||||
}
|
||||
|
||||
/* choose k initial "true" vertices at random and put them in a different partition */
|
||||
for ( i = 0 ; i < k ; ++i ){
|
||||
/* if the partition is full of dumb vertices go to next partition*/
|
||||
if(size[i] >= max_size)
|
||||
continue;
|
||||
/* find a vertex not already partitionned*/
|
||||
do{
|
||||
/* call the mersenne twister PRNG of tm_mt.c*/
|
||||
j = genrand_int32() % n;
|
||||
} while ( res[j] != -1 );
|
||||
/* allocate and update size of partition*/
|
||||
res[j] = i;
|
||||
/* printf("random: %d -> %d\n",j,i); */
|
||||
size[i]++;
|
||||
}
|
||||
|
||||
/* allocate each unallocated vertices in the partition that maximize the communication*/
|
||||
for( i = 0 ; i < n ; ++i )
|
||||
if( res[i] == -1)
|
||||
allocate_vertex2(i, res, comm, n-nb_constraints, size, max_size);
|
||||
|
||||
cost = eval_cost2(res,n-nb_constraints,comm);
|
||||
/*print_1D_tab(res,n);
|
||||
printf("cost=%.2f\n",cost);*/
|
||||
if((cost<best_cost) || (best_cost == -1)){
|
||||
best_cost=cost;
|
||||
free(best_res);
|
||||
best_res=res;
|
||||
}else
|
||||
free(res);
|
||||
|
||||
free(size);
|
||||
}
|
||||
|
||||
/*print_1D_tab(best_res,n);
|
||||
printf("best_cost=%.2f\n",best_cost);
|
||||
*/
|
||||
return best_res;
|
||||
}
|
||||
|
||||
void allocate_vertex2(int u, int *res, double **comm, int n, int *size, int max_size)
|
||||
{
|
||||
int i,best_part = -1;
|
||||
double cost, best_cost = -1;
|
||||
|
||||
/*printf("\n");
|
||||
print_1D_tab(res,n);*/
|
||||
for( i = 0 ; i < n ; ++i){
|
||||
if (( res[i] != -1 ) && ( size[res[i]] < max_size )){
|
||||
cost = comm[u][i];
|
||||
if (( cost > best_cost)){
|
||||
best_cost = cost;
|
||||
best_part = res[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* printf("size[%d]: %d\n",best_part, size[best_part]);*/
|
||||
/* printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */
|
||||
|
||||
res[u] = best_part;
|
||||
size[best_part]++;
|
||||
}
|
||||
|
||||
double eval_cost2(int *partition, int n, double **comm)
|
||||
{
|
||||
double cost = 0;
|
||||
int i,j;
|
||||
|
||||
for( i = 0 ; i < n ; ++i )
|
||||
for( j = i+1 ; j < n ; ++j )
|
||||
if(partition[i] != partition[j])
|
||||
cost += comm[i][j];
|
||||
|
||||
return cost;
|
||||
}
|
||||
|
||||
int* build_p_vector(double **comm, int n, int k, int greedy_trials, int * constraints, int nb_constraints)
|
||||
{
|
||||
int * part = NULL;
|
||||
if(greedy_trials>0) /*if greedy_trials > 0 then we use kpartition_greedy with greedy_trials trials*/
|
||||
{
|
||||
part = kpartition_greedy2(k, comm, n, greedy_trials, constraints, nb_constraints);
|
||||
}
|
||||
else
|
||||
{
|
||||
int * size = calloc(k, sizeof(int));
|
||||
int i,j;
|
||||
int nodes_per_part = n/k;
|
||||
int nb_real_nodes = n-nb_constraints;
|
||||
part = malloc(sizeof(int) * n);
|
||||
for(i=0; i<nb_constraints; i++) /*for each constraints*/
|
||||
{
|
||||
int i_part = constraints[i]/nodes_per_part; /*we compute the partition where we have to put this constraint*/
|
||||
part[nb_real_nodes+i] = i_part;
|
||||
size[i_part]++;
|
||||
}
|
||||
j=0;
|
||||
/* now we have to fill the partitions with the "real" nodes */
|
||||
for(i=0; i<nb_real_nodes; i++) /*for each node*/
|
||||
{
|
||||
if(size[j] < nodes_per_part) /*if j partition isn't full*/
|
||||
{
|
||||
size[j]++;
|
||||
part[i] = j; /*then we put the node in this part*/
|
||||
}
|
||||
else /*otherwise we decrement i to get the same node in the next loop*/
|
||||
{
|
||||
i--;
|
||||
}
|
||||
j = (j+1)%k; /*and we change j to the next partition*/
|
||||
}
|
||||
free(size);
|
||||
}
|
||||
return part;
|
||||
}
|
20
ompi/mca/topo/treematch/treematch/k-partitioning.h
Обычный файл
20
ompi/mca/topo/treematch/treematch/k-partitioning.h
Обычный файл
@ -0,0 +1,20 @@
|
||||
#ifndef K_PARTITIONING
|
||||
#define K_PARTITIONING
|
||||
|
||||
#include "PriorityQueue.h"
|
||||
|
||||
/*
|
||||
kPartitioning : function to call the k-partitioning algorithm
|
||||
- comm : the communication matrix
|
||||
- n : the number of vertices (including dumb vertices)
|
||||
- k : the number of partitions
|
||||
- constraints : the list of constraints
|
||||
- nb_constraints : the number of constraints
|
||||
- greedy_trials : the number of trials to build the partition vector with kpartition_greedy
|
||||
- 0 : cyclic distribution of vertices
|
||||
- > 0 : use of kpartition_greedy with greedy_trials number of trials
|
||||
*/
|
||||
|
||||
int* kPartitioning(double ** comm, int n, int k, int * const constraints, int nb_constraints, int greedy_trials);
|
||||
|
||||
#endif /*K_PARTITIONING*/
|
@ -1,56 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
//#include "tm_hwloc.h"
|
||||
#include "tm_tree.h"
|
||||
#include "tm_mapping.h"
|
||||
#include "tm_timings.h"
|
||||
|
||||
|
||||
|
||||
int main(int argc, char**argv){;
|
||||
tree_t *comm_tree=NULL;
|
||||
double **comm,**arch;
|
||||
tm_topology_t *topology;
|
||||
int nb_processes,nb_cores;
|
||||
int *sol,*k;
|
||||
if(argc<3){
|
||||
fprintf(stderr,"Usage: %s <Architecture tgt> <communication partern file>\n",argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
topology=tgt_to_tm(argv[1],&arch);
|
||||
optimize_topology(&topology);
|
||||
nb_processes=build_comm(argv[2],&comm);
|
||||
sol=(int*)MALLOC(sizeof(int)*nb_processes);
|
||||
|
||||
nb_cores=nb_processing_units(topology);
|
||||
k=(int*)MALLOC(sizeof(int)*nb_cores);
|
||||
// TreeMatchMapping(nb_processes,nb_cores,comm,sol);
|
||||
|
||||
if(nb_processes>nb_cores){
|
||||
fprintf(stderr,"Error: to many processes (%d) for this topology (%d nodes)\n",nb_processes,nb_cores);
|
||||
exit(-1);
|
||||
}
|
||||
TIC;
|
||||
comm_tree=build_tree_from_topology(topology,comm,nb_processes,NULL,NULL);
|
||||
map_topology_simple(topology,comm_tree,sol,k);
|
||||
double duration=TOC;
|
||||
printf("mapping duration: %f\n",duration);
|
||||
printf("TreeMatch: ");
|
||||
print_sol_inv(nb_processes,sol,comm,arch);
|
||||
//print_1D_tab(k,nb_cores);
|
||||
// display_other_heuristics(topology,nb_processes,comm,arch);
|
||||
|
||||
//display_tab(arch,nb_cores);
|
||||
|
||||
FREE_topology(topology);
|
||||
//FREE_tree(comm_tree);
|
||||
FREE(sol);
|
||||
FREE(comm);
|
||||
FREE(arch);
|
||||
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "tm_hwloc.h"
|
||||
#include "tm_tree.h"
|
||||
#include "tm_mapping.h"
|
||||
#include "tm_timings.h"
|
||||
|
||||
|
||||
|
||||
int main(int argc, char**argv){;
|
||||
tm_topology_t *topology;
|
||||
int nb_cores;
|
||||
double **arch;
|
||||
if(argc<2){
|
||||
fprintf(stderr,"Usage: %s <Architecture tgt>\n",argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
topology=tgt_to_tm(argv[1],&arch);
|
||||
nb_cores=nb_nodes(topology);
|
||||
|
||||
display_tab(arch,nb_cores);
|
||||
|
||||
FREE_topology(topology);
|
||||
FREE(arch);
|
||||
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
@ -31,7 +31,7 @@ static int ilog2(int val)
|
||||
|
||||
static int verbose_level = ERROR;
|
||||
|
||||
bucket_list_t global_bl = {0};
|
||||
bucket_list_t global_bl;
|
||||
|
||||
int tab_cmp(const void*,const void*);
|
||||
int old_bucket_id(int,int,bucket_list_t);
|
||||
@ -47,12 +47,12 @@ void fill_buckets(bucket_list_t);
|
||||
int is_power_of_2(int);
|
||||
void partial_sort(bucket_list_t *,double **,int);
|
||||
void next_bucket_elem(bucket_list_t,int *,int *);
|
||||
int add_edge_3(tree_t *,tree_t *,int,int,int *);
|
||||
void FREE_bucket(bucket_t *);
|
||||
void FREE_tab_bucket(bucket_t **,int);
|
||||
void FREE_bucket_list(bucket_list_t);
|
||||
void partial_update_val (int nb_args, void **args);
|
||||
|
||||
int add_edge_3(tm_tree_t *,tm_tree_t *,int,int,int *);
|
||||
void free_bucket(bucket_t *);
|
||||
void free_tab_bucket(bucket_t **,int);
|
||||
void free_bucket_list(bucket_list_t);
|
||||
void partial_update_val (int nb_args, void **args, int thread_id);
|
||||
double bucket_grouping(tm_affinity_mat_t *,tm_tree_t *, tm_tree_t *, int ,int);
|
||||
int tab_cmp(const void* x1,const void* x2)
|
||||
{
|
||||
int *e1 = NULL,*e2 = NULL,i1,i2,j1,j2;
|
||||
@ -146,7 +146,7 @@ void check_bucket(bucket_t *b,double **tab,double inf, double sup)
|
||||
j = b->bucket[k].j;
|
||||
if((tab[i][j] < inf) || (tab[i][j] > sup)){
|
||||
if(verbose_level >= CRITICAL)
|
||||
printf("[%d] (%d,%d):%f not in [%f,%f]\n",k,i,j,tab[i][j],inf,sup);
|
||||
fprintf(stderr,"[%d] (%d,%d):%f not in [%f,%f]\n",k,i,j,tab[i][j],inf,sup);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
@ -197,15 +197,20 @@ void add_to_bucket(int id,int i,int j,bucket_list_t bucket_list)
|
||||
n = bucket_list->nb_buckets;
|
||||
size = N*N/n;
|
||||
/* display_bucket(bucket);*/
|
||||
bucket->bucket = (coord*)realloc(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len));
|
||||
if(verbose_level >= DEBUG){
|
||||
printf("Extending bucket %d (%p) from size %d to size %d!\n",
|
||||
id,bucket->bucket, bucket->nb_elem, bucket->nb_elem+size);
|
||||
}
|
||||
|
||||
bucket->bucket = (coord*)REALLOC(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len));
|
||||
bucket->bucket_len += size;
|
||||
|
||||
if(verbose_level >= DEBUG){
|
||||
printf("MALLOC/realloc: %d\n",id);
|
||||
printf("(%d,%d)\n",i,j);
|
||||
display_bucket(bucket);
|
||||
printf("\n");
|
||||
}
|
||||
/* if(verbose_level >= DEBUG){ */
|
||||
/* printf("MALLOC/realloc: %d\n",id); */
|
||||
/* printf("(%d,%d)\n",i,j); */
|
||||
/* display_bucket(bucket); */
|
||||
/* printf("\n"); */
|
||||
/* } */
|
||||
|
||||
}
|
||||
|
||||
@ -289,7 +294,13 @@ void partial_sort(bucket_list_t *bl,double **tab,int N)
|
||||
bucket_list_t bucket_list;
|
||||
int nb_buckets, nb_bits;
|
||||
|
||||
/* after these operations, nb_bucket is a power of 2 interger close to log2(N)*/
|
||||
if( N <= 0){
|
||||
if(verbose_level >= ERROR )
|
||||
fprintf(stderr,"Error: tryng to group a matrix of size %d<=0!\n",N);
|
||||
return;
|
||||
}
|
||||
|
||||
/* after these operations, nb_buckets is a power of 2 interger close to log2(N)*/
|
||||
|
||||
nb_buckets = (int)floor(CmiLog2(N));
|
||||
|
||||
@ -404,7 +415,7 @@ void next_bucket_elem(bucket_list_t bucket_list,int *i,int *j)
|
||||
}
|
||||
|
||||
|
||||
int add_edge_3(tree_t *tab_node, tree_t *parent,int i,int j,int *nb_groups)
|
||||
int add_edge_3(tm_tree_t *tab_node, tm_tree_t *parent,int i,int j,int *nb_groups)
|
||||
{
|
||||
/* printf("%d <-> %d ?\n",tab_node[i].id,tab_node[j].id); */
|
||||
if((!tab_node[i].parent) && (!tab_node[j].parent)){
|
||||
@ -453,7 +464,7 @@ int add_edge_3(tree_t *tab_node, tree_t *parent,int i,int j,int *nb_groups)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_groups)
|
||||
int try_add_edge(tm_tree_t *tab_node, tm_tree_t *parent,int arity,int i,int j,int *nb_groups)
|
||||
{
|
||||
assert( i != j );
|
||||
|
||||
@ -481,40 +492,40 @@ int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_
|
||||
}
|
||||
}
|
||||
|
||||
void FREE_bucket(bucket_t *bucket)
|
||||
void free_bucket(bucket_t *bucket)
|
||||
{
|
||||
FREE(bucket->bucket);
|
||||
FREE(bucket);
|
||||
}
|
||||
|
||||
void FREE_tab_bucket(bucket_t **bucket_tab,int N)
|
||||
void free_tab_bucket(bucket_t **bucket_tab,int N)
|
||||
{
|
||||
int i;
|
||||
for( i = 0 ; i < N ; i++ )
|
||||
FREE_bucket(bucket_tab[i]);
|
||||
free_bucket(bucket_tab[i]);
|
||||
FREE(bucket_tab);
|
||||
}
|
||||
|
||||
void FREE_bucket_list(bucket_list_t bucket_list)
|
||||
void free_bucket_list(bucket_list_t bucket_list)
|
||||
{
|
||||
/* Do not FREE the tab field it is used elsewhere */
|
||||
FREE_tab_bucket(bucket_list->bucket_tab,bucket_list->nb_buckets);
|
||||
/* Do not free the tab field it is used elsewhere */
|
||||
free_tab_bucket(bucket_list->bucket_tab,bucket_list->nb_buckets);
|
||||
FREE(bucket_list->pivot);
|
||||
FREE(bucket_list->pivot_tree);
|
||||
FREE(bucket_list);
|
||||
}
|
||||
|
||||
void partial_update_val (int nb_args, void **args){
|
||||
void partial_update_val (int nb_args, void **args, int thread_id){
|
||||
int inf = *(int*)args[0];
|
||||
int sup = *(int*)args[1];
|
||||
affinity_mat_t *aff_mat = (affinity_mat_t*)args[2];
|
||||
tree_t *new_tab_node = (tree_t*)args[3];
|
||||
tm_affinity_mat_t *aff_mat = (tm_affinity_mat_t*)args[2];
|
||||
tm_tree_t *new_tab_node = (tm_tree_t*)args[3];
|
||||
double *res=(double*)args[4];
|
||||
int l;
|
||||
|
||||
if(nb_args != 6){
|
||||
if(nb_args != 5){
|
||||
if(verbose_level >= ERROR)
|
||||
fprintf(stderr,"Wrong number of args in %s: %d\n",__func__, nb_args);
|
||||
fprintf(stderr,"(Thread: %d) Wrong number of args in %s: %d\n",thread_id, __FUNCTION__, nb_args);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
@ -524,7 +535,7 @@ void partial_update_val (int nb_args, void **args){
|
||||
}
|
||||
}
|
||||
|
||||
void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node,
|
||||
double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t *new_tab_node,
|
||||
int arity,int M)
|
||||
{
|
||||
bucket_list_t bucket_list;
|
||||
@ -536,10 +547,12 @@ void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_n
|
||||
int N = aff_mat->order;
|
||||
double **mat = aff_mat->mat;
|
||||
|
||||
verbose_level = get_verbose_level();
|
||||
verbose_level = tm_get_verbose_level();
|
||||
if(verbose_level >= INFO )
|
||||
printf("starting sort of N=%d elements\n",N);
|
||||
|
||||
|
||||
|
||||
TIC;
|
||||
partial_sort(&bucket_list,mat,N);
|
||||
duration = TOC;
|
||||
@ -662,8 +675,8 @@ void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_n
|
||||
printf("Bucket: %d, indice:%d\n",bucket_list->cur_bucket,bucket_list->bucket_indice);
|
||||
printf("val=%f\n",val);
|
||||
}
|
||||
FREE_bucket_list(bucket_list);
|
||||
free_bucket_list(bucket_list);
|
||||
|
||||
/* exit(-1); */
|
||||
/* display_grouping(new_tab_node,M,arity,val); */
|
||||
return val;
|
||||
}
|
||||
|
||||
|
@ -28,7 +28,8 @@ typedef struct{
|
||||
|
||||
typedef _bucket_list_t *bucket_list_t;
|
||||
|
||||
void bucket_grouping(affinity_mat_t *aff_mat,tree_t *tab_node, tree_t *new_tab_node,
|
||||
int arity,int M);
|
||||
int try_add_edge(tree_t *tab_node, tree_t *parent,int arity,int i,int j,int *nb_groups);
|
||||
double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t *new_tab_node,
|
||||
int arity,int M);
|
||||
int try_add_edge(tm_tree_t *tab_node, tm_tree_t *parent,int arity,int i,int j,int *nb_groups);
|
||||
#endif
|
||||
|
||||
|
@ -1,286 +0,0 @@
|
||||
#include "opal/mca/hwloc/hwloc-internal.h"
|
||||
#include "tm_tree.h"
|
||||
#include "tm_mapping.h"
|
||||
#include <ctype.h>
|
||||
#include "tm_verbose.h"
|
||||
|
||||
|
||||
double ** tm_topology_to_arch(tm_topology_t *topology,double *cost);
|
||||
tm_topology_t * tgt_to_tm(char *filename,double **pcost);
|
||||
int topo_nb_proc(hwloc_topology_t topology,int N);
|
||||
double ** topology_to_arch(hwloc_topology_t topology);
|
||||
int symetric(hwloc_topology_t topology);
|
||||
tm_topology_t* hwloc_to_tm(char *filename,double **pcost);
|
||||
tm_topology_t* get_local_topo_with_hwloc(void);
|
||||
|
||||
|
||||
|
||||
|
||||
/* transform a tgt scotch file into a topology file*/
|
||||
tm_topology_t * tgt_to_tm(char *filename, double **pcost)
|
||||
{
|
||||
tm_topology_t *topology = NULL;
|
||||
FILE *pf = NULL;
|
||||
char line[1024];
|
||||
char *s = NULL;
|
||||
double *cost = NULL;
|
||||
int i;
|
||||
|
||||
|
||||
|
||||
pf = fopen(filename,"r");
|
||||
if(!pf){
|
||||
if(get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Cannot open %s\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if(get_verbose_level() >= INFO)
|
||||
printf("Reading TGT file: %s\n",filename);
|
||||
|
||||
|
||||
fgets(line,1024,pf);
|
||||
|
||||
s = strstr(line,"tleaf");
|
||||
if(!s){
|
||||
if(get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Syntax error! %s is not a tleaf file\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
s += 5;
|
||||
while(isspace(*s))
|
||||
s++;
|
||||
|
||||
topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
|
||||
topology->nb_levels = atoi(strtok(s," "))+1;
|
||||
topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
|
||||
cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
|
||||
|
||||
for( i = 0 ; i < topology->nb_levels-1 ; i++ ){
|
||||
topology->arity[i] = atoi(strtok(NULL," "));
|
||||
cost[i] = atoi(strtok(NULL," "));
|
||||
}
|
||||
|
||||
topology->arity[topology->nb_levels-1] = 0;
|
||||
/* cost[topology->nb_levels-1]=0; */
|
||||
|
||||
/*aggregate costs*/
|
||||
for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
|
||||
cost[i] += cost[i+1];
|
||||
|
||||
build_synthetic_proc_id(topology);
|
||||
|
||||
*pcost = cost;
|
||||
fclose(pf);
|
||||
/*
|
||||
topology->arity[0]=nb_proc;
|
||||
topology->nb_levels=decompose((int)ceil((1.0*nb_obj)/nb_proc),1,topology->arity);
|
||||
printf("levels=%d\n",topology->nb_levels);
|
||||
*/
|
||||
if(get_verbose_level() >= INFO)
|
||||
printf("Topology built from %s!\n",filename);
|
||||
|
||||
return topology;
|
||||
}
|
||||
|
||||
int topo_nb_proc(hwloc_topology_t topology,int N)
|
||||
{
|
||||
hwloc_obj_t *objs = NULL;
|
||||
int nb_proc;
|
||||
|
||||
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*N);
|
||||
objs[0] = hwloc_get_next_obj_by_type(topology,HWLOC_OBJ_PU,NULL);
|
||||
nb_proc = 1 + hwloc_get_closest_objs(topology,objs[0],objs+1,N-1);
|
||||
FREE(objs);
|
||||
return nb_proc;
|
||||
}
|
||||
|
||||
|
||||
double ** topology_to_arch(hwloc_topology_t topology)
|
||||
{
|
||||
int nb_proc,i,j;
|
||||
hwloc_obj_t obj_proc1,obj_proc2,obj_res;
|
||||
double **arch = NULL;
|
||||
|
||||
nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
|
||||
arch = (double**)MALLOC(sizeof(double*)*nb_proc);
|
||||
for( i = 0 ; i < nb_proc ; i++ ){
|
||||
obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
|
||||
arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc);
|
||||
for( j = 0 ; j < nb_proc ; j++ ){
|
||||
obj_proc2 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,j);
|
||||
obj_res = hwloc_get_common_ancestor_obj(topology,obj_proc1,obj_proc2);
|
||||
/* printf("arch[%d][%d] <- %ld\n",obj_proc1->os_index,obj_proc2->os_index,*((long int*)(obj_res->userdatab))); */
|
||||
arch[obj_proc1->os_index][obj_proc2->os_index]=speed(obj_res->depth+1);
|
||||
}
|
||||
}
|
||||
return arch;
|
||||
}
|
||||
|
||||
int symetric(hwloc_topology_t topology)
|
||||
{
|
||||
int depth,i,topodepth = hwloc_topology_get_depth(topology);
|
||||
unsigned int arity;
|
||||
hwloc_obj_t obj;
|
||||
for ( depth = 0; depth < topodepth-1 ; depth++ ) {
|
||||
int N = hwloc_get_nbobjs_by_depth(topology, depth);
|
||||
obj = hwloc_get_next_obj_by_depth (topology,depth,NULL);
|
||||
arity = obj->arity;
|
||||
|
||||
/* printf("Depth=%d, N=%d, Arity:%d\n",depth,N,arity); */
|
||||
for (i = 1; i < N; i++ ){
|
||||
obj = hwloc_get_next_obj_by_depth (topology,depth,obj);
|
||||
if( obj->arity != arity){
|
||||
/* printf("[%d]: obj->arity=%d, arity=%d\n",i,obj->arity,arity); */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
tm_topology_t* hwloc_to_tm(char *filename,double **pcost)
|
||||
{
|
||||
hwloc_topology_t topology;
|
||||
tm_topology_t *res = NULL;
|
||||
hwloc_obj_t *objs = NULL;
|
||||
unsigned topodepth,depth;
|
||||
int nb_nodes,i;
|
||||
double *cost;
|
||||
int err;
|
||||
|
||||
/* Build the topology */
|
||||
hwloc_topology_init(&topology);
|
||||
err = hwloc_topology_set_xml(topology,filename);
|
||||
if(err == -1){
|
||||
if(get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
#if HWLOC_API_VERSION < 0x20000
|
||||
hwloc_topology_ignore_all_keep_structure(topology);
|
||||
#else
|
||||
#warning FIXME hwloc v2
|
||||
#endif
|
||||
hwloc_topology_load(topology);
|
||||
|
||||
|
||||
/* Test if symetric */
|
||||
if(!symetric(topology)){
|
||||
if(get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"%s not symetric!\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* work on depth */
|
||||
topodepth = hwloc_topology_get_depth(topology);
|
||||
|
||||
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
|
||||
res->nb_levels = topodepth;
|
||||
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
|
||||
res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels);
|
||||
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
|
||||
|
||||
if(get_verbose_level() >= INFO)
|
||||
printf("topodepth = %d\n",topodepth);
|
||||
|
||||
/* Build TreeMatch topology */
|
||||
for( depth = 0 ; depth < topodepth ; depth++ ){
|
||||
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
|
||||
res->nb_nodes[depth] = nb_nodes;
|
||||
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
|
||||
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
|
||||
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
|
||||
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
|
||||
res->arity[depth] = objs[0]->arity;
|
||||
|
||||
if(get_verbose_level() >= INFO)
|
||||
printf("%d(%d):",res->arity[depth],nb_nodes);
|
||||
|
||||
/* Build process id tab */
|
||||
for (i = 0; i < nb_nodes; i++){
|
||||
res->node_id[depth][i] = objs[i]->os_index;
|
||||
/* if(depth==topodepth-1) */
|
||||
}
|
||||
FREE(objs);
|
||||
}
|
||||
|
||||
cost = (double*)CALLOC(res->nb_levels,sizeof(double));
|
||||
for(i=0; i<res->nb_levels; i++){
|
||||
cost[i] = speed(i);
|
||||
}
|
||||
|
||||
*pcost = cost;
|
||||
|
||||
|
||||
/* Destroy topology object. */
|
||||
hwloc_topology_destroy(topology);
|
||||
if(get_verbose_level() >= INFO)
|
||||
printf("\n");
|
||||
return res;
|
||||
}
|
||||
|
||||
tm_topology_t* get_local_topo_with_hwloc(void)
|
||||
{
|
||||
hwloc_topology_t topology;
|
||||
tm_topology_t *res = NULL;
|
||||
hwloc_obj_t *objs = NULL;
|
||||
unsigned topodepth,depth;
|
||||
int nb_nodes,i;
|
||||
|
||||
/* Build the topology */
|
||||
hwloc_topology_init(&topology);
|
||||
#if HWLOC_API_VERSION < 0x20000
|
||||
hwloc_topology_ignore_all_keep_structure(topology);
|
||||
#else
|
||||
#warning FIXME hwloc v2
|
||||
#endif
|
||||
hwloc_topology_load(topology);
|
||||
|
||||
/* Test if symetric */
|
||||
if(!symetric(topology)){
|
||||
if(get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Local toplogy not symetric!\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* work on depth */
|
||||
topodepth = hwloc_topology_get_depth(topology);
|
||||
|
||||
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
|
||||
res->nb_levels = topodepth;
|
||||
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
|
||||
res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels);
|
||||
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
|
||||
|
||||
/* Build TreeMatch topology */
|
||||
for( depth = 0 ; depth < topodepth ; depth++ ){
|
||||
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
|
||||
res->nb_nodes[depth] = nb_nodes;
|
||||
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
|
||||
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
|
||||
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
|
||||
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
|
||||
res->arity[depth] = objs[0]->arity;
|
||||
|
||||
/* printf("%d:",res->arity[depth]); */
|
||||
|
||||
/* Build process id tab */
|
||||
for (i = 0; i < nb_nodes; i++){
|
||||
res->node_id[depth][i] = objs[i]->os_index;
|
||||
/* if(depth==topodepth-1) */
|
||||
}
|
||||
FREE(objs);
|
||||
}
|
||||
|
||||
/* Destroy HWLOC topology object. */
|
||||
hwloc_topology_destroy(topology);
|
||||
|
||||
/* printf("\n"); */
|
||||
return res;
|
||||
}
|
||||
|
@ -1,7 +0,0 @@
|
||||
#include "opal/mca/hwloc/hwloc-internal.h"
|
||||
#include "tm_tree.h"
|
||||
|
||||
void hwloc_topology_tag(hwloc_topology_t topology);
|
||||
tm_topology_t* hwloc_to_tm(char *filename,double **pcost);
|
||||
tm_topology_t * tgt_to_tm(char *filename,double **pcost);
|
||||
tm_topology_t* get_local_topo_with_hwloc(void);
|
@ -1,13 +1,12 @@
|
||||
#include "tm_mapping.h"
|
||||
#include "tm_mt.h"
|
||||
#include "tm_kpartitioning.h"
|
||||
#include "k-partitioning.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "config.h"
|
||||
|
||||
#define USE_KL_KPART 0
|
||||
#if USE_KL_KPART
|
||||
#include "k-partitioning.h"
|
||||
#endif /* USE_KL_KPART */
|
||||
#define KL_KPART_GREEDY_TRIALS 0
|
||||
|
||||
static int verbose_level = ERROR;
|
||||
@ -15,25 +14,23 @@ static int verbose_level = ERROR;
|
||||
#define MAX_TRIALS 10
|
||||
#define USE_KL_STRATEGY 1
|
||||
|
||||
#if !defined(MIN)
|
||||
|
||||
#define MIN(a,b) ((a)<(b)?(a):(b))
|
||||
#endif
|
||||
|
||||
|
||||
int fill_tab(int **,int *,int,int,int,int);
|
||||
void complete_com_mat(double ***,int,int);
|
||||
void complete_obj_weight(double **,int,int);
|
||||
|
||||
void allocate_vertex(int,int *,com_mat_t *,int,int *,int);
|
||||
double eval_cost(int *, com_mat_t *);
|
||||
int *kpartition_greedy(int, com_mat_t *,int,int *,int);
|
||||
constraint_t *split_constraints (int *,int,int,tm_topology_t *,int);
|
||||
constraint_t *split_constraints (int *,int,int,tm_topology_t *,int, int);
|
||||
com_mat_t **split_com_mat(com_mat_t *,int,int,int *);
|
||||
int **split_vertices(int *,int,int,int *);
|
||||
void FREE_tab_com_mat(com_mat_t **,int);
|
||||
void FREE_tab_local_vertices(int **,int);
|
||||
void FREE_const_tab(constraint_t *,int);
|
||||
void kpartition_build_level_topology(tree_t *,com_mat_t *,int,int,tm_topology_t *,
|
||||
void free_tab_com_mat(com_mat_t **,int);
|
||||
void free_tab_local_vertices(int **,int);
|
||||
void free_const_tab(constraint_t *,int);
|
||||
void kpartition_build_level_topology(tm_tree_t *,com_mat_t *,int,int,tm_topology_t *,
|
||||
int *,int *,int,double *,double *);
|
||||
|
||||
|
||||
@ -51,10 +48,14 @@ void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int
|
||||
best_part = res[i];
|
||||
break;
|
||||
}
|
||||
|
||||
}else{
|
||||
for( i = 0 ; i < n ; i++){
|
||||
if (( res[i] != -1 ) && ( size[res[i]] < max_size )){
|
||||
cost = (((i)<com_mat->n)) ?com_mat->comm[u][i]:0;
|
||||
/* if((n<=16) && (u==8)){ */
|
||||
/* printf("u=%d, i=%d: %f\n",u, i, cost); */
|
||||
/* } */
|
||||
if (( cost > best_cost)){
|
||||
best_cost = cost;
|
||||
best_part = res[i];
|
||||
@ -62,8 +63,10 @@ void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int
|
||||
}
|
||||
}
|
||||
}
|
||||
/* printf("size[%d]: %d\n",best_part, size[best_part]);*/
|
||||
/* printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */
|
||||
/* if(n<=16){ */
|
||||
/* printf("size[%d]: %d\n",best_part, size[best_part]); */
|
||||
/* printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */
|
||||
/* } */
|
||||
|
||||
res[u] = best_part;
|
||||
size[best_part]++;
|
||||
@ -84,25 +87,45 @@ double eval_cost(int *partition, com_mat_t *com_mat)
|
||||
|
||||
int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints)
|
||||
{
|
||||
int *res = NULL, *best_res=NULL, *size = NULL;
|
||||
int *partition = NULL, *best_partition=NULL, *size = NULL;
|
||||
int i,j,nb_trials;
|
||||
int max_size, max_val;
|
||||
double cost, best_cost = -1;
|
||||
int start, end;
|
||||
int dumb_id, nb_dumb;
|
||||
int vl = tm_get_verbose_level();
|
||||
|
||||
|
||||
if(nb_constraints > n){
|
||||
if(vl >= ERROR){
|
||||
fprintf(stderr,"Error more constraints (%d) than the problem size (%d)!\n",nb_constraints, n);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
max_size = n/k;
|
||||
|
||||
if(vl >= DEBUG){
|
||||
printf("max_size = %d (n=%d,k=%d)\ncom_mat->n-1=%d\n",max_size,n,k,com_mat->n-1);
|
||||
printf("nb_constraints = %d\n",nb_constraints);
|
||||
|
||||
if(n<=16){
|
||||
printf("Constraints: ");print_1D_tab(constraints,nb_constraints);
|
||||
}
|
||||
}
|
||||
/* if(com_mat->n){ */
|
||||
/* printf ("val [n-1][0]= %f\n",com_mat->comm[com_mat->n-1][0]); */
|
||||
/* } */
|
||||
|
||||
|
||||
for( nb_trials = 0 ; nb_trials < MAX_TRIALS ; nb_trials++ ){
|
||||
res = (int *)MALLOC(sizeof(int)*n);
|
||||
partition = (int *)MALLOC(sizeof(int)*n);
|
||||
for ( i = 0 ; i < n ; i ++ )
|
||||
res[i] = -1;
|
||||
partition[i] = -1;
|
||||
|
||||
size = (int *)CALLOC(k,sizeof(int));
|
||||
max_size = n/k;
|
||||
|
||||
/*printf("Constraints: ");print_1D_tab(constraints,nb_constraints);*/
|
||||
|
||||
|
||||
/* put "dumb" vertices in the correct partition if there are any*/
|
||||
if (nb_constraints){
|
||||
@ -121,12 +144,13 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int
|
||||
number of leaves of the subtree (n/k) and the number of constraints
|
||||
*/
|
||||
nb_dumb = n/k - (end-start);
|
||||
/*printf("max_val: %d, nb_dumb=%d, start=%d, end=%d, size=%d\n",max_val, nb_dumb, start, end, n/k);*/
|
||||
|
||||
/* if(n<=16){ */
|
||||
/* printf("max_val: %d, nb_dumb=%d, start=%d, end=%d, size=%d\n",max_val, nb_dumb, start, end, n/k); */
|
||||
/* } */
|
||||
/* dumb vertices are the one with highest indices:
|
||||
put them in the ith partitions*/
|
||||
for( j = 0; j < nb_dumb; j ++ ){
|
||||
res[dumb_id] = i;
|
||||
partition[dumb_id] = i;
|
||||
dumb_id--;
|
||||
}
|
||||
/* increase the size of the ith partition accordingly*/
|
||||
@ -134,7 +158,10 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int
|
||||
start=end;
|
||||
}
|
||||
}
|
||||
/*printf("After dumb vertices mapping: ");print_1D_tab(res,n);*/
|
||||
/* if(n<=16){ */
|
||||
/* printf("After dumb vertices mapping: ");print_1D_tab(partition,n); */
|
||||
/* } */
|
||||
|
||||
|
||||
/* choose k initial "true" vertices at random and put them in a different partition */
|
||||
for ( i = 0 ; i < k ; i ++ ){
|
||||
@ -145,35 +172,39 @@ int *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int
|
||||
do{
|
||||
/* call the mersenne twister PRNG of tm_mt.c*/
|
||||
j = genrand_int32() % n;
|
||||
} while ( res[j] != -1 );
|
||||
} while ( partition[j] != -1 );
|
||||
/* allocate and update size of partition*/
|
||||
res[j] = i;
|
||||
/* printf("random: %d -> %d\n",j,i); */
|
||||
partition[j] = i;
|
||||
/* if(n<=16){ */
|
||||
/* printf("random: %d -> %d\n",j,i); */
|
||||
/* } */
|
||||
size[i]++;
|
||||
}
|
||||
|
||||
/* allocate each unaloacted vertices in the partition that maximize the communication*/
|
||||
for( i = 0 ; i < n ; i ++)
|
||||
if( res[i] == -1)
|
||||
allocate_vertex(i, res, com_mat, n, size, max_size);
|
||||
if( partition[i] == -1)
|
||||
allocate_vertex(i, partition, com_mat, n, size, max_size);
|
||||
|
||||
cost = eval_cost(res,com_mat);
|
||||
/*print_1D_tab(res,n);
|
||||
printf("cost=%.2f\n",cost);*/
|
||||
cost = eval_cost(partition,com_mat);
|
||||
/* if(n<=16){ */
|
||||
/* print_1D_tab(partition,n); */
|
||||
/* printf("cost=%.2f\n",cost); */
|
||||
/* } */
|
||||
if((cost<best_cost) || (best_cost == -1)){
|
||||
best_cost=cost;
|
||||
FREE(best_res);
|
||||
best_res=res;
|
||||
FREE(best_partition);
|
||||
best_partition=partition;
|
||||
}else
|
||||
FREE(res);
|
||||
FREE(partition);
|
||||
|
||||
FREE(size);
|
||||
}
|
||||
|
||||
/*print_1D_tab(best_res,n);
|
||||
/*print_1D_tab(best_partition,n);
|
||||
printf("best_cost=%.2f\n",best_cost);
|
||||
*/
|
||||
return best_res;
|
||||
return best_partition;
|
||||
}
|
||||
|
||||
int *kpartition(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints)
|
||||
@ -189,16 +220,24 @@ int *kpartition(int k, com_mat_t *com_mat, int n, int *constraints, int nb_const
|
||||
/* if(USE_KL_KPART) */
|
||||
/* res = kPartitioning(comm, n, k, constraints, nb_constraints, KL_KPART_GREEDY_TRIALS); */
|
||||
/* else */
|
||||
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
|
||||
|
||||
|
||||
#if HAVE_LIBSCOTCH
|
||||
printf("Using Scotch\n");
|
||||
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
|
||||
#else
|
||||
printf("Using default\n");
|
||||
res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm_topology_t *topology, int depth)
|
||||
constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm_topology_t *topology, int depth, int N)
|
||||
{
|
||||
constraint_t *const_tab = NULL;
|
||||
int nb_leaves, start, end;
|
||||
int i;
|
||||
int vl = tm_get_verbose_level();
|
||||
|
||||
const_tab = (constraint_t *)CALLOC(k,sizeof(constraint_t));
|
||||
|
||||
@ -211,11 +250,27 @@ constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm
|
||||
each sub-contraints 'i' contains constraints of value in [i*nb_leaves,(i+1)*nb_leaves[
|
||||
*/
|
||||
start = 0;
|
||||
|
||||
|
||||
|
||||
for( i = 0; i < k; i++ ){
|
||||
/*returns the indice in contsraints that contains the smallest value not copied
|
||||
/*returns the indice in constraints that contains the smallest value not copied
|
||||
end is used to compute the number of copied elements (end-size) and is used as the next staring indices*/
|
||||
end = fill_tab(&(const_tab[i].constraints), constraints, nb_constraints,start, (i+1) * nb_leaves, i * nb_leaves);
|
||||
const_tab[i].length = end-start;
|
||||
if(vl>=DEBUG){
|
||||
printf("Step %d\n",i);
|
||||
printf("\tConstraint: "); print_1D_tab(constraints, nb_constraints);
|
||||
printf("\tSub constraint: "); print_1D_tab(const_tab[i].constraints, end-start);
|
||||
}
|
||||
|
||||
if(end-start > N/k){
|
||||
if(vl >= ERROR){
|
||||
fprintf(stderr, "Error in spliting constraint at step %d. N=%d k= %d, length = %d\n", i, N, k, end-start);
|
||||
}
|
||||
FREE(const_tab);
|
||||
return NULL;
|
||||
}
|
||||
const_tab[i].id = i;
|
||||
start = end;
|
||||
}
|
||||
@ -224,6 +279,7 @@ constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm
|
||||
}
|
||||
|
||||
|
||||
/* split the com_mat of order n in k partiton according to parmutition table*/
|
||||
com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
|
||||
{
|
||||
com_mat_t **res = NULL, *sub_com_mat;
|
||||
@ -237,6 +293,8 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
|
||||
if(verbose_level >= DEBUG){
|
||||
printf("Partition: "); print_1D_tab(partition,n);
|
||||
display_tab(com_mat->comm,com_mat->n);
|
||||
printf("m=%d,n=%d,k=%d\n",m,n,k);
|
||||
printf("perm=%p\n",perm);
|
||||
}
|
||||
|
||||
perm = (int*)MALLOC(sizeof(int)*m);
|
||||
@ -244,10 +302,22 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
|
||||
|
||||
/* build perm such that submat[i][j] correspond to com_mat[perm[i]][perm[j]] according to the partition*/
|
||||
s = 0;
|
||||
for( j = 0; j < com_mat->n; j ++) /* check only non zero element of of com_mat*/
|
||||
/* The partition is of size n. n can be larger than the communication matrix order
|
||||
as only the input problem are in the communication matrix while n is of the size
|
||||
of all the element (including the added one where it is possible to map computation) :
|
||||
we can have more compute units than processes*/
|
||||
for( j = 0; j < com_mat->n; j ++)
|
||||
if ( partition[j] == cur_part )
|
||||
perm[s++] = j;
|
||||
|
||||
if(s>m){
|
||||
if(verbose_level >= CRITICAL){
|
||||
fprintf(stderr,"Partition: "); print_1D_tab(partition,n);
|
||||
display_tab(com_mat->comm,com_mat->n);
|
||||
fprintf(stderr,"too many elements of the partition for the permuation (s=%d>%d=m). n=%d, k=%d, cur_part= %d\n",s,m,n,k, cur_part);
|
||||
}
|
||||
exit(-1);
|
||||
}
|
||||
/* s is now the size of the non zero sub matrix for this partition*/
|
||||
/* built a sub-matrix for partition cur_part*/
|
||||
sub_mat = (double **) MALLOC(sizeof(double *) * s);
|
||||
@ -264,7 +334,7 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
|
||||
}
|
||||
}
|
||||
|
||||
sub_com_mat = (com_mat_t *)malloc(sizeof(com_mat_t));
|
||||
sub_com_mat = (com_mat_t *)MALLOC(sizeof(com_mat_t));
|
||||
sub_com_mat -> n = s;
|
||||
sub_com_mat -> comm = sub_mat;
|
||||
|
||||
@ -275,7 +345,7 @@ com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
|
||||
res[cur_part] = sub_com_mat;
|
||||
}
|
||||
|
||||
FREE(perm);
|
||||
FREE(perm);
|
||||
|
||||
return res;
|
||||
}
|
||||
@ -311,7 +381,7 @@ int **split_vertices( int *vertices, int n, int k, int *partition)
|
||||
return res;
|
||||
}
|
||||
|
||||
void FREE_tab_com_mat(com_mat_t **mat,int k)
|
||||
void free_tab_com_mat(com_mat_t **mat,int k)
|
||||
{
|
||||
int i,j;
|
||||
if( !mat )
|
||||
@ -321,11 +391,13 @@ void FREE_tab_com_mat(com_mat_t **mat,int k)
|
||||
for ( j = 0 ; j < mat[i]->n ; j ++)
|
||||
FREE( mat[i]->comm[j] );
|
||||
FREE( mat[i]->comm );
|
||||
FREE(mat[i]);
|
||||
|
||||
}
|
||||
FREE(mat);
|
||||
}
|
||||
|
||||
void FREE_tab_local_vertices(int **mat, int k)
|
||||
void free_tab_local_vertices(int **mat, int k)
|
||||
{
|
||||
int i; /* m=n/k; */
|
||||
if( !mat )
|
||||
@ -338,7 +410,7 @@ void FREE_tab_local_vertices(int **mat, int k)
|
||||
}
|
||||
|
||||
|
||||
void FREE_const_tab(constraint_t *const_tab, int k)
|
||||
void free_const_tab(constraint_t *const_tab, int k)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -353,19 +425,33 @@ void FREE_const_tab(constraint_t *const_tab, int k)
|
||||
FREE(const_tab);
|
||||
}
|
||||
|
||||
void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N, int depth,
|
||||
|
||||
void check_com_mat(com_mat_t *com_mat){
|
||||
int i,j;
|
||||
|
||||
for( i = 0 ; i < com_mat->n ; i++ )
|
||||
for( j = 0 ; j < com_mat->n ; j++ )
|
||||
if(com_mat->comm[i][j]<0){
|
||||
printf("com_mat->comm[%d][%d]= %f\n",i,j,com_mat->comm[i][j]);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
void kpartition_build_level_topology(tm_tree_t *cur_node, com_mat_t *com_mat, int N, int depth,
|
||||
tm_topology_t *topology, int *local_vertices,
|
||||
int *constraints, int nb_constraints,
|
||||
double *obj_weight, double *comm_speed)
|
||||
{
|
||||
com_mat_t **tab_com_mat = NULL; /* table of comunication matrix. We will have k of such comunication matrix, one for each subtree */
|
||||
int k = topology->arity[depth];
|
||||
tree_t **tab_child = NULL;
|
||||
tm_tree_t **tab_child = NULL;
|
||||
int *partition = NULL;
|
||||
int **tab_local_vertices = NULL;
|
||||
constraint_t *const_tab = NULL;
|
||||
int i;
|
||||
verbose_level = get_verbose_level();
|
||||
verbose_level = tm_get_verbose_level();
|
||||
|
||||
/* if we are at the bottom of the tree set cur_node
|
||||
and return*/
|
||||
@ -377,8 +463,14 @@ void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N
|
||||
}
|
||||
|
||||
|
||||
if(verbose_level >= DEBUG){
|
||||
printf("Partitionning Matrix of size %d (problem size= %d) in %d partitions\n", com_mat->n, N, k);
|
||||
}
|
||||
|
||||
/* check_com_mat(com_mat); */
|
||||
|
||||
/* partition the com_matrix in k partitions*/
|
||||
partition = kpartition(topology->arity[depth], com_mat, N, constraints, nb_constraints);
|
||||
partition = kpartition(k, com_mat, N, constraints, nb_constraints);
|
||||
|
||||
/* split the communication matrix in k parts according to the partition just found above */
|
||||
tab_com_mat = split_com_mat( com_mat, N, k, partition);
|
||||
@ -387,12 +479,12 @@ void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N
|
||||
tab_local_vertices = split_vertices( local_vertices, N, k, partition);
|
||||
|
||||
/* construct a tab of constraints of size k: one for each partitions*/
|
||||
const_tab = split_constraints (constraints, nb_constraints, k, topology, depth);
|
||||
const_tab = split_constraints (constraints, nb_constraints, k, topology, depth, N);
|
||||
|
||||
/* create the table of k nodes of the resulting sub-tree */
|
||||
tab_child = (tree_t **) CALLOC (k,sizeof(tree_t*));
|
||||
tab_child = (tm_tree_t **) CALLOC (k,sizeof(tm_tree_t*));
|
||||
for( i = 0 ; i < k ; i++){
|
||||
tab_child[i] = (tree_t *) MALLOC(sizeof(tree_t));
|
||||
tab_child[i] = (tm_tree_t *) MALLOC(sizeof(tm_tree_t));
|
||||
}
|
||||
|
||||
/* for each child, proceeed recursively*/
|
||||
@ -408,28 +500,30 @@ void kpartition_build_level_topology(tree_t *cur_node, com_mat_t *com_mat, int N
|
||||
/* link the node with its child */
|
||||
set_node( cur_node, tab_child, k, NULL, cur_node->id, 0, NULL, depth);
|
||||
|
||||
/* FREE local data*/
|
||||
/* free local data*/
|
||||
FREE(partition);
|
||||
FREE_tab_com_mat(tab_com_mat,k);
|
||||
FREE_tab_local_vertices(tab_local_vertices,k);
|
||||
FREE_const_tab(const_tab,k);
|
||||
free_tab_com_mat(tab_com_mat,k);
|
||||
free_tab_local_vertices(tab_local_vertices,k);
|
||||
free_const_tab(const_tab,k);
|
||||
}
|
||||
|
||||
|
||||
tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **comm,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed)
|
||||
tm_tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **comm,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed)
|
||||
{
|
||||
int depth,i, K;
|
||||
tree_t *root = NULL;
|
||||
tm_tree_t *root = NULL;
|
||||
int *local_vertices = NULL;
|
||||
int nb_cores;
|
||||
com_mat_t com_mat;
|
||||
|
||||
verbose_level = get_verbose_level();
|
||||
verbose_level = tm_get_verbose_level();
|
||||
|
||||
|
||||
nb_cores=nb_processing_units(topology)*topology->oversub_fact;
|
||||
|
||||
|
||||
if(verbose_level>=INFO)
|
||||
printf("Number of constraints: %d, N=%d\n", nb_constraints, N);
|
||||
|
||||
nb_cores=nb_processing_units(topology);
|
||||
printf("Number of constraints: %d, N=%d, nb_cores = %d, K=%d\n", nb_constraints, N, nb_cores, nb_cores-N);
|
||||
|
||||
if((constraints == NULL) && (nb_constraints != 0)){
|
||||
if(verbose_level>=ERROR)
|
||||
@ -449,7 +543,6 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com
|
||||
if((K=nb_cores - N)>0){
|
||||
/* add K element to the object weight*/
|
||||
complete_obj_weight(&obj_weight,N,K);
|
||||
/* display_tab(tab,N+K);*/
|
||||
} else if( K < 0){
|
||||
if(verbose_level>=ERROR)
|
||||
fprintf(stderr,"Not enough cores!\n");
|
||||
@ -463,7 +556,7 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com
|
||||
local_vertices is the array of vertices that can be used
|
||||
the min(N,nb_contraints) 1st element are number from 0 to N
|
||||
the last ones have value -1
|
||||
the value of this array will be used to number the leaves of the tree_t tree
|
||||
the value of this array will be used to number the leaves of the tm_tree_t tree
|
||||
that start at "root"
|
||||
|
||||
min(N,nb_contraints) is used to takle the case where thre is less processes than constraints
|
||||
@ -479,18 +572,20 @@ tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **com
|
||||
|
||||
/* we assume all objects have the same arity*/
|
||||
/* assign the root of the tree*/
|
||||
root = (tree_t*) MALLOC (sizeof(tree_t));
|
||||
root->id = 0;
|
||||
root = (tm_tree_t*) MALLOC (sizeof(tm_tree_t));
|
||||
root -> id = 0;
|
||||
|
||||
|
||||
/*build the tree downward from the root*/
|
||||
kpartition_build_level_topology(root, &com_mat, N+K, depth, topology, local_vertices,
|
||||
constraints, nb_constraints, obj_weight, com_speed);
|
||||
constraints, nb_constraints, obj_weight, com_speed);
|
||||
|
||||
/*print_1D_tab(local_vertices,K+N);*/
|
||||
if(verbose_level>=INFO)
|
||||
printf("Build (bottom-up) tree done!\n");
|
||||
|
||||
|
||||
|
||||
FREE(local_vertices);
|
||||
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
typedef struct _com_mat_t{
|
||||
double **comm;
|
||||
double **comm;
|
||||
int n; /*comm is of size n by n the other element are zeroes*/
|
||||
|
||||
|
||||
} com_mat_t;
|
||||
|
||||
|
||||
int *kpartition(int, com_mat_t*, int, int *, int);
|
||||
tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed);
|
||||
tm_tree_t * kpartition_build_tree_from_topology(tm_topology_t *topology,double **com_mat,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed);
|
||||
|
@ -1,35 +1,60 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <stdint.h>
|
||||
#include "uthash.h"
|
||||
#include <stdio.h>
|
||||
#include "tm_verbose.h"
|
||||
#include "tm_malloc.h"
|
||||
#include "opal/util/alfg.h"
|
||||
#include "tm_tree.h"
|
||||
#include "tm_mt.h"
|
||||
|
||||
|
||||
#define MIN(a,b) ((a)<(b)?(a):(b))
|
||||
|
||||
#define EXTRA_BYTE 100
|
||||
|
||||
typedef signed char byte;
|
||||
typedef uint8_t byte;
|
||||
|
||||
|
||||
/* static int verbose_level = ERROR;*/
|
||||
|
||||
typedef struct _hash_t {
|
||||
void *key; /* we'll use this field as the key */
|
||||
size_t size;
|
||||
UT_hash_handle hh; /* makes this structure hashable */
|
||||
void *key; /* we'll use this field as the key */
|
||||
size_t size;
|
||||
char *file;
|
||||
int line;
|
||||
UT_hash_handle hh; /* makes this structure hashable */
|
||||
}hash_t;
|
||||
|
||||
static hash_t *size_hash = NULL;
|
||||
static char extra_data[EXTRA_BYTE];
|
||||
|
||||
static void save_size(void *ptr, size_t size);
|
||||
static void save_ptr(void *ptr, size_t size, char *file, int line);
|
||||
static size_t retreive_size(void *someaddr);
|
||||
static void init_extra_data(void);
|
||||
|
||||
void save_size(void *ptr, size_t size) {
|
||||
|
||||
|
||||
char *my_strdup(char* string){
|
||||
int size = 1+strlen(string);
|
||||
char *res = (char*)malloc(size*sizeof(char));
|
||||
|
||||
if(res)
|
||||
memcpy(res, string, size*sizeof(char));
|
||||
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
void save_ptr(void *ptr, size_t size, char *file, int line) {
|
||||
hash_t *elem;
|
||||
elem = (hash_t*) malloc(sizeof(hash_t));
|
||||
elem -> key = ptr;
|
||||
elem -> key = ptr;
|
||||
elem -> size = size;
|
||||
if(get_verbose_level() >= DEBUG)
|
||||
elem -> line = line;
|
||||
elem -> file = my_strdup(file);
|
||||
if(tm_get_verbose_level() >= DEBUG)
|
||||
printf("Storing (%p,%ld)\n",ptr,size);
|
||||
HASH_ADD_PTR( size_hash, key, elem );
|
||||
}
|
||||
@ -40,72 +65,76 @@ size_t retreive_size(void *someaddr){
|
||||
hash_t *elem = NULL;
|
||||
HASH_FIND_PTR(size_hash, &someaddr, elem);
|
||||
if(!elem){
|
||||
fprintf(stderr,"cannot find ptr %p to free!\n",someaddr);
|
||||
if(tm_get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Cannot find ptr %p to free!\n",someaddr);
|
||||
abort();
|
||||
return 0;
|
||||
}
|
||||
|
||||
res = elem->size;
|
||||
if(get_verbose_level()>=DEBUG)
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("Retreiving (%p,%ld)\n",someaddr, res);
|
||||
|
||||
free(elem->file);
|
||||
HASH_DEL( size_hash, elem);
|
||||
return res;
|
||||
}
|
||||
|
||||
void my_mem_check(void){
|
||||
void tm_mem_check(void){
|
||||
#ifdef __DEBUG_TM_MALLOC__
|
||||
hash_t *s;
|
||||
int nb_errors = 0;
|
||||
for(s=size_hash; s != NULL; s=s->hh.next) {
|
||||
if(get_verbose_level() >= ERROR) {
|
||||
printf("pointer %p of size %ld has not been freed!\n", s->key, s->size);
|
||||
}
|
||||
nb_errors ++;
|
||||
if(tm_get_verbose_level()>=ERROR)
|
||||
printf("pointer %p of size %ld (%s: %d) has not been freed!\n", s->key, s->size, s->file, s->line);
|
||||
nb_errors ++;
|
||||
}
|
||||
|
||||
if(get_verbose_level() >= INFO)
|
||||
if(tm_get_verbose_level() >= INFO)
|
||||
printf ("Number of errors in managing memory: %d\n",nb_errors);
|
||||
#endif
|
||||
}
|
||||
|
||||
void init_extra_data(void){
|
||||
static int done = 0;
|
||||
opal_rng_buff_t rng;
|
||||
int i;
|
||||
|
||||
if(done)
|
||||
return;
|
||||
|
||||
opal_srand(&rng,0);
|
||||
init_genrand(0);
|
||||
|
||||
for( i = 0 ; i < EXTRA_BYTE; i++)
|
||||
extra_data[i] = (char) opal_rand(&rng) % 256;
|
||||
extra_data[i] = (char) genrand_int32() % 256;
|
||||
|
||||
done = 1;
|
||||
}
|
||||
|
||||
|
||||
void *my_malloc(size_t size, char *file, int line){
|
||||
void *tm_malloc(size_t size, char *file, int line){
|
||||
byte *ptr;
|
||||
init_extra_data();
|
||||
|
||||
size+=2*EXTRA_BYTE;
|
||||
ptr = malloc(size);
|
||||
|
||||
if(get_verbose_level()>=DEBUG)
|
||||
printf("my_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,(void*)ptr,file,line);
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_malloc of size %ld: %p (%s: %d)\n",size-2*EXTRA_BYTE,ptr,file,line);
|
||||
|
||||
save_size(ptr,size);
|
||||
save_ptr(ptr, size, file, line);
|
||||
|
||||
memcpy(ptr, extra_data, EXTRA_BYTE);
|
||||
memcpy(ptr + size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
|
||||
|
||||
|
||||
if(get_verbose_level()>=DEBUG)
|
||||
printf("my_malloc returning: %p\n",(void*)(ptr+EXTRA_BYTE));
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_malloc returning: %p\n",ptr+EXTRA_BYTE);
|
||||
|
||||
return (void *)(ptr + EXTRA_BYTE);
|
||||
}
|
||||
|
||||
void *my_calloc(size_t count, size_t size, char *file, int line){
|
||||
|
||||
void *tm_calloc(size_t count, size_t size, char *file, int line){
|
||||
byte *ptr;
|
||||
size_t full_size;
|
||||
|
||||
@ -115,22 +144,72 @@ void *my_calloc(size_t count, size_t size, char *file, int line){
|
||||
|
||||
ptr = malloc(full_size);
|
||||
bzero(ptr,full_size);
|
||||
save_size(ptr, full_size);
|
||||
save_ptr(ptr, full_size, file, line);
|
||||
|
||||
if(get_verbose_level()>=DEBUG)
|
||||
printf("my_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,(void*)ptr, file, line);
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_calloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,ptr, file, line);
|
||||
|
||||
|
||||
memcpy(ptr, extra_data, EXTRA_BYTE);
|
||||
memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
|
||||
|
||||
if(get_verbose_level()>=DEBUG)
|
||||
printf("my_calloc returning: %p\n",(void*)(ptr+EXTRA_BYTE));
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_calloc returning: %p\n",ptr+EXTRA_BYTE);
|
||||
|
||||
return (void *)(ptr+EXTRA_BYTE);
|
||||
}
|
||||
|
||||
void my_free(void *ptr){
|
||||
|
||||
void *tm_realloc(void *old_ptr, size_t size, char *file, int line){
|
||||
byte *ptr;
|
||||
size_t full_size;
|
||||
|
||||
init_extra_data();
|
||||
|
||||
full_size = size + 2 * EXTRA_BYTE;
|
||||
|
||||
ptr = malloc(full_size);
|
||||
save_ptr(ptr, full_size, file, line);
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_realloc of size %ld: %p (%s: %d)\n",full_size-2*EXTRA_BYTE,ptr, file, line);
|
||||
|
||||
|
||||
memcpy(ptr, extra_data, EXTRA_BYTE);
|
||||
memcpy(ptr + full_size - EXTRA_BYTE, extra_data, EXTRA_BYTE);
|
||||
|
||||
if(old_ptr){
|
||||
byte *original_ptr = ((byte *)old_ptr) - EXTRA_BYTE;
|
||||
size_t old_ptr_size = retreive_size(original_ptr);
|
||||
|
||||
memcpy(ptr + EXTRA_BYTE, old_ptr, MIN(old_ptr_size - 2 * EXTRA_BYTE, size));
|
||||
|
||||
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
|
||||
fprintf(stderr,"Realloc: cannot find special string ***before*** %p!\n", original_ptr);
|
||||
fprintf(stderr,"memory is probably corrupted here!\n");
|
||||
}
|
||||
|
||||
if((bcmp(original_ptr + old_ptr_size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
|
||||
fprintf(stderr,"Realloc: cannot find special string ***after*** %p!\n", original_ptr);
|
||||
fprintf(stderr,"memory is probably corrupted here!\n");
|
||||
}
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_free freeing: %p\n",original_ptr);
|
||||
|
||||
|
||||
free(original_ptr);
|
||||
}
|
||||
|
||||
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_realloc returning: %p (----- %p)\n",ptr+EXTRA_BYTE, ((byte *)ptr) - EXTRA_BYTE);
|
||||
|
||||
|
||||
return (void *)(ptr+EXTRA_BYTE);
|
||||
}
|
||||
|
||||
void tm_free(void *ptr){
|
||||
byte *original_ptr = ((byte *)ptr) - EXTRA_BYTE;
|
||||
size_t size;
|
||||
|
||||
@ -139,18 +218,18 @@ void my_free(void *ptr){
|
||||
|
||||
size = retreive_size(original_ptr);
|
||||
|
||||
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((get_verbose_level()>=ERROR))){
|
||||
fprintf(stderr,"cannot find special string ***before*** %p!\n",ptr);
|
||||
if((bcmp(original_ptr ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
|
||||
fprintf(stderr,"Free: cannot find special string ***before*** %p!\n", original_ptr);
|
||||
fprintf(stderr,"memory is probably corrupted here!\n");
|
||||
}
|
||||
|
||||
if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((get_verbose_level()>=ERROR))){
|
||||
fprintf(stderr,"cannot find special string ***after*** %p!\n",ptr);
|
||||
if((bcmp(original_ptr + size -EXTRA_BYTE ,extra_data, EXTRA_BYTE)) && ((tm_get_verbose_level()>=ERROR))){
|
||||
fprintf(stderr,"Free: cannot find special string ***after*** %p!\n", original_ptr);
|
||||
fprintf(stderr,"memory is probably corrupted here!\n");
|
||||
}
|
||||
|
||||
if(get_verbose_level()>=DEBUG)
|
||||
printf("my_free freeing: %p\n",(void*)original_ptr);
|
||||
if(tm_get_verbose_level()>=DEBUG)
|
||||
printf("tm_free freeing: %p\n",original_ptr);
|
||||
|
||||
|
||||
free(original_ptr);
|
||||
|
@ -1,5 +1,29 @@
|
||||
#ifndef _TM_MALLOC_H_
|
||||
#define _TM_MALLOC_H_
|
||||
|
||||
#include <stdlib.h>
|
||||
void *my_malloc(size_t size, char *, int);
|
||||
void *my_calloc(size_t count, size_t size, char *, int);
|
||||
void my_free(void *ptr);
|
||||
void my_mem_check(void);
|
||||
void *tm_malloc(size_t size, char *, int);
|
||||
void *tm_calloc(size_t count, size_t size, char *, int);
|
||||
void *tm_realloc(void *ptr, size_t size, char *, int);
|
||||
void tm_free(void *ptr);
|
||||
void tm_mem_check(void);
|
||||
|
||||
/* for debugging malloc */
|
||||
/* #define __DEBUG_TM_MALLOC__ */
|
||||
#undef __DEBUG_TM_MALLOC__
|
||||
#ifdef __DEBUG_TM_MALLOC__
|
||||
#define MALLOC(x) tm_malloc(x,__FILE__,__LINE__)
|
||||
#define CALLOC(x,y) tm_calloc(x,y,__FILE__,__LINE__)
|
||||
#define REALLOC(x,y) tm_realloc(x,y,__FILE__,__LINE__)
|
||||
#define FREE tm_free
|
||||
#define MEM_CHECK tm_mem_check
|
||||
#else
|
||||
#define MALLOC malloc
|
||||
#define CALLOC calloc
|
||||
#define FREE free
|
||||
#define REALLOC realloc
|
||||
#define MEM_CHECK tm_mem_check
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,43 +1,34 @@
|
||||
#ifndef __TM_MAPPING_H__
|
||||
#define __TM_MAPPING_H__
|
||||
#include "tm_tree.h"
|
||||
#include "tm_hwloc.h"
|
||||
#include "tm_topology.h"
|
||||
#include "tm_timings.h"
|
||||
#include "tm_verbose.h"
|
||||
|
||||
int build_comm(char *filename,double ***pcomm);
|
||||
void TreeMatchMapping(int nb_obj, int nb_proc,double **comm_mat, double * obj_weigth, double *com_speed, int d, int *sol);
|
||||
|
||||
/*Map topology to cores:
|
||||
sigma_i is such that process i is mapped on core sigma_i
|
||||
k_i is such that core i exectutes process k_i
|
||||
|
||||
size of sigma is the number of process (nb_objs)
|
||||
size of k is the number of cores/nodes (nb_proc)
|
||||
|
||||
We must have numbe of process<=number of cores
|
||||
|
||||
k_i =-1 if no process is mapped on core i
|
||||
*/
|
||||
void map_topology_simple(tm_topology_t *topology,tree_t *comm_tree, int *sigma, int nb_processes, int *k);
|
||||
|
||||
int nb_processing_units(tm_topology_t *topology);
|
||||
void free_topology(tm_topology_t *topology);
|
||||
void display_other_heuristics(tm_topology_t *topology,int N,double **comm,int TGT_flag, int *constraints, double *cost);
|
||||
void print_1D_tab(int *tab,int N);
|
||||
tm_affinity_mat_t * new_affinity_mat(double **mat, double *sum_row, int order);
|
||||
void build_synthetic_proc_id(tm_topology_t *topology);
|
||||
void display_topology(tm_topology_t *topology);
|
||||
tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_node);
|
||||
tm_topology_t *optimize_topology(tm_topology_t *topology);
|
||||
double print_sol_inv(int N,int *Value,double **comm, double *cost, tm_topology_t *topology);
|
||||
double print_sol(int N,int *Value,double **comm, double *cost, tm_topology_t *topology);
|
||||
int build_binding_constraints(char *filename, int **ptab);
|
||||
void canonize_constraints(tm_topology_t *topology, int *constraints, int **canonical, int n, int **perm, int *m);
|
||||
tm_topology_t *build_synthetic_topology(int *arity, int nb_levels, int *core_numbering, int nb_core_per_nodes);
|
||||
int compute_nb_leaves_from_level(int depth,tm_topology_t *topology);
|
||||
void FREE_topology(tm_topology_t *);
|
||||
|
||||
void depth_first(tm_tree_t *comm_tree, int *proc_list,int *i);
|
||||
int fill_tab(int **new_tab,int *tab, int n, int start, int max_val, int shift);
|
||||
void init_mat(char *filename,int N, double **mat, double *sum_row);
|
||||
void map_topology(tm_topology_t *topology,tm_tree_t *comm_tree, int level,
|
||||
int *sigma, int nb_processes, int **k, int nb_compute_units);
|
||||
int nb_leaves(tm_tree_t *comm_tree);
|
||||
int nb_lines(char *filename);
|
||||
int nb_processing_units(tm_topology_t *topology);
|
||||
void print_1D_tab(int *tab,int N);
|
||||
tm_solution_t * tm_compute_mapping(tm_topology_t *topology,tm_tree_t *comm_tree);
|
||||
void tm_finalize();
|
||||
void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
|
||||
tm_affinity_mat_t *tm_load_aff_mat(char *filename);
|
||||
void update_comm_speed(double **comm_speed,int old_size,int new_size);
|
||||
|
||||
/* use to split a constaint into subconstraint according the tree*/
|
||||
typedef struct _constraint{
|
||||
typedef struct{
|
||||
int *constraints; /* the subconstraints*/
|
||||
int length; /*length of *constraints*/
|
||||
int id; /* id of the corresponding subtree*/
|
||||
}constraint_t;
|
||||
|
||||
#endif
|
||||
|
@ -2,8 +2,7 @@ void init_genrand(unsigned long s);
|
||||
void init_by_array(unsigned long init_key[], int key_length);
|
||||
|
||||
/* generates a random number on the interval [0,0x7fffffff] */
|
||||
unsigned long genrand_int32(void);
|
||||
|
||||
unsigned long genrand_int32(void);
|
||||
long genrand_int31(void);
|
||||
double genrand_real1(void);
|
||||
double genrand_real2(void);
|
||||
|
525
ompi/mca/topo/treematch/treematch/tm_solution.c
Обычный файл
525
ompi/mca/topo/treematch/treematch/tm_solution.c
Обычный файл
@ -0,0 +1,525 @@
|
||||
#include <ctype.h>
|
||||
#include <float.h>
|
||||
#include "tm_solution.h"
|
||||
#include "tm_mt.h"
|
||||
#include "tm_mapping.h"
|
||||
|
||||
typedef struct {
|
||||
int val;
|
||||
long key;
|
||||
} hash_t;
|
||||
|
||||
|
||||
|
||||
void tm_free_solution(tm_solution_t *sol);
|
||||
int distance(tm_topology_t *topology,int i, int j);
|
||||
double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma);
|
||||
double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric);
|
||||
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol,
|
||||
tm_metric_t metric);
|
||||
void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric);
|
||||
int in_tab(int *tab, int n, int val);
|
||||
void map_Packed(tm_topology_t *topology, int N, int *sigma);
|
||||
void map_RR(tm_topology_t * topology, int N, int *sigma);
|
||||
int hash_asc(const void* x1,const void* x2);
|
||||
int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed);
|
||||
double eval_sol(int *sol,int N,double **comm, double **arch);
|
||||
void exchange(int *sol,int i,int j);
|
||||
double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch);
|
||||
void select_max(int *l,int *m,double **gain,int N,int *state);
|
||||
void compute_gain(int *sol,int N,double **gain,double **comm, double **arch);
|
||||
void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch);
|
||||
|
||||
|
||||
void tm_free_solution(tm_solution_t *sol){
|
||||
int i,n;
|
||||
|
||||
n = sol->k_length;
|
||||
|
||||
if(sol->k)
|
||||
for(i=0 ; i<n ; i++)
|
||||
FREE(sol->k[i]);
|
||||
|
||||
FREE(sol->k);
|
||||
FREE(sol->sigma);
|
||||
FREE(sol);
|
||||
}
|
||||
|
||||
/*
|
||||
Compute the distance in the tree
|
||||
between node i and j : the farther away node i and j, the
|
||||
larger the returned value.
|
||||
|
||||
The algorithm looks at the largest level, starting from the top,
|
||||
for which node i and j are still in the same subtree. This is done
|
||||
by iteratively dividing their numbering by the arity of the levels
|
||||
*/
|
||||
int distance(tm_topology_t *topology,int i, int j)
|
||||
{
|
||||
int level = 0;
|
||||
int arity;
|
||||
int f_i, f_j ;
|
||||
int vl = tm_get_verbose_level();
|
||||
int depth = topology->nb_levels-1;
|
||||
|
||||
f_i = topology->node_rank[depth][i];
|
||||
f_j = topology->node_rank[depth][j];
|
||||
|
||||
if(vl >= DEBUG)
|
||||
printf("i=%d, j=%d Level = %d f=(%d,%d)\n",i ,j, level, f_i, f_j);
|
||||
|
||||
|
||||
do{
|
||||
level++;
|
||||
arity = topology->arity[level];
|
||||
if( arity == 0 )
|
||||
arity = 1;
|
||||
f_i = f_i/arity;
|
||||
f_j = f_j/arity;
|
||||
} while((f_i!=f_j) && (level < depth));
|
||||
|
||||
if(vl >= DEBUG)
|
||||
printf("distance(%d,%d):%d\n",topology->node_rank[depth][i], topology->node_rank[depth][j], level);
|
||||
/* exit(-1); */
|
||||
return level;
|
||||
}
|
||||
|
||||
double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
|
||||
{
|
||||
double a,c,sol;
|
||||
int i,j;
|
||||
double *cost = topology->cost;
|
||||
double **mat = aff_mat->mat;
|
||||
int N = aff_mat->order;
|
||||
int depth = topology->nb_levels - 1;
|
||||
|
||||
|
||||
sol = 0;
|
||||
for ( i = 0 ; i < N ; i++ )
|
||||
for ( j = i+1 ; j < N ; j++){
|
||||
c = mat[i][j];
|
||||
/*
|
||||
Compute cost in funvtion of the inverse of the distance
|
||||
This is due to the fact that the cost matrix is numbered
|
||||
from top to bottom : cost[0] is the cost of the longest distance.
|
||||
*/
|
||||
a = cost[depth-distance(topology,sigma[i],sigma[j])];
|
||||
if(tm_get_verbose_level() >= DEBUG)
|
||||
printf("T_%d_%d %f*%f=%f\n",i,j,c,a,c*a);
|
||||
sol += c*a;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
printf("%d", sigma[i]);
|
||||
if(i<N-1)
|
||||
printf(",");
|
||||
}
|
||||
printf(" : %g\n",sol);
|
||||
|
||||
return sol;
|
||||
}
|
||||
|
||||
|
||||
double display_sol_max_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
|
||||
{
|
||||
double a,c,sol;
|
||||
int i,j;
|
||||
double *cost = topology->cost;
|
||||
double **mat = aff_mat->mat;
|
||||
int N = aff_mat->order;
|
||||
int vl = tm_get_verbose_level();
|
||||
int depth = topology->nb_levels - 1;
|
||||
|
||||
sol = 0;
|
||||
for ( i = 0 ; i < N ; i++ )
|
||||
for ( j = i+1 ; j < N ; j++){
|
||||
c = mat[i][j];
|
||||
/*
|
||||
Compute cost in funvtion of the inverse of the distance
|
||||
This is due to the fact that the cost matrix is numbered
|
||||
from top to bottom : cost[0] is the cost of the longest distance.
|
||||
*/
|
||||
a = cost[depth-distance(topology,sigma[i],sigma[j])];
|
||||
if(vl >= DEBUG)
|
||||
printf("T_%d_%d %f*%f=%f\n",i,j,c,a,c*a);
|
||||
if(c*a > sol)
|
||||
sol = c*a;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
printf("%d", sigma[i]);
|
||||
if(i<N-1)
|
||||
printf(",");
|
||||
}
|
||||
printf(" : %g\n",sol);
|
||||
|
||||
return sol;
|
||||
}
|
||||
|
||||
double display_sol_hop_byte(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma)
|
||||
{
|
||||
double c,sol;
|
||||
int nb_hops;
|
||||
int i,j;
|
||||
double **mat = aff_mat->mat;
|
||||
int N = aff_mat->order;
|
||||
|
||||
sol = 0;
|
||||
for ( i = 0 ; i < N ; i++ )
|
||||
for ( j = i+1 ; j < N ; j++){
|
||||
c = mat[i][j];
|
||||
nb_hops = 2*distance(topology,sigma[i],sigma[j]);
|
||||
if(tm_get_verbose_level() >= DEBUG)
|
||||
printf("T_%d_%d %f*%d=%f\n",i,j,c,nb_hops,c*nb_hops);
|
||||
sol += c*nb_hops;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
printf("%d", sigma[i]);
|
||||
if(i<N-1)
|
||||
printf(",");
|
||||
}
|
||||
printf(" : %g\n",sol);
|
||||
|
||||
return sol;
|
||||
}
|
||||
|
||||
|
||||
|
||||
double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric){
|
||||
switch (metric){
|
||||
case TM_METRIC_SUM_COM:
|
||||
return display_sol_sum_com(topology, aff_mat, sigma);
|
||||
case TM_METRIC_MAX_COM:
|
||||
return display_sol_max_com(topology, aff_mat, sigma);
|
||||
case TM_METRIC_HOP_BYTE:
|
||||
return display_sol_hop_byte(topology, aff_mat, sigma);
|
||||
default:
|
||||
if(tm_get_verbose_level() >= ERROR){
|
||||
fprintf(stderr,"Error printing solution: metric %d not implemented\n",metric);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol,
|
||||
tm_metric_t metric){
|
||||
|
||||
int i,j;
|
||||
int **k = sol->k;
|
||||
|
||||
|
||||
if(tm_get_verbose_level() >= DEBUG){
|
||||
printf("k: \n");
|
||||
for( i = 0 ; i < nb_processing_units(topology) ; i++ ){
|
||||
if(k[i][0] != -1){
|
||||
printf("\tProcessing unit %d: ",i);
|
||||
for (j = 0 ; j<topology->oversub_fact; j++){
|
||||
if( k[i][j] == -1)
|
||||
break;
|
||||
printf("%d ",k[i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return display_sol(topology, aff_mat, sol->sigma, metric);
|
||||
}
|
||||
|
||||
void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric)
|
||||
{
|
||||
int *sigma = NULL;
|
||||
int N = aff_mat->order;
|
||||
|
||||
sigma = (int*)MALLOC(sizeof(int)*N);
|
||||
|
||||
map_Packed(topology, N, sigma);
|
||||
printf("Packed: ");
|
||||
display_sol(topology, aff_mat, sigma, metric);
|
||||
|
||||
map_RR(topology, N, sigma);
|
||||
printf("RR: ");
|
||||
display_sol(topology, aff_mat, sigma, metric);
|
||||
|
||||
/* double duration; */
|
||||
/* CLOCK_T time1,time0; */
|
||||
/* CLOCK(time0); */
|
||||
/* map_MPIPP(topology,1,N,sigma,comm,arch); */
|
||||
/* CLOCK(time1); */
|
||||
/* duration=CLOCK_DIFF(time1,time0); */
|
||||
/* printf("MPIPP-1-D:%f\n",duration); */
|
||||
/* printf("MPIPP-1: "); */
|
||||
/* if (TGT_flag == 1) */
|
||||
/* print_sigma_inv(N,sigma,comm,arch); */
|
||||
/* else */
|
||||
/* print_sigma(N,sigma,comm,arch); */
|
||||
|
||||
/* CLOCK(time0); */
|
||||
/* map_MPIPP(topology,5,N,sigma,comm,arch); */
|
||||
/* CLOCK(time1); */
|
||||
/* duration=CLOCK_DIFF(time1,time0); */
|
||||
/* printf("MPIPP-5-D:%f\n",duration); */
|
||||
/* printf("MPIPP-5: "); */
|
||||
/* if (TGT_flag == 1) */
|
||||
/* print_sigma_inv(N,sigma,comm,arch); */
|
||||
/* else */
|
||||
/* print_sigma(N,sigma,comm,arch); */
|
||||
|
||||
FREE(sigma);
|
||||
}
|
||||
|
||||
|
||||
int in_tab(int *tab, int n, int val){
|
||||
int i;
|
||||
for( i = 0; i < n ; i++)
|
||||
if(tab[i] == val)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void map_Packed(tm_topology_t *topology, int N, int *sigma)
|
||||
{
|
||||
size_t i;
|
||||
int j = 0,depth;
|
||||
int vl = tm_get_verbose_level();
|
||||
|
||||
depth = topology->nb_levels-1;
|
||||
|
||||
for( i = 0 ; i < topology->nb_nodes[depth] ; i++){
|
||||
/* printf ("%d -> %d\n",objs[i]->os_index,i); */
|
||||
if((!topology->constraints) || (in_tab(topology->constraints, topology->nb_constraints, topology->node_id[depth][i]))){
|
||||
if(vl >= DEBUG)
|
||||
printf ("%lu: %d -> %d\n", i, j, topology->node_id[depth][i]);
|
||||
sigma[j++]=topology->node_id[depth][i];
|
||||
if(j == N)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void map_RR(tm_topology_t *topology, int N,int *sigma)
|
||||
{
|
||||
int i;
|
||||
int vl = tm_get_verbose_level();
|
||||
|
||||
for( i = 0 ; i < N ; i++ ){
|
||||
if(topology->constraints)
|
||||
sigma[i]=topology->constraints[i%topology->nb_constraints];
|
||||
else
|
||||
sigma[i]=i%topology->nb_proc_units;
|
||||
if(vl >= DEBUG)
|
||||
printf ("%d -> %d (%d)\n",i,sigma[i],topology->nb_proc_units);
|
||||
}
|
||||
}
|
||||
|
||||
int hash_asc(const void* x1,const void* x2)
|
||||
{
|
||||
hash_t *e1 = NULL,*e2 = NULL;
|
||||
|
||||
e1 = ((hash_t*)x1);
|
||||
e2 = ((hash_t*)x2);
|
||||
|
||||
return (e1->key < e2->key) ? -1 : 1;
|
||||
}
|
||||
|
||||
|
||||
int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed)
|
||||
{
|
||||
hash_t *hash_tab = NULL;
|
||||
int *sol = NULL;
|
||||
int *nodes_id= NULL;
|
||||
int i;
|
||||
|
||||
nodes_id = topology->node_id[level];
|
||||
|
||||
hash_tab = (hash_t*)MALLOC(sizeof(hash_t)*N);
|
||||
sol = (int*)MALLOC(sizeof(int)*N);
|
||||
|
||||
init_genrand(seed);
|
||||
|
||||
for( i = 0 ; i < N ; i++ ){
|
||||
hash_tab[i].val = nodes_id[i];
|
||||
hash_tab[i].key = genrand_int32();
|
||||
}
|
||||
|
||||
qsort(hash_tab,N,sizeof(hash_t),hash_asc);
|
||||
for( i = 0 ; i < N ; i++ )
|
||||
sol[i] = hash_tab[i].val;
|
||||
|
||||
FREE(hash_tab);
|
||||
return sol;
|
||||
}
|
||||
|
||||
|
||||
double eval_sol(int *sol,int N,double **comm, double **arch)
|
||||
{
|
||||
double a,c,res;
|
||||
int i,j;
|
||||
|
||||
res = 0;
|
||||
for ( i = 0 ; i < N ; i++ )
|
||||
for ( j = i+1 ; j < N ; j++ ){
|
||||
c = comm[i][j];
|
||||
a = arch[sol[i]][sol[j]];
|
||||
res += c/a;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void exchange(int *sol,int i,int j)
|
||||
{
|
||||
int tmp;
|
||||
tmp = sol[i];
|
||||
sol[i] = sol[j];
|
||||
sol[j] = tmp;
|
||||
}
|
||||
|
||||
double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch)
|
||||
{
|
||||
double eval2;
|
||||
if( l == m )
|
||||
return 0;
|
||||
exchange(sol,l,m);
|
||||
eval2 = eval_sol(sol,N,comm,arch);
|
||||
exchange(sol,l,m);
|
||||
|
||||
return eval1-eval2;
|
||||
}
|
||||
|
||||
void select_max(int *l,int *m,double **gain,int N,int *state)
|
||||
{
|
||||
double max;
|
||||
int i,j;
|
||||
|
||||
max = -DBL_MAX;
|
||||
|
||||
for( i = 0 ; i < N ; i++ )
|
||||
if(!state[i])
|
||||
for( j = 0 ; j < N ; j++ )
|
||||
if( (i != j) && (!state[j]) ){
|
||||
if(gain[i][j] > max){
|
||||
*l = i;
|
||||
*m = j;
|
||||
max=gain[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void compute_gain(int *sol,int N,double **gain,double **comm, double **arch)
|
||||
{
|
||||
double eval1;
|
||||
int i,j;
|
||||
|
||||
eval1 = eval_sol(sol,N,comm,arch);
|
||||
for( i = 0 ; i < N ; i++ )
|
||||
for( j = 0 ; j <= i ; j++)
|
||||
gain[i][j] = gain[j][i] = gain_exchange(sol,i,j,eval1,N,comm,arch);
|
||||
}
|
||||
|
||||
|
||||
/* Randomized Algorithm of
|
||||
Hu Chen, Wenguang Chen, Jian Huang ,Bob Robert,and H.Kuhn. Mpipp: an automatic profile-guided
|
||||
parallel process placement toolset for smp clusters and multiclusters. In
|
||||
Gregory K. Egan and Yoichi Muraoka, editors, ICS, pages 353-360. ACM, 2006.
|
||||
*/
|
||||
|
||||
void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch)
|
||||
{
|
||||
int *sol = NULL;
|
||||
int *state = NULL;
|
||||
double **gain = NULL;
|
||||
int **history = NULL;
|
||||
double *temp = NULL;
|
||||
int i,j,t,l=0,m=0,seed=0;
|
||||
double max,sum,best_eval,eval;
|
||||
|
||||
gain = (double**)MALLOC(sizeof(double*)*N);
|
||||
history = (int**)MALLOC(sizeof(int*)*N);
|
||||
for( i = 0 ; i < N ; i++){
|
||||
gain[i] = (double*)MALLOC(sizeof(double)*N);
|
||||
history[i] = (int*)MALLOC(sizeof(int)*3);
|
||||
}
|
||||
|
||||
state = (int*)MALLOC(sizeof(int)*N);
|
||||
temp = (double*)MALLOC(sizeof(double)*N);
|
||||
|
||||
sol = generate_random_sol(topology,N,topology->nb_levels-1,seed++);
|
||||
for( i = 0 ; i < N ; i++)
|
||||
sigma[i] = sol[i];
|
||||
|
||||
best_eval = DBL_MAX;
|
||||
while(seed <= nb_seed){
|
||||
do{
|
||||
for( i = 0 ; i < N ; i++ ){
|
||||
state[i] = 0;
|
||||
/* printf("%d ",sol[i]); */
|
||||
}
|
||||
/* printf("\n"); */
|
||||
compute_gain(sol,N,gain,comm,arch);
|
||||
/*
|
||||
display_tab(gain,N);
|
||||
exit(-1);
|
||||
*/
|
||||
for( i = 0 ; i < N/2 ; i++ ){
|
||||
select_max(&l,&m,gain,N,state);
|
||||
/* printf("%d: %d <=> %d : %f\n",i,l,m,gain[l][m]); */
|
||||
state[l] = 1;
|
||||
state[m] = 1;
|
||||
exchange(sol,l,m);
|
||||
history[i][1] = l;
|
||||
history[i][2] = m;
|
||||
temp[i] = gain[l][m];
|
||||
compute_gain(sol,N,gain,comm,arch);
|
||||
}
|
||||
|
||||
t = -1;
|
||||
max = 0;
|
||||
sum = 0;
|
||||
for(i = 0 ; i < N/2 ; i++ ){
|
||||
sum += temp[i];
|
||||
if( sum > max ){
|
||||
max = sum;
|
||||
t = i;
|
||||
}
|
||||
}
|
||||
/*for(j=0;j<=t;j++)
|
||||
printf("exchanging: %d with %d for gain: %f\n",history[j][1],history[j][2],temp[j]); */
|
||||
for( j = t+1 ; j < N/2 ; j++ ){
|
||||
exchange(sol,history[j][1],history[j][2]);
|
||||
/* printf("Undoing: %d with %d for gain: %f\n",history[j][1],history[j][2],temp[j]); */
|
||||
}
|
||||
/* printf("max=%f\n",max); */
|
||||
|
||||
/*for(i=0;i<N;i++){
|
||||
printf("%d ",sol[i]);
|
||||
}
|
||||
printf("\n");*/
|
||||
eval = eval_sol(sol,N,comm,arch);
|
||||
if(eval < best_eval){
|
||||
best_eval = eval;
|
||||
for(i = 0 ; i < N ; i++)
|
||||
sigma[i] = sol[i];
|
||||
/* print_sol(N); */
|
||||
}
|
||||
}while( max > 0 );
|
||||
FREE(sol);
|
||||
sol=generate_random_sol(topology,N,topology->nb_levels-1,seed++);
|
||||
}
|
||||
|
||||
|
||||
FREE(sol);
|
||||
FREE(temp);
|
||||
FREE(state);
|
||||
for( i = 0 ; i < N ; i++){
|
||||
FREE(gain[i]);
|
||||
FREE(history[i]);
|
||||
}
|
||||
FREE(gain);
|
||||
FREE(history);
|
||||
}
|
26
ompi/mca/topo/treematch/treematch/tm_solution.h
Обычный файл
26
ompi/mca/topo/treematch/treematch/tm_solution.h
Обычный файл
@ -0,0 +1,26 @@
|
||||
#ifndef TM_SOLUION_H
|
||||
#define TM_SOLUION_H
|
||||
|
||||
#include "treematch.h"
|
||||
|
||||
void tm_free_solution(tm_solution_t *sol);
|
||||
int distance(tm_topology_t *topology,int i, int j);
|
||||
double display_sol_sum_com(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma);
|
||||
double display_sol(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, int *sigma, tm_metric_t metric);
|
||||
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol,
|
||||
tm_metric_t metric);
|
||||
void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric);
|
||||
int in_tab(int *tab, int n, int val);
|
||||
void map_Packed(tm_topology_t *topology, int N, int *sigma);
|
||||
void map_RR(tm_topology_t *topology, int N, int *sigma);
|
||||
int hash_asc(const void* x1,const void* x2);
|
||||
int *generate_random_sol(tm_topology_t *topology,int N,int level,int seed);
|
||||
double eval_sol(int *sol,int N,double **comm, double **arch);
|
||||
void exchange(int *sol,int i,int j);
|
||||
double gain_exchange(int *sol,int l,int m,double eval1,int N,double **comm, double **arch);
|
||||
void select_max(int *l,int *m,double **gain,int N,int *state);
|
||||
void compute_gain(int *sol,int N,double **gain,double **comm, double **arch);
|
||||
void map_MPIPP(tm_topology_t *topology,int nb_seed,int N,int *sigma,double **comm, double **arch);
|
||||
|
||||
|
||||
#endif
|
@ -1,13 +1,18 @@
|
||||
#include <pthread.h>
|
||||
#include "tm_thread_pool.h"
|
||||
#include "tm_verbose.h"
|
||||
#include "opal/mca/hwloc/hwloc-internal.h"
|
||||
#include <hwloc.h>
|
||||
#include "tm_verbose.h"
|
||||
#include "tm_tree.h"
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
|
||||
typedef enum _mapping_policy {COMPACT, SCATTER} mapping_policy_t;
|
||||
|
||||
static mapping_policy_t mapping_policy = COMPACT;
|
||||
static int verbose_level = ERROR;
|
||||
static thread_pool_t *pool = NULL;
|
||||
static unsigned int max_nb_threads = INT_MAX;
|
||||
|
||||
static thread_pool_t *get_thread_pool(void);
|
||||
static void execute_work(work_t *work);
|
||||
@ -16,39 +21,21 @@ static void *thread_loop(void *arg);
|
||||
static void add_work(pthread_mutex_t *list_lock, pthread_cond_t *cond_var, work_t *working_list, work_t *work);
|
||||
static thread_pool_t *create_threads(void);
|
||||
|
||||
static void f1 (int nb_args, void **args);
|
||||
static void f2 (int nb_args, void **args);
|
||||
static void f1 (int nb_args, void **args, int thread_id);
|
||||
static void f2 (int nb_args, void **args, int thread_id);
|
||||
static void destroy_work(work_t *work);
|
||||
|
||||
#define MIN(a, b) ((a)<(b)?(a):(b))
|
||||
#define MAX(a, b) ((a)>(b)?(a):(b))
|
||||
|
||||
void f1 (int nb_args, void **args){
|
||||
int a, b;
|
||||
a = *(int*)args[0];
|
||||
b = *(int*)args[1];
|
||||
printf("nb_args=%d, a=%d, b=%d\n",nb_args,a,b);
|
||||
|
||||
|
||||
void tm_set_max_nb_threads(unsigned int val){
|
||||
max_nb_threads = val;
|
||||
}
|
||||
|
||||
|
||||
void f2 (int nb_args, void **args){
|
||||
int n, *tab;
|
||||
int *res;
|
||||
int i,j;
|
||||
n = *(int*)args[0];
|
||||
tab = (int*)args[1];
|
||||
res=(int*)args[2];
|
||||
|
||||
for(j=0;j<1000000;j++){
|
||||
*res=0;
|
||||
for (i=0;i<n;i++)
|
||||
*res+=tab[i];
|
||||
}
|
||||
|
||||
printf("done: %d!\n",nb_args);
|
||||
}
|
||||
|
||||
|
||||
void execute_work(work_t *work){
|
||||
work->task(work->nb_args, work->args);
|
||||
work->task(work->nb_args, work->args, work->thread_id);
|
||||
}
|
||||
|
||||
int bind_myself_to_core(hwloc_topology_t topology, int id){
|
||||
@ -57,10 +44,29 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){
|
||||
char *str;
|
||||
int binding_res;
|
||||
int depth = hwloc_topology_get_depth(topology);
|
||||
int nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
|
||||
int my_core;
|
||||
int nb_threads = get_nb_threads();
|
||||
/* printf("depth=%d\n",depth); */
|
||||
|
||||
switch (mapping_policy){
|
||||
case SCATTER:
|
||||
my_core = id*(nb_cores/nb_threads);
|
||||
break;
|
||||
default:
|
||||
if(verbose_level>=WARNING){
|
||||
printf("Wrong scheduling policy. Using COMPACT\n");
|
||||
}
|
||||
case COMPACT:
|
||||
my_core = id%nb_cores;
|
||||
}
|
||||
|
||||
if(verbose_level>=INFO){
|
||||
printf("Mapping thread %d on core %d\n",id,my_core);
|
||||
}
|
||||
|
||||
/* Get my core. */
|
||||
obj = hwloc_get_obj_by_depth(topology, depth-1, id);
|
||||
obj = hwloc_get_obj_by_depth(topology, depth-1, my_core);
|
||||
if (obj) {
|
||||
/* Get a copy of its cpuset that we may modify. */
|
||||
cpuset = hwloc_bitmap_dup(obj->cpuset);
|
||||
@ -71,7 +77,7 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){
|
||||
|
||||
|
||||
/*hwloc_bitmap_asprintf(&str, cpuset);
|
||||
printf("Binding thread %d to cpuset %s\n", id,str);
|
||||
printf("Binding thread %d to cpuset %s\n", my_core,str);
|
||||
FREE(str);
|
||||
*/
|
||||
|
||||
@ -81,8 +87,8 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){
|
||||
int error = errno;
|
||||
hwloc_bitmap_asprintf(&str, obj->cpuset);
|
||||
if(verbose_level>=WARNING)
|
||||
fprintf(stderr,"%d Couldn't bind to cpuset %s: %s\n", id, str, strerror(error));
|
||||
FREE(str);
|
||||
printf("Thread %d couldn't bind to cpuset %s: %s.\n This thread is not bound to any core...\n", my_core, str, strerror(error));
|
||||
free(str); /* str is allocated by hlwoc, free it normally*/
|
||||
return 0;
|
||||
}
|
||||
/* FREE our cpuset copy */
|
||||
@ -90,7 +96,7 @@ int bind_myself_to_core(hwloc_topology_t topology, int id){
|
||||
return 1;
|
||||
}else{
|
||||
if(verbose_level>=WARNING)
|
||||
fprintf(stderr,"No valid object for core id %d!\n",id);
|
||||
printf("No valid object for core id %d!\n",my_core);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
@ -161,6 +167,7 @@ void wait_work_completion(work_t *work){
|
||||
|
||||
int submit_work(work_t *work, int thread_id){
|
||||
if( (thread_id>=0) && (thread_id< pool->nb_threads)){
|
||||
work->thread_id = thread_id;
|
||||
add_work(&pool->list_lock[thread_id], &pool->cond_var[thread_id], &pool->working_list[thread_id], work);
|
||||
return 1;
|
||||
}
|
||||
@ -171,11 +178,11 @@ thread_pool_t *create_threads(){
|
||||
hwloc_topology_t topology;
|
||||
int i;
|
||||
local_thread_t *local;
|
||||
int nb_cores;
|
||||
int nb_threads;
|
||||
unsigned int nb_cores;
|
||||
int depth;
|
||||
|
||||
verbose_level = get_verbose_level();
|
||||
|
||||
verbose_level = tm_get_verbose_level();
|
||||
|
||||
/*Get number of cores: set 1 thread per core*/
|
||||
/* Allocate and initialize topology object. */
|
||||
@ -187,7 +194,7 @@ thread_pool_t *create_threads(){
|
||||
depth = hwloc_topology_get_depth(topology);
|
||||
if (depth == -1 ) {
|
||||
if(verbose_level>=CRITICAL)
|
||||
fprintf(stderr,"Error: topology with unknown depth\n");
|
||||
fprintf(stderr,"Error: HWLOC unable to find the depth of the topology of this node!\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
@ -195,19 +202,23 @@ thread_pool_t *create_threads(){
|
||||
|
||||
/* at depth 'depth' it is necessary a PU/core where we can execute things*/
|
||||
nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
|
||||
nb_threads = MIN(nb_cores, max_nb_threads);
|
||||
|
||||
if(verbose_level>=INFO)
|
||||
printf("nb_threads = %d\n",nb_threads);
|
||||
|
||||
pool = (thread_pool_t*) MALLOC(sizeof(thread_pool_t));
|
||||
pool -> topology = topology;
|
||||
pool -> nb_threads = nb_cores;
|
||||
pool -> thread_list = (pthread_t*)MALLOC(sizeof(pthread_t)*nb_cores);
|
||||
pool -> working_list = (work_t*)CALLOC(nb_cores,sizeof(work_t));
|
||||
pool -> cond_var = (pthread_cond_t*)MALLOC(sizeof(pthread_cond_t)*nb_cores);
|
||||
pool -> list_lock = (pthread_mutex_t*)MALLOC(sizeof(pthread_mutex_t)*nb_cores);
|
||||
pool -> nb_threads = nb_threads;
|
||||
pool -> thread_list = (pthread_t*)MALLOC(sizeof(pthread_t)*nb_threads);
|
||||
pool -> working_list = (work_t*)CALLOC(nb_threads,sizeof(work_t));
|
||||
pool -> cond_var = (pthread_cond_t*)MALLOC(sizeof(pthread_cond_t)*nb_threads);
|
||||
pool -> list_lock = (pthread_mutex_t*)MALLOC(sizeof(pthread_mutex_t)*nb_threads);
|
||||
|
||||
local=(local_thread_t*)MALLOC(sizeof(local_thread_t)*nb_cores);
|
||||
local=(local_thread_t*)MALLOC(sizeof(local_thread_t)*nb_threads);
|
||||
pool->local = local;
|
||||
|
||||
for (i=0;i<nb_cores;i++){
|
||||
for (i=0;i<nb_threads;i++){
|
||||
local[i].topology = topology;
|
||||
local[i].id = i;
|
||||
local[i].working_list = &pool->working_list[i];
|
||||
@ -245,11 +256,12 @@ void terminate_thread_pool(){
|
||||
|
||||
for (id=0;id<pool->nb_threads;id++){
|
||||
pthread_join(pool->thread_list[id],(void **) &ret);
|
||||
FREE(ret);
|
||||
pthread_cond_destroy(pool->cond_var +id);
|
||||
pthread_mutex_destroy(pool->list_lock +id);
|
||||
if (pool->working_list[id].next != NULL)
|
||||
if(verbose_level >= WARNING)
|
||||
fprintf(stderr,"Working list of thread %d not empty!\n",id);
|
||||
printf("Working list of thread %d not empty!\n",id);
|
||||
}
|
||||
|
||||
hwloc_topology_destroy(pool->topology);
|
||||
@ -272,7 +284,7 @@ int get_nb_threads(){
|
||||
}
|
||||
|
||||
|
||||
work_t *create_work(int nb_args, void **args, void (*task) (int, void **)){
|
||||
work_t *create_work(int nb_args, void **args, void (*task) (int, void **, int)){
|
||||
work_t *work;
|
||||
work = MALLOC(sizeof(work_t));
|
||||
work -> nb_args = nb_args;
|
||||
@ -293,6 +305,34 @@ void destroy_work(work_t *work){
|
||||
FREE(work);
|
||||
}
|
||||
|
||||
/* CODE example 2 functions and test driver*/
|
||||
|
||||
void f1 (int nb_args, void **args, int thread_id){
|
||||
int a, b;
|
||||
a = *(int*)args[0];
|
||||
b = *(int*)args[1];
|
||||
printf("id: %d, nb_args=%d, a=%d, b=%d\n",thread_id, nb_args,a,b);
|
||||
}
|
||||
|
||||
|
||||
void f2 (int nb_args, void **args, int thread_id){
|
||||
int n, *tab;
|
||||
int *res;
|
||||
int i,j;
|
||||
n = *(int*)args[0];
|
||||
tab = (int*)args[1];
|
||||
res=(int*)args[2];
|
||||
|
||||
for(j=0;j<1000000;j++){
|
||||
*res=0;
|
||||
for (i=0;i<n;i++)
|
||||
*res+=tab[i];
|
||||
}
|
||||
|
||||
printf("id: %d, done: %d!\n",thread_id, nb_args);
|
||||
}
|
||||
|
||||
|
||||
|
||||
int test_main(void){
|
||||
|
||||
|
@ -2,17 +2,18 @@
|
||||
#define THREAD_POOL_H
|
||||
|
||||
#include <pthread.h>
|
||||
#include "opal/mca/hwloc/hwloc-internal.h"
|
||||
#include <hwloc.h>
|
||||
|
||||
|
||||
typedef struct _work_t{
|
||||
int nb_args;
|
||||
void (*task)(int nb_args, void **args);
|
||||
void (*task)(int nb_args, void **args, int thread_id);
|
||||
void **args;
|
||||
struct _work_t *next;
|
||||
pthread_cond_t work_done;
|
||||
pthread_mutex_t mutex;
|
||||
int done;
|
||||
int thread_id;
|
||||
}work_t;
|
||||
|
||||
typedef struct {
|
||||
@ -38,8 +39,10 @@ int get_nb_threads(void);
|
||||
int submit_work(work_t *work, int thread_id);
|
||||
void wait_work_completion(work_t *work);
|
||||
void terminate_thread_pool(void);
|
||||
work_t *create_work(int nb_args, void **args, void (int, void **));
|
||||
work_t *create_work(int nb_args, void **args, void (int, void **, int));
|
||||
int test_main(void);
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* THREAD_POOL_H */
|
||||
|
@ -12,6 +12,7 @@ void get_time(void)
|
||||
|
||||
CLOCK(time_tab[clock_num]);
|
||||
}
|
||||
|
||||
double time_diff(void)
|
||||
{
|
||||
CLOCK_T t2,t1;
|
||||
@ -22,7 +23,7 @@ double time_diff(void)
|
||||
}
|
||||
|
||||
if(clock_num < 0){
|
||||
return -1.0;
|
||||
return -2.0;
|
||||
}
|
||||
|
||||
CLOCK(t2);
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
#ifndef TIMINGS_H
|
||||
#define TIMINGS_H
|
||||
#include <stdio.h>
|
||||
|
842
ompi/mca/topo/treematch/treematch/tm_topology.c
Обычный файл
842
ompi/mca/topo/treematch/treematch/tm_topology.c
Обычный файл
@ -0,0 +1,842 @@
|
||||
#include <hwloc.h>
|
||||
#include <hwloc/helper.h>
|
||||
#include "tm_tree.h"
|
||||
#include "tm_mapping.h"
|
||||
#include <ctype.h>
|
||||
#include "tm_verbose.h"
|
||||
#include "tm_solution.h"
|
||||
|
||||
|
||||
tm_topology_t* get_local_topo_with_hwloc(void);
|
||||
tm_topology_t* hwloc_to_tm(char *filename);
|
||||
int int_cmp_inc(const void* x1,const void* x2);
|
||||
void optimize_arity(int **arity, double **cost, int *nb_levels,int n);
|
||||
int symetric(hwloc_topology_t topology);
|
||||
tm_topology_t * tgt_to_tm(char *filename);
|
||||
void tm_display_arity(tm_topology_t *topology);
|
||||
void tm_display_topology(tm_topology_t *topology);
|
||||
void tm_free_topology(tm_topology_t *topology);
|
||||
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
|
||||
void tm_optimize_topology(tm_topology_t **topology);
|
||||
int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology);
|
||||
int topo_nb_proc(hwloc_topology_t topology,int N);
|
||||
void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels);
|
||||
void topology_constraints_cpy(tm_topology_t *topology,int **constraints,int *nb_constraints);
|
||||
void topology_cost_cpy(tm_topology_t *topology,double **cost);
|
||||
void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_nodes);
|
||||
double ** topology_to_arch(hwloc_topology_t topology);
|
||||
void build_synthetic_proc_id(tm_topology_t *topology);
|
||||
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
|
||||
|
||||
|
||||
#define LINE_SIZE (1000000)
|
||||
|
||||
|
||||
/* transform a tgt scotch file into a topology file*/
|
||||
tm_topology_t * tgt_to_tm(char *filename)
|
||||
{
|
||||
tm_topology_t *topology = NULL;
|
||||
FILE *pf = NULL;
|
||||
char line[1024];
|
||||
char *s = NULL;
|
||||
double *cost = NULL;
|
||||
int i;
|
||||
|
||||
|
||||
|
||||
pf = fopen(filename,"r");
|
||||
if(!pf){
|
||||
if(tm_get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Cannot open %s\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if(tm_get_verbose_level() >= INFO)
|
||||
printf("Reading TGT file: %s\n",filename);
|
||||
|
||||
|
||||
fgets(line,1024,pf);
|
||||
fclose(pf);
|
||||
|
||||
s = strstr(line,"tleaf");
|
||||
if(!s){
|
||||
if(tm_get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Syntax error! %s is not a tleaf file\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
s += 5;
|
||||
while(isspace(*s))
|
||||
s++;
|
||||
|
||||
topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
|
||||
topology->nb_constraints = 0;
|
||||
topology->oversub_fact = 1;
|
||||
topology->constraints = NULL;
|
||||
topology->nb_levels = atoi(strtok(s," "))+1;
|
||||
topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
|
||||
|
||||
cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
|
||||
|
||||
for( i = 0 ; i < topology->nb_levels-1 ; i++ ){
|
||||
topology->arity[i] = atoi(strtok(NULL," "));
|
||||
cost[i] = atoi(strtok(NULL," "));
|
||||
}
|
||||
|
||||
topology->arity[topology->nb_levels-1] = 0;
|
||||
/* cost[topology->nb_levels-1]=0; */
|
||||
|
||||
/*aggregate costs*/
|
||||
for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
|
||||
cost[i] += cost[i+1];
|
||||
|
||||
build_synthetic_proc_id(topology);
|
||||
|
||||
if(tm_get_verbose_level() >= INFO)
|
||||
printf("Topology built from %s!\n",filename);
|
||||
|
||||
topology->cost=cost;
|
||||
|
||||
|
||||
return topology;
|
||||
}
|
||||
|
||||
int topo_nb_proc(hwloc_topology_t topology,int N)
|
||||
{
|
||||
hwloc_obj_t *objs = NULL;
|
||||
int nb_proc;
|
||||
|
||||
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*N);
|
||||
objs[0] = hwloc_get_next_obj_by_type(topology,HWLOC_OBJ_PU,NULL);
|
||||
nb_proc = 1 + hwloc_get_closest_objs(topology,objs[0],objs+1,N-1);
|
||||
FREE(objs);
|
||||
return nb_proc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
double link_cost(int depth)
|
||||
{
|
||||
/*
|
||||
Bertha values
|
||||
double tab[5]={21,9,4.5,2.5,0.001};
|
||||
double tab[5]={1,1,1,1,1};
|
||||
double tab[6]={100000,10000,1000,500,100,10};
|
||||
*/
|
||||
double tab[11] = {1024,512,256,128,64,32,16,8,4,2,1};
|
||||
|
||||
return tab[depth];
|
||||
/*
|
||||
return 10*log(depth+2);
|
||||
return (depth+1);
|
||||
return (long int)pow(100,depth);
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
double ** topology_to_arch(hwloc_topology_t topology)
|
||||
{
|
||||
int nb_proc,i,j;
|
||||
hwloc_obj_t obj_proc1,obj_proc2,obj_res;
|
||||
double **arch = NULL;
|
||||
|
||||
nb_proc = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
|
||||
arch = (double**)MALLOC(sizeof(double*)*nb_proc);
|
||||
for( i = 0 ; i < nb_proc ; i++ ){
|
||||
obj_proc1 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
|
||||
arch[obj_proc1->os_index] = (double*)MALLOC(sizeof(double)*nb_proc);
|
||||
for( j = 0 ; j < nb_proc ; j++ ){
|
||||
obj_proc2 = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,j);
|
||||
obj_res = hwloc_get_common_ancestor_obj(topology,obj_proc1,obj_proc2);
|
||||
/* printf("arch[%d][%d] <- %ld\n",obj_proc1->os_index,obj_proc2->os_index,*((long int*)(obj_res->userdatab))); */
|
||||
arch[obj_proc1->os_index][obj_proc2->os_index]=link_cost(obj_res->depth+1);
|
||||
}
|
||||
}
|
||||
return arch;
|
||||
}
|
||||
|
||||
int symetric(hwloc_topology_t topology)
|
||||
{
|
||||
int depth,i,topodepth = hwloc_topology_get_depth(topology);
|
||||
unsigned int arity;
|
||||
hwloc_obj_t obj;
|
||||
for ( depth = 0; depth < topodepth-1 ; depth++ ) {
|
||||
int N = hwloc_get_nbobjs_by_depth(topology, depth);
|
||||
obj = hwloc_get_next_obj_by_depth (topology,depth,NULL);
|
||||
arity = obj->arity;
|
||||
|
||||
/* printf("Depth=%d, N=%d, Arity:%d\n",depth,N,arity); */
|
||||
for (i = 1; i < N; i++ ){
|
||||
obj = hwloc_get_next_obj_by_depth (topology,depth,obj);
|
||||
if( obj->arity != arity){
|
||||
/* printf("[%d]: obj->arity=%d, arity=%d\n",i,obj->arity,arity); */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
tm_topology_t* hwloc_to_tm(char *filename)
|
||||
{
|
||||
hwloc_topology_t topology;
|
||||
tm_topology_t *res = NULL;
|
||||
hwloc_obj_t *objs = NULL;
|
||||
unsigned topodepth,depth;
|
||||
unsigned int nb_nodes;
|
||||
double *cost;
|
||||
int err, l;
|
||||
unsigned int i;
|
||||
int vl = tm_get_verbose_level();
|
||||
|
||||
/* Build the topology */
|
||||
hwloc_topology_init(&topology);
|
||||
err = hwloc_topology_set_xml(topology,filename);
|
||||
if(err == -1){
|
||||
if(vl >= CRITICAL)
|
||||
fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
hwloc_topology_ignore_all_keep_structure(topology);
|
||||
hwloc_topology_load(topology);
|
||||
|
||||
|
||||
/* Test if symetric */
|
||||
if(!symetric(topology)){
|
||||
if(tm_get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"%s not symetric!\n",filename);
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* work on depth */
|
||||
topodepth = hwloc_topology_get_depth(topology);
|
||||
|
||||
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
|
||||
res->oversub_fact = 1;
|
||||
res->nb_constraints = 0;
|
||||
res->constraints = NULL;
|
||||
res->nb_levels = topodepth;
|
||||
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
|
||||
res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels);
|
||||
res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
|
||||
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
|
||||
|
||||
if(vl >= INFO)
|
||||
printf("topodepth = %d\n",topodepth);
|
||||
|
||||
/* Build TreeMatch topology */
|
||||
for( depth = 0 ; depth < topodepth ; depth++ ){
|
||||
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
|
||||
res->nb_nodes[depth] = nb_nodes;
|
||||
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
|
||||
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
|
||||
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
|
||||
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
|
||||
res->arity[depth] = objs[0]->arity;
|
||||
|
||||
if (depth == topodepth -1){
|
||||
res->nb_constraints = nb_nodes;
|
||||
res->nb_proc_units = nb_nodes;
|
||||
}
|
||||
|
||||
if(vl >= DEBUG)
|
||||
printf("\n--%d(%d) **%d**:--\n",res->arity[depth],nb_nodes,res->arity[0]);
|
||||
|
||||
/* Build process id tab */
|
||||
for (i = 0; i < nb_nodes; i++){
|
||||
if(objs[i]->os_index > nb_nodes){
|
||||
if(vl >= CRITICAL){
|
||||
fprintf(stderr, "Index of object %d of level %d is %d and larger than number of nodes : %d\n",
|
||||
i, depth, objs[i]->os_index, nb_nodes);
|
||||
}
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
res->node_id[depth][i] = objs[i]->os_index;
|
||||
res->node_rank[depth][objs[i]->os_index] = i;
|
||||
/* if(depth==topodepth-1) */
|
||||
}
|
||||
FREE(objs);
|
||||
|
||||
|
||||
}
|
||||
|
||||
cost = (double*)CALLOC(res->nb_levels,sizeof(double));
|
||||
for(l=0; l<res->nb_levels; l++){
|
||||
cost[l] = link_cost(l);
|
||||
}
|
||||
res->cost = cost;
|
||||
|
||||
|
||||
/* Destroy topology object. */
|
||||
hwloc_topology_destroy(topology);
|
||||
if(tm_get_verbose_level() >= INFO)
|
||||
printf("\n");
|
||||
|
||||
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
tm_topology_t* get_local_topo_with_hwloc(void)
|
||||
{
|
||||
hwloc_topology_t topology;
|
||||
tm_topology_t *res = NULL;
|
||||
hwloc_obj_t *objs = NULL;
|
||||
unsigned topodepth,depth;
|
||||
int nb_nodes,i;
|
||||
|
||||
/* Build the topology */
|
||||
hwloc_topology_init(&topology);
|
||||
hwloc_topology_ignore_all_keep_structure(topology);
|
||||
hwloc_topology_load(topology);
|
||||
|
||||
/* Test if symetric */
|
||||
if(!symetric(topology)){
|
||||
if(tm_get_verbose_level() >= CRITICAL)
|
||||
fprintf(stderr,"Local toplogy not symetric!\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* work on depth */
|
||||
topodepth = hwloc_topology_get_depth(topology);
|
||||
|
||||
res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
|
||||
res->nb_constraints = 0;
|
||||
res->constraints = NULL;
|
||||
res->nb_levels = topodepth;
|
||||
res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
|
||||
res->node_rank = (int**)MALLOC(sizeof(int*)*res->nb_levels);
|
||||
res->nb_nodes = (size_t*)MALLOC(sizeof(size_t)*res->nb_levels);
|
||||
res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);
|
||||
|
||||
/* Build TreeMatch topology */
|
||||
for( depth = 0 ; depth < topodepth ; depth++ ){
|
||||
nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
|
||||
res->nb_nodes[depth] = nb_nodes;
|
||||
res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
res->node_rank[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);
|
||||
|
||||
objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
|
||||
objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
|
||||
hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
|
||||
res->arity[depth] = objs[0]->arity;
|
||||
|
||||
if (depth == topodepth -1){
|
||||
res->nb_constraints = nb_nodes;
|
||||
res->nb_proc_units = nb_nodes;
|
||||
}
|
||||
/* printf("%d:",res->arity[depth]); */
|
||||
|
||||
/* Build process id tab */
|
||||
for (i = 0; i < nb_nodes; i++){
|
||||
res->node_id[depth][i] = objs[i]->os_index;
|
||||
res->node_rank[depth][objs[i]->os_index] = i;
|
||||
/* if(depth==topodepth-1) */
|
||||
}
|
||||
FREE(objs);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Destroy HWLOC topology object. */
|
||||
hwloc_topology_destroy(topology);
|
||||
|
||||
/* printf("\n"); */
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/* Release every array owned by a tm_topology_t, then the structure itself.
   NULL members (constraints, cost) are handled by free() being a no-op. */
void tm_free_topology(tm_topology_t *topology)
{
  int level;

  /* per-level id and rank tables first */
  for( level = 0 ; level < topology->nb_levels ; level++ ){
    FREE(topology->node_id[level]);
    FREE(topology->node_rank[level]);
  }

  /* then the top-level arrays */
  FREE(topology->constraints);
  FREE(topology->node_id);
  FREE(topology->node_rank);
  FREE(topology->nb_nodes);
  FREE(topology->arity);
  FREE(topology->cost);

  FREE(topology);
}
|
||||
|
||||
/* Load a TreeMatch topology from a file, dispatching on the declared file
   type (TGT or hwloc XML).  Exits on an unknown file type. */
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type){
  if(arch_file_type == TM_FILE_TYPE_TGT)
    return tgt_to_tm(arch_filename);

  if(arch_file_type == TM_FILE_TYPE_XML)
    return hwloc_to_tm(arch_filename);

  if(tm_get_verbose_level() >= ERROR){
    fprintf(stderr,"Error loading topology. Filetype %d unknown\n", arch_file_type);
  }
  exit(-1);
}
|
||||
|
||||
|
||||
/* Print a human-readable dump of the topology on stdout: node ids per level,
   leaf-level ranks, constraints (if any) and summary scalars. */
void tm_display_topology(tm_topology_t *topology)
{
  int i;
  unsigned int j;
  unsigned long id;
  int leaf = topology->nb_levels - 1;

  /* node ids, one line per level */
  for( i = 0 ; i < topology->nb_levels ; i++ ){
    printf("%d: ",i);
    for( j = 0 ; j < topology->nb_nodes[i] ; j++)
      printf("%d ",topology->node_id[i][j]);
    printf("\n");
  }

  /* ranks of the leaf level (one entry per physical unit) */
  printf("Last level: ");
  for(id = 0; id < topology->nb_nodes[leaf]/topology->oversub_fact; id++)
    printf("%d ",topology->node_rank[leaf][id]);
  printf("\n");

  if(topology->constraints){
    printf("Constraints: ");
    for(i = 0; i < topology->nb_constraints; i++)
      printf("%d ",topology->constraints[i]);
    printf("\n");
  }

  printf("\tnb_levels=%d\n\tnb_constraints=%d\n\toversub_fact=%d\n\tnb proc units=%d\n\n",
         topology->nb_levels, topology->nb_constraints, topology->oversub_fact, topology->nb_proc_units);
}
|
||||
|
||||
|
||||
/* Print, on one line, "arity(cost)" for each level of the topology. */
void tm_display_arity(tm_topology_t *topology){
  int depth;
  for(depth=0; depth < topology->nb_levels; depth++){
    /* FIX: topology->cost may be NULL (tm_build_synthetic_topology leaves
       it NULL when no cost array is supplied); dereferencing it crashed. */
    if(topology->cost)
      printf("%d(%lf): ",topology->arity[depth], topology->cost[depth]);
    else
      printf("%d: ",topology->arity[depth]);
  }
  printf("\n");
}
|
||||
|
||||
/* qsort comparator for ints, increasing order.
   FIX: the original returned 1 for equal keys, so cmp(a,b) and cmp(b,a)
   could both be positive — an inconsistent comparator, which is undefined
   behavior for qsort (C11 7.22.5).  Use a proper three-way comparison. */
int int_cmp_inc(const void* x1,const void* x2)
{
  const int a = *(const int *)x1;
  const int b = *(const int *)x2;
  /* (a > b) - (a < b) yields -1, 0 or 1 without overflow risk */
  return (a > b) - (a < b);
}
|
||||
|
||||
|
||||
/* Check that every constraint id is a valid node id of the topology's leaf
   level.  Return 1 when all constraints are valid, 0 (after a CRITICAL
   message) as soon as one is not. */
int topo_check_constraints(tm_topology_t *topology){
  int leaf = topology->nb_levels - 1;
  int i;

  for (i = 0; i < topology->nb_constraints; i++){
    int rank = topology->constraints[i];
    if(in_tab(topology->node_id[leaf], topology->nb_nodes[leaf], rank))
      continue;
    if(tm_get_verbose_level() >= CRITICAL){
      fprintf(stderr,"Error! Incompatible constraint with the topology: rank %d in the constraints is not a valid id of any nodes of the topology.\n",rank);
    }
    return 0;
  }
  return 1;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* cpy flag tells if we need to copy the array.
|
||||
Set to 1 when called from the application level and 0 when called from inside the library*/
|
||||
/* Attach binding constraints to the topology and validate them.
   cpy_flag != 0 (application level): the array is duplicated.
   cpy_flag == 0 (library internal): the topology takes ownership of the
   given array.  Returns the result of topo_check_constraints(). */
int tm_topology_set_binding_constraints_cpy(int *constraints, int nb_constraints, tm_topology_t *topology, int cpy_flag){
  topology->nb_constraints = nb_constraints;

  if(cpy_flag){
    size_t bytes = nb_constraints * sizeof(int);
    topology->constraints = (int*)MALLOC(bytes);
    memcpy(topology->constraints, constraints, bytes);
  }else{
    topology->constraints = constraints;
  }

  return topo_check_constraints(topology);
}
|
||||
|
||||
/* Public entry point: attach a copy of the constraints array (cpy_flag = 1)
   so the caller keeps ownership of its buffer. */
int tm_topology_set_binding_constraints(int *constraints, int nb_constraints, tm_topology_t *topology){
  const int copy_the_array = 1;
  return tm_topology_set_binding_constraints_cpy(constraints, nb_constraints, topology, copy_the_array);
}
|
||||
|
||||
/* Read binding constraints — a whitespace-separated list of node ids on the
   first line of constraints_filename — sort them increasingly and attach
   them to the topology (which takes ownership of the array).
   Returns the result of the constraint validity check; exits on I/O or
   format errors. */
int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology)
{
  int *tab = NULL;
  FILE *pf = NULL;
  char line[LINE_SIZE],*l = NULL;
  char *ptr = NULL;
  int i,n;
  unsigned int vl = tm_get_verbose_level();

  if (!(pf = fopen(constraints_filename,"r"))) {
    if(vl >= CRITICAL)
      fprintf(stderr,"Cannot open %s\n",constraints_filename);
    exit(-1);
  }

  /* First pass: count the tokens of the first line to size the array.
     FIX: the return value of fgets() was ignored; an empty file left
     `line` uninitialized and the parse read indeterminate memory. */
  if(!fgets(line, LINE_SIZE, pf)){
    if(vl >= CRITICAL)
      fprintf(stderr,"Cannot read %s\n",constraints_filename);
    exit(-1);
  }
  n = 0;
  l = line;
  while((ptr=strtok(l," \t"))){
    l = NULL;
    /* skip the trailing newline token; cast for isspace (CERT STR37-C) */
    if((ptr[0] != '\n') && (!isspace((unsigned char)ptr[0])))
      n++;
  }

  tab = (int*)MALLOC(n*sizeof(int));

  /* Second pass: re-read the same line and parse the values. */
  rewind(pf);
  if(!fgets(line, LINE_SIZE, pf)){
    if(vl >= CRITICAL)
      fprintf(stderr,"Cannot read %s\n",constraints_filename);
    exit(-1);
  }
  fclose(pf);
  l = line;
  i = 0;
  while((ptr=strtok(l," \t"))){
    l = NULL;
    if((ptr[0] != '\n') && (!isspace((unsigned char)ptr[0]))){
      if(i < n)
        tab[i] = atoi(ptr);
      else{
        if(vl >= CRITICAL)
          fprintf(stderr, "More than %d entries in %s\n", n, constraints_filename);
        exit(-1);
      }
      i++;
    }
  }

  if( i != n ){
    if(vl >= CRITICAL)
      fprintf(stderr, "Read %d entries while expecting %d ones\n", i, n);
    exit(-1);
  }

  /* constraints are kept sorted increasingly */
  qsort(tab,n,sizeof(int),int_cmp_inc);

  /* cpy_flag = 0: the topology takes ownership of tab */
  return tm_topology_set_binding_constraints_cpy(tab, n, topology, 0);
}
|
||||
|
||||
|
||||
/* Copy the node ids of the leaf level into a freshly allocated array
   (*numbering, owned by the caller) and report its length in *nb_nodes. */
void topology_numbering_cpy(tm_topology_t *topology,int **numbering,int *nb_nodes)
{
  int leaf = topology->nb_levels - 1;
  int count = topology->nb_nodes[leaf];

  *nb_nodes = count;
  if(tm_get_verbose_level() >= INFO)
    printf("nb_nodes=%d\n",count);

  *numbering = (int*)MALLOC(sizeof(int)*count);
  memcpy(*numbering, topology->node_id[leaf], sizeof(int)*count);
}
|
||||
|
||||
/* Copy the per-level arity array into a freshly allocated array (*arity,
   owned by the caller) and report the number of levels in *nb_levels. */
void topology_arity_cpy(tm_topology_t *topology,int **arity,int *nb_levels)
{
  size_t bytes;

  *nb_levels = topology->nb_levels;
  bytes = sizeof(int) * (size_t)(*nb_levels);
  *arity = (int*)MALLOC(bytes);
  memcpy(*arity, topology->arity, bytes);
}
|
||||
|
||||
/* Copy the constraints array (if any) into a freshly allocated array
   (*constraints, owned by the caller; NULL when the topology has none)
   and report its length in *nb_constraints. */
void topology_constraints_cpy(tm_topology_t *topology,int **constraints,int *nb_constraints)
{
  *nb_constraints = topology->nb_constraints;
  *constraints = NULL;

  if(topology->constraints == NULL)
    return;

  *constraints = (int*)MALLOC(sizeof(int)*(*nb_constraints));
  memcpy(*constraints, topology->constraints, sizeof(int)*(*nb_constraints));
}
|
||||
|
||||
/* Copy the per-level cost array into a freshly allocated array (*cost,
   owned by the caller).  Assumes topology->cost is non-NULL. */
void topology_cost_cpy(tm_topology_t *topology,double **cost)
{
  size_t bytes = sizeof(double) * (size_t)topology->nb_levels;

  *cost = (double*)MALLOC(bytes);
  memcpy(*cost, topology->cost, bytes);
}
|
||||
|
||||
static void split_arity_level(int **arity, double **cost, int *nb_levels, int n, int f);

/* Recursively rewrite the arity/cost arrays so that any level whose arity is
   a strict multiple of 3 (preferred) or of 2 is split into two levels of
   smaller arity.  Scans the levels bottom-up, starting at n.
   FIX: the original duplicated ~35 lines for the factor-3 and factor-2
   branches; the copy is factored out into split_arity_level(). */
void optimize_arity(int **arity, double **cost, int *nb_levels,int n)
{
  int a;

  if( n < 0 )
    return;

  a = (*arity)[n];
  if( (a%3 == 0) && (a > 3) )
    split_arity_level(arity, cost, nb_levels, n, 3);
  else if( (a%2 == 0) && (a > 2) )
    split_arity_level(arity, cost, nb_levels, n, 2);
  else /* nothing to split here: move up one level */
    optimize_arity(arity, cost, nb_levels, n-1);
}

/* Insert a new level at position n: level n gets arity f, level n+1 the
   remaining factor a/f, both inheriting the cost of the original level n.
   The arrays are reallocated (old ones freed) and the optimization is
   continued recursively. */
static void split_arity_level(int **arity, double **cost, int *nb_levels, int n, int f)
{
  int i;
  int a = (*arity)[n];
  int *new_arity = NULL;
  double *new_cost = NULL;

  (*nb_levels)++;
  new_arity = (int*)MALLOC(sizeof(int)*(*nb_levels));
  new_cost = (double*)MALLOC(sizeof(double)*(*nb_levels));

  /* copy the unchanged prefix */
  for( i = 0 ; i < n ; i++ ){
    new_arity[i] = (*arity)[i];
    new_cost[i] = (*cost)[i];
  }
  /* level n becomes arity f, level n+1 takes the remaining factor;
     both keep the original level's cost */
  new_arity[n] = f;
  new_cost[n] = (*cost)[n];
  new_arity[n+1] = a/f;
  new_cost[n+1] = (*cost)[n];
  /* shift the suffix down by one level */
  for( i = n+2 ; i < *nb_levels ; i++ ){
    new_arity[i] = (*arity)[i-1];
    new_cost[i] = (*cost)[i-1];
  }
  FREE(*arity);
  FREE(*cost);

  /* if the inserted level still has arity f, keep splitting at the same
     position; otherwise continue with the freshly inserted level */
  if(new_arity[n+1] == f)
    optimize_arity(&new_arity, &new_cost, nb_levels, n);
  else
    optimize_arity(&new_arity, &new_cost, nb_levels, n+1);

  *arity = new_arity;
  *cost = new_cost;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* Replace *topology by an equivalent topology whose composite arities have
   been split into smaller levels (see optimize_arity).  The old topology is
   freed; cost and constraints arrays are carried over to the new one. */
void tm_optimize_topology(tm_topology_t **topology){
  int nb_levels, nb_nodes, nb_constraints;
  int *arity = NULL, *numbering = NULL, *constraints = NULL;
  double *cost = NULL;
  tm_topology_t *new_topo = NULL;
  unsigned int vl = tm_get_verbose_level();
  int i;

  if(vl >= DEBUG)
    tm_display_arity(*topology);

  /* extract working copies of the topology description */
  topology_arity_cpy(*topology,&arity,&nb_levels);
  topology_numbering_cpy(*topology,&numbering,&nb_nodes);
  topology_constraints_cpy(*topology,&constraints,&nb_constraints);
  topology_cost_cpy(*topology,&cost);

  /* split composite arities, then rebuild a topology from the result */
  optimize_arity(&arity,&cost,&nb_levels,nb_levels-2);
  new_topo = tm_build_synthetic_topology(arity, NULL, nb_levels,numbering,nb_nodes);
  new_topo->cost = cost;                 /* ownership transferred */
  new_topo->constraints = constraints;   /* ownership transferred */
  new_topo->nb_constraints = nb_constraints;
  new_topo->nb_proc_units = (*topology)->nb_proc_units;
  new_topo->oversub_fact = (*topology)->oversub_fact;

  if(vl >= DEBUG){
    if(constraints){
      printf("Constraints: ");
      for(i=0;i<nb_constraints;i++)
        printf("%d - ",constraints[i]);
      printf("\n");
    }
    tm_display_arity(new_topo);
  }

  /* these copies were consumed by tm_build_synthetic_topology */
  FREE(arity);
  FREE(numbering);
  tm_free_topology(*topology);

  *topology = new_topo;
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
Build a synthetic balanced topology
|
||||
|
||||
arity : array of arity of the first nb_level (of size nb_levels)
|
||||
cost : array of costs between the levels (of size nb_levels)
|
||||
core_numbering: numbering of the core by the system. Array of size nb_core_per_node
|
||||
|
||||
nb_core_per_nodes: number of cores of a given node size of the array core_numbering
|
||||
|
||||
The numbering of the cores is done in round robin fashion after a width traversal of the topology.
|
||||
for example:
|
||||
{0,1,2,3} becomes 0,1,2,3,4,5,6,7...
|
||||
and
|
||||
{0,2,1,3} becomes 0,2,1,3,4,6,5,7,...
|
||||
*/
|
||||
|
||||
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes)
|
||||
{
|
||||
tm_topology_t *topology = NULL;
|
||||
int i,j,n;
|
||||
|
||||
|
||||
topology = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
|
||||
topology->nb_constraints = 0;
|
||||
topology->oversub_fact = 1;
|
||||
topology->constraints = NULL;
|
||||
topology->nb_levels = nb_levels;
|
||||
topology->arity = (int*)MALLOC(sizeof(int)*topology->nb_levels);
|
||||
topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
|
||||
topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
|
||||
topology->nb_nodes = (size_t *)MALLOC(sizeof(size_t)*topology->nb_levels);
|
||||
if(cost)
|
||||
topology->cost = (double*)CALLOC(topology->nb_levels,sizeof(double));
|
||||
else
|
||||
topology->cost = NULL;
|
||||
|
||||
memcpy(topology->arity, arity, sizeof(int)*nb_levels);
|
||||
if(cost)
|
||||
memcpy(topology->cost, cost, sizeof(double)*nb_levels);
|
||||
|
||||
n = 1;
|
||||
for( i = 0 ; i < topology->nb_levels ; i++ ){
|
||||
topology->nb_nodes[i] = n;
|
||||
topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
|
||||
topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
|
||||
if( i < topology->nb_levels-1){
|
||||
for( j = 0 ; j < n ; j++ ){
|
||||
topology->node_id[i][j] = j;
|
||||
topology->node_rank[i][j]=j;
|
||||
}
|
||||
}else{
|
||||
for( j = 0 ; j < n ; j++ ){
|
||||
int id = core_numbering[j%nb_core_per_nodes] + (nb_core_per_nodes)*(j/nb_core_per_nodes);
|
||||
topology->node_id[i][j] = id;
|
||||
topology->node_rank[i][id] = j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (i == topology->nb_levels-1){
|
||||
topology->nb_constraints = n;
|
||||
topology->nb_proc_units = n;
|
||||
}
|
||||
|
||||
n *= topology->arity[i];
|
||||
}
|
||||
if(cost){
|
||||
/*aggregate costs*/
|
||||
for( i = topology->nb_levels-2 ; i >= 0 ; i-- )
|
||||
topology->cost[i] += topology->cost[i+1];
|
||||
}
|
||||
|
||||
return topology;
|
||||
}
|
||||
|
||||
|
||||
/* Allocate and fill identity node_id / node_rank tables for every level of a
   synthetic topology whose arity array is already set.  Also sets
   nb_constraints and nb_proc_units from the leaf-level width.
   Exits on allocation failure. */
void build_synthetic_proc_id(tm_topology_t *topology)
{
  int i;
  size_t j,n = 1;

  topology->node_id = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
  topology->node_rank = (int**)MALLOC(sizeof(int*)*topology->nb_levels);
  topology->nb_nodes = (size_t*) MALLOC(sizeof(size_t)*topology->nb_levels);

  for( i = 0 ; i < topology->nb_levels ; i++ ){
    topology->nb_nodes[i] = n;
    /* FIX: the arrays hold int, but the original allocated
       sizeof(long int) per element — a mismatched (over-)allocation. */
    topology->node_id[i] = (int*)MALLOC(sizeof(int)*n);
    topology->node_rank[i] = (int*)MALLOC(sizeof(int)*n);
    /* FIX: also check node_rank, which was previously unchecked */
    if ( !topology->node_id[i] || !topology->node_rank[i] ){
      if(tm_get_verbose_level() >= CRITICAL)
        fprintf(stderr,"Cannot allocate level %d (of size %lu) of the topology\n", i, (unsigned long int)n);
      exit(-1);
    }

    if (i == topology->nb_levels-1){
      topology->nb_constraints = n;
      topology->nb_proc_units = n;
    }

    /* identity numbering on every level */
    for( j = 0 ; j < n ; j++ ){
      topology->node_id[i][j] = j;
      topology->node_rank[i][j] = j;
    }
    n *= topology->arity[i];
  }
}
|
||||
|
||||
|
||||
|
||||
/* Append an extra leaf level that replicates each current leaf node
   oversub_fact times (zero-cost links), allowing oversub_fact processes to
   be mapped on each processing unit.  A factor of 0 or 1 is a no-op. */
void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact){
  int leaf, j, n;

  if(oversub_fact <= 1)
    return;

  /* grow every per-level array by one level */
  topology->nb_levels++;
  topology->arity = (int*) REALLOC(topology->arity, sizeof(int)*topology->nb_levels);
  topology->cost = (double*) REALLOC(topology->cost, sizeof(double)*topology->nb_levels);
  topology->node_id = (int**) REALLOC(topology->node_id, sizeof(int*)*topology->nb_levels);
  topology->node_rank = (int**) REALLOC(topology->node_rank, sizeof(int*)*topology->nb_levels);
  topology->nb_nodes = (size_t *)REALLOC(topology->nb_nodes, sizeof(size_t)*topology->nb_levels);
  topology->oversub_fact = oversub_fact;

  leaf = topology->nb_levels - 1;
  n = topology->nb_nodes[leaf-1] * oversub_fact;

  /* the old leaf level now fans out into oversub_fact children at no cost */
  topology->arity[leaf-1] = oversub_fact;
  topology->cost[leaf-1] = 0;
  topology->node_id[leaf] = (int*)MALLOC(sizeof(int)*n);
  topology->node_rank[leaf] = (int*)MALLOC(sizeof(int)*n);
  topology->nb_nodes[leaf] = n;

  /* each new leaf reuses the id of the unit it oversubscribes */
  for( j = 0 ; j < n ; j++ ){
    int id = topology->node_id[leaf-1][j/oversub_fact];
    topology->node_id[leaf][j] = id;
    topology->node_rank[leaf][id] = j;
  }
}
|
22
ompi/mca/topo/treematch/treematch/tm_topology.h
Обычный файл
22
ompi/mca/topo/treematch/treematch/tm_topology.h
Обычный файл
@ -0,0 +1,22 @@
|
||||
/* FIX: add an include guard (missing in the new header); the guard name
   follows the project's existing convention (see tm_tree.h). */
#ifndef __TM_TOPOLOGY_H__
#define __TM_TOPOLOGY_H__

#include <hwloc.h>
#include "tm_tree.h"

tm_topology_t* get_local_topo_with_hwloc(void);
tm_topology_t* hwloc_to_tm(char *filename);
int int_cmp_inc(const void* x1,const void* x2);
void optimize_arity(int **arity, double **cost, int *nb_levels,int n);
int symetric(hwloc_topology_t topology);
tm_topology_t * tgt_to_tm(char *filename);
void tm_display_arity(tm_topology_t *topology);
void tm_display_topology(tm_topology_t *topology);
void tm_free_topology(tm_topology_t *topology);
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
void tm_optimize_topology(tm_topology_t **topology);
int tm_topology_add_binding_constraints(char *constraints_filename, tm_topology_t *topology);
int topo_nb_proc(hwloc_topology_t topology,int N);
void topology_arity(tm_topology_t *topology,int **arity,int *nb_levels);
void topology_constraints(tm_topology_t *topology,int **constraints,int *nb_constraints);
void topology_cost(tm_topology_t *topology,double **cost);
void topology_numbering(tm_topology_t *topology,int **numbering,int *nb_nodes);
double ** topology_to_arch(hwloc_topology_t topology);

#endif /* __TM_TOPOLOGY_H__ */
|
||||
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,69 +1,22 @@
|
||||
#ifndef __TREE_H__
|
||||
#define __TREE_H__
|
||||
#ifndef __TM_TREE_H__
|
||||
#define __TM_TREE_H__
|
||||
#include <stdlib.h>
|
||||
#include "treematch.h"
|
||||
|
||||
|
||||
typedef struct _node_info_t{
|
||||
int submit_date;
|
||||
int job_id;
|
||||
int finish_date;
|
||||
} job_info_t;
|
||||
|
||||
typedef struct _tree_t{
|
||||
int constraint; /* tells if the tree has been constructed with constraints on the nodes or not. usefull for freeing it. needs to be set on the root only*/
|
||||
struct _tree_t **child;
|
||||
struct _tree_t *parent;
|
||||
struct _tree_t *tab_child; /*the pointer to be freed*/
|
||||
double val;
|
||||
int arity;
|
||||
int depth;
|
||||
int id;
|
||||
int uniq;
|
||||
int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/
|
||||
job_info_t *job_info;
|
||||
}tree_t;
|
||||
|
||||
/* Maximum number of levels in the tree*/
|
||||
#define MAX_LEVELS 100
|
||||
|
||||
typedef struct {
|
||||
int *arity; /* arity of the nodes of each level*/
|
||||
int nb_levels; /*number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
|
||||
int *nb_nodes; /*nb of nodes of each level*/
|
||||
int *nb_free_nodes; /*nb of available nodes of each level*/
|
||||
int **node_id; /*ID of the nodes of the tree for each level*/
|
||||
int **free_nodes; /*ID of the nodes of the tree for each level*/
|
||||
}tm_topology_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
double ** mat;
|
||||
double * sum_row;
|
||||
int order;
|
||||
} affinity_mat_t;
|
||||
|
||||
|
||||
|
||||
tree_t * build_tree(double **tab,int N);
|
||||
tree_t * build_tree_from_topology(tm_topology_t *topology,double **tab,int N, double *obj_weight, double *comm_speed);
|
||||
void map_tree(tree_t *,tree_t*);
|
||||
void update_val(tm_affinity_mat_t *aff_mat,tm_tree_t *parent);
|
||||
void display_tab(double **tab,int N);
|
||||
double speed(int depth);
|
||||
void set_node(tree_t *node,tree_t ** child, int arity,tree_t *parent,int id,double val,tree_t *deb_tab_child, int depth);
|
||||
void free_constraint_tree(tree_t *tree);
|
||||
void free_tree(tree_t *tree);
|
||||
void free_tab_double(double**tab,int N);
|
||||
void free_tab_int(int**tab,int N);
|
||||
void update_val(affinity_mat_t *aff_mat,tree_t *parent);
|
||||
void FREE_tree(tree_t *tree);
|
||||
void FREE_tab_double(double**,int);
|
||||
void set_node(tm_tree_t *node,tm_tree_t ** child, int arity,tm_tree_t *parent,
|
||||
int id,double val,tm_tree_t *tab_child,int depth);
|
||||
|
||||
|
||||
typedef struct _group_list_t{
|
||||
struct _group_list_t *next;
|
||||
tree_t **tab;
|
||||
tm_tree_t **tab;
|
||||
double val;
|
||||
double sum_neighbour;
|
||||
double wg;
|
||||
int id;
|
||||
double *bound;
|
||||
}group_list_t;
|
||||
|
||||
|
||||
@ -74,21 +27,13 @@ typedef struct{
|
||||
}adjacency_t;
|
||||
|
||||
|
||||
|
||||
/* for debugging malloc */
|
||||
/* #define __DEBUG_MY_MALLOC__ */
|
||||
#undef __DEBUG_MY_MALLOC__
|
||||
#ifdef __DEBUG_MY_MALLOC__
|
||||
#include "tm_malloc.h"
|
||||
#define MALLOC(x) my_malloc(x,__FILE__,__LINE__)
|
||||
#define CALLOC(x,y) my_calloc(x,y,__FILE__,__LINE__)
|
||||
#define FREE my_free
|
||||
#define MEM_CHECK my_mem_check
|
||||
#else
|
||||
#define MALLOC malloc
|
||||
#define CALLOC calloc
|
||||
#define FREE free
|
||||
#define MEM_CHECK my_mem_check
|
||||
#endif
|
||||
typedef struct _work_unit_t{
|
||||
int nb_groups;
|
||||
int *tab_group;
|
||||
int done;
|
||||
int nb_work;
|
||||
struct _work_unit_t *next;
|
||||
}work_unit_t;
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -1,11 +1,34 @@
|
||||
#include "tm_verbose.h"
|
||||
#include <stdio.h>
|
||||
static unsigned int verbose_level = ERROR;
|
||||
static FILE *output = NULL;
|
||||
|
||||
void set_verbose_level(unsigned int level){
|
||||
void tm_set_verbose_level(unsigned int level){
|
||||
verbose_level = level;
|
||||
}
|
||||
|
||||
|
||||
unsigned int get_verbose_level(){
|
||||
unsigned int tm_get_verbose_level(){
|
||||
return verbose_level;
|
||||
}
|
||||
|
||||
int tm_open_verbose_file(char *filename){
|
||||
output = fopen(filename,"w");
|
||||
if(output == NULL)
|
||||
return 0;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
int tm_close_verbose_file(void){
|
||||
if(output != NULL)
|
||||
return fclose(output);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
FILE *tm_get_verbose_output(){
|
||||
if(!output)
|
||||
return stdout;
|
||||
else
|
||||
return output;
|
||||
}
|
||||
|
@ -1,11 +1,22 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#define NONE 0
|
||||
/* output in stderr*/
|
||||
#define CRITICAL 1
|
||||
#define ERROR 2
|
||||
/* output in stdout*/
|
||||
#define WARNING 3
|
||||
#define INFO 4
|
||||
#define DEBUG 5
|
||||
|
||||
void set_verbose_level(unsigned int level);
|
||||
unsigned int get_verbose_level(void);
|
||||
#define TIMING 4
|
||||
#define INFO 5
|
||||
#define DEBUG 6
|
||||
|
||||
|
||||
/* return 0 on errror and 1 on success */
|
||||
int tm_open_verbose_file(char *filename);
|
||||
int tm_close_verbose_file(void);
|
||||
void tm_set_verbose_level(unsigned int level);
|
||||
unsigned int tm_get_verbose_level(void);
|
||||
FILE * tm_get_verbose_output(void);
|
||||
|
||||
#define tm_verbose_printf(level, ...) level <= tm_get_verbose_level()?fprintf(tm_get_verbose_output(),__VA_ARGS__):0
|
||||
|
||||
|
188
ompi/mca/topo/treematch/treematch/treematch.h
Обычный файл
188
ompi/mca/topo/treematch/treematch/treematch.h
Обычный файл
@ -0,0 +1,188 @@
|
||||
#ifndef __TREEMATCH_H__
|
||||
#define __TREEMATCH_H__
|
||||
|
||||
/* size_t definition */
|
||||
#include <stddef.h>
|
||||
#include "tm_verbose.h"
|
||||
|
||||
/********* TreeMatch Public Enum **********/
|
||||
|
||||
/*type of topology files that can be read*/
|
||||
typedef enum{
|
||||
TM_FILE_TYPE_UNDEF,
|
||||
TM_FILE_TYPE_XML,
|
||||
TM_FILE_TYPE_TGT
|
||||
} tm_file_type_t;
|
||||
|
||||
/* different metrics to evaluate the solution */
|
||||
typedef enum{
|
||||
TM_METRIC_SUM_COM = 1,
|
||||
TM_METRIC_MAX_COM = 2,
|
||||
TM_METRIC_HOP_BYTE = 3
|
||||
} tm_metric_t;
|
||||
|
||||
|
||||
/********* TreeMatch Public Structures **********/
|
||||
|
||||
typedef struct _job_info_t{
|
||||
int submit_date;
|
||||
int job_id;
|
||||
int finish_date;
|
||||
} tm_job_info_t;
|
||||
|
||||
typedef struct _tree_t{
|
||||
int constraint; /* tells if the tree has been constructed with constraints on the nodes or not.
|
||||
Usefull for freeing it. needs to be set on the root only*/
|
||||
struct _tree_t **child;
|
||||
struct _tree_t *parent;
|
||||
struct _tree_t *tab_child; /*the pointer to be freed*/
|
||||
double val;
|
||||
int arity;
|
||||
int depth;
|
||||
int id;
|
||||
int uniq;
|
||||
int dumb; /* 1 if the node belongs to a dumb tree: hence has to be freed separately*/
|
||||
tm_job_info_t *job_info;
|
||||
int nb_processes; /* number of grouped processes (i.e. the order of the affinity matrix). Set at the root only*/
|
||||
}tm_tree_t; /* FT : changer le nom : tm_grouap_hierachy_t ?*/
|
||||
|
||||
/* Maximum number of levels in the tree*/
|
||||
#define TM_MAX_LEVELS 100
|
||||
|
||||
typedef struct {
|
||||
int *arity; /* arity of the nodes of each level*/
|
||||
int nb_levels; /*number of levels of the tree. Levels are numbered from top to bottom starting at 0*/
|
||||
size_t *nb_nodes; /*nb of nodes of each level*/
|
||||
int **node_id; /*ID of the nodes of the tree for each level*/
|
||||
int **node_rank ; /*rank of the nodes of the tree for each level given its ID: this is the inverse tab of node_id*/
|
||||
size_t *nb_free_nodes; /*nb of available nodes of each level*/
|
||||
int **free_nodes; /*tab of node that are free: useful to simulate batch scheduler*/
|
||||
double *cost; /*cost of the communication depending on the distance:
|
||||
cost[i] is the cost for communicating at distance nb_levels-i*/
|
||||
int *constraints; /* array of constraints: id of the nodes where it is possible to map processes */
|
||||
int nb_constraints; /* Size of the above array */
|
||||
int oversub_fact; /* maximum number of processes to be mapped on a given node */
|
||||
int nb_proc_units; /* the real number of units used for computation */
|
||||
}tm_topology_t;
|
||||
|
||||
|
||||
typedef struct {
|
||||
double ** mat;
|
||||
double * sum_row;
|
||||
int order;
|
||||
} tm_affinity_mat_t;
|
||||
|
||||
/*
|
||||
sigma_i is such that process i is mapped on core sigma_i
|
||||
k_i is such that core i exectutes process k_i_j (0<=j<<=oversubscribing factor - 1)
|
||||
|
||||
size of sigma is the number of processes (nb_objs)
|
||||
size of k is the number of cores/nodes (nb_compute_units)
|
||||
size of k[i] is the number of process we can execute per nodes (1 if no oversubscribing)
|
||||
|
||||
We must have numbe of process<=number of cores
|
||||
|
||||
k[i] == NULL if no process is mapped on core i
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
int *sigma;
|
||||
size_t sigma_length;
|
||||
int **k;
|
||||
size_t k_length;
|
||||
int oversub_fact;
|
||||
}tm_solution_t;
|
||||
|
||||
|
||||
/************ TreeMatch Public API ************/
|
||||
|
||||
/* load XML or TGT topology */
|
||||
tm_topology_t *tm_load_topology(char *arch_filename, tm_file_type_t arch_file_type);
|
||||
/*
|
||||
Alternatively, build a synthetic balanced topology.
|
||||
|
||||
nb_levels : number of levels of the topology +1 (the last level must be of cost 0 and arity 0).
|
||||
arity : array of arity of the first nb_level (of size nb_levels)
|
||||
cost : array of costs between the levels (of size nb_levels)
|
||||
core_numbering: numbering of the core by the system. Array of size nb_core_per_node
|
||||
|
||||
nb_core_per_nodes: number of cores of a given node. Size of the array core_numbering
|
||||
|
||||
both arity and cost are copied inside tm_build_synthetic_topology
|
||||
|
||||
The numbering of the cores is done in round robin fashion after a width traversal of the topology.
|
||||
for example:
|
||||
{0,1,2,3} becomes 0,1,2,3,4,5,6,7...
|
||||
and
|
||||
{0,2,1,3} becomes 0,2,1,3,4,6,5,7,...
|
||||
|
||||
Example of call to build the 128.tgt file: tleaf 4 16 500 2 100 2 50 2 10
|
||||
|
||||
double cost[5] = {500,100,50,10,0};
|
||||
int arity[5] = {16,2,2,2,0};
|
||||
int cn[5]={0,1};
|
||||
|
||||
topology = tm_build_synthetic_topology(arity,cost,5,cn,2);
|
||||
|
||||
*/
|
||||
tm_topology_t *tm_build_synthetic_topology(int *arity, double *cost, int nb_levels, int *core_numbering, int nb_core_per_nodes);
|
||||
/* load affinity matrix */
|
||||
tm_affinity_mat_t *tm_load_aff_mat(char *com_filename);
|
||||
/*
|
||||
Alternativelly, build the affinity matrix from a array of array of matrix of size order by order
|
||||
For performance reason mat is not copied.
|
||||
*/
|
||||
tm_affinity_mat_t * tm_build_affinity_mat(double **mat, int order);
|
||||
/* Add constraints to toplogy
|
||||
Return 1 on success and 0 if the constari,ts id are not compatible withe nodes id */
|
||||
int tm_topology_add_binding_constraints(char *bind_filename, tm_topology_t *topology);
|
||||
/* Alternatively, set the constraints from an array.
|
||||
Return 1 on success and 0 if the constari,ts id are not compatible withe nodes id
|
||||
|
||||
The array constraints is copied inside tm_topology_set_binding_constraints
|
||||
|
||||
*/
|
||||
int tm_topology_set_binding_constraints(int *constraints, int nb_constraints, tm_topology_t *topology);
|
||||
/* display arity of the topology */
|
||||
void tm_display_arity(tm_topology_t *topology);
|
||||
/* display the full topology */
|
||||
void tm_display_topology(tm_topology_t *topology);
|
||||
/* Optimize the topology by decomposing arities */
|
||||
void tm_optimize_topology(tm_topology_t **topology);
|
||||
/* Manage oversubscribing */
|
||||
void tm_enable_oversubscribing(tm_topology_t *topology, unsigned int oversub_fact);
|
||||
/* core of the treematch: compute the solution tree */
|
||||
tm_tree_t *tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed);
|
||||
/* compute the mapping according to teh tree an dthe core numbering*/
|
||||
tm_solution_t *tm_compute_mapping(tm_topology_t *topology, tm_tree_t *comm_tree);
|
||||
/* display the solution*/
|
||||
double tm_display_solution(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_solution_t *sol, tm_metric_t metric);
|
||||
/* display RR, packed, MPIPP*/
|
||||
void tm_display_other_heuristics(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, tm_metric_t metric);
|
||||
/* free TM strutures*/
|
||||
void tm_free_topology(tm_topology_t *topology);
|
||||
void tm_free_tree(tm_tree_t *comm_tree);
|
||||
void tm_free_solution(tm_solution_t *sol);
|
||||
void tm_free_affinity_mat(tm_affinity_mat_t *aff_mat);
|
||||
/* manage verbosity of TM*/
|
||||
void tm_set_verbose_level(unsigned int level);
|
||||
unsigned int tm_get_verbose_level(void);
|
||||
/* finalize treematch :check memory if necessary, and free internal variables (thread pool)*/
|
||||
void tm_finalize();
|
||||
|
||||
/*
|
||||
Ask for exhaustive search: may be very long
|
||||
new_val == 0 : no exhuative search
|
||||
new_val != 0 : exhuative search
|
||||
*/
|
||||
void tm_set_exhaustive_search_flag(int new_val);
|
||||
int tm_get_exhaustive_search_flag();
|
||||
|
||||
|
||||
/* Setting the maximum number of threads you want to use in parallel parts of TreeMatch */
|
||||
void tm_set_max_nb_threads(unsigned int val);
|
||||
|
||||
|
||||
#include "tm_malloc.h"
|
||||
|
||||
#endif
|
@ -22,7 +22,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef UTHASH_H
|
||||
#define UTHASH_H
|
||||
#define UTHASH_H
|
||||
|
||||
#include <string.h> /* memcmp,strlen */
|
||||
#include <stddef.h> /* ptrdiff_t */
|
||||
@ -49,7 +49,7 @@ do {
|
||||
char **_da_dst = (char**)(&(dst)); \
|
||||
*_da_dst = (char*)(src); \
|
||||
} while(0)
|
||||
#else
|
||||
#else
|
||||
#define DECLTYPE_ASSIGN(dst,src) \
|
||||
do { \
|
||||
(dst) = DECLTYPE(dst)(src); \
|
||||
@ -121,9 +121,9 @@ do {
|
||||
HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
|
||||
|
||||
#else
|
||||
#define HASH_BLOOM_MAKE(tbl)
|
||||
#define HASH_BLOOM_FREE(tbl)
|
||||
#define HASH_BLOOM_ADD(tbl,hashv)
|
||||
#define HASH_BLOOM_MAKE(tbl)
|
||||
#define HASH_BLOOM_FREE(tbl)
|
||||
#define HASH_BLOOM_ADD(tbl,hashv)
|
||||
#define HASH_BLOOM_TEST(tbl,hashv) (1)
|
||||
#endif
|
||||
|
||||
@ -148,7 +148,7 @@ do {
|
||||
|
||||
#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
|
||||
HASH_ADD_KEYPTR(hh,head,&((add)->fieldname),keylen_in,add)
|
||||
|
||||
|
||||
#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
|
||||
do { \
|
||||
unsigned _ha_bkt; \
|
||||
@ -300,10 +300,10 @@ do {
|
||||
} \
|
||||
} while (0)
|
||||
#else
|
||||
#define HASH_FSCK(hh,head)
|
||||
#define HASH_FSCK(hh,head)
|
||||
#endif
|
||||
|
||||
/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
|
||||
/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
|
||||
* the descriptor to which this macro is defined for tuning the hash function.
|
||||
* The app can #include <unistd.h> to get the prototype for write(2). */
|
||||
#ifdef HASH_EMIT_KEYS
|
||||
@ -313,12 +313,12 @@ do {
|
||||
write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
|
||||
write(HASH_EMIT_KEYS, keyptr, fieldlen); \
|
||||
} while (0)
|
||||
#else
|
||||
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
|
||||
#else
|
||||
#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
|
||||
#endif
|
||||
|
||||
/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
|
||||
#ifdef HASH_FUNCTION
|
||||
#ifdef HASH_FUNCTION
|
||||
#define HASH_FCN HASH_FUNCTION
|
||||
#else
|
||||
#define HASH_FCN HASH_JEN
|
||||
@ -335,7 +335,7 @@ do {
|
||||
} while (0)
|
||||
|
||||
|
||||
/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
|
||||
/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at
|
||||
* http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */
|
||||
#define HASH_SAX(key,keylen,num_bkts,hashv,bkt) \
|
||||
do { \
|
||||
@ -356,7 +356,7 @@ do {
|
||||
hashv = (hashv * 16777619) ^ _hf_key[_fn_i]; \
|
||||
bkt = hashv & (num_bkts-1); \
|
||||
} while(0);
|
||||
|
||||
|
||||
#define HASH_OAT(key,keylen,num_bkts,hashv,bkt) \
|
||||
do { \
|
||||
unsigned _ho_i; \
|
||||
@ -485,14 +485,14 @@ do {
|
||||
#ifdef HASH_USING_NO_STRICT_ALIASING
|
||||
/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads.
|
||||
* For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error.
|
||||
* MurmurHash uses the faster approach only on CPU's where we know it's safe.
|
||||
* MurmurHash uses the faster approach only on CPU's where we know it's safe.
|
||||
*
|
||||
* Note the preprocessor built-in defines can be emitted using:
|
||||
*
|
||||
* gcc -m64 -dM -E - < /dev/null (on gcc)
|
||||
* cc -## a.c (where a.c is a simple test file) (Sun Studio)
|
||||
*/
|
||||
#if (defined(__i386__) || defined(__x86_64__))
|
||||
#if (defined(__i386__) || defined(__x86_64__))
|
||||
#define MUR_GETBLOCK(p,i) p[i]
|
||||
#else /* non intel */
|
||||
#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 0x3) == 0)
|
||||
@ -562,7 +562,7 @@ do { \
|
||||
#endif /* HASH_USING_NO_STRICT_ALIASING */
|
||||
|
||||
/* key comparison function; return 0 if keys equal */
|
||||
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
|
||||
#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
|
||||
|
||||
/* iterate over items in a known bucket to find desired item */
|
||||
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
|
||||
@ -603,36 +603,36 @@ do {
|
||||
} \
|
||||
if (hh_del->hh_next) { \
|
||||
hh_del->hh_next->hh_prev = hh_del->hh_prev; \
|
||||
}
|
||||
}
|
||||
|
||||
/* Bucket expansion has the effect of doubling the number of buckets
|
||||
* and redistributing the items into the new buckets. Ideally the
|
||||
* items will distribute more or less evenly into the new buckets
|
||||
* (the extent to which this is true is a measure of the quality of
|
||||
* the hash function as it applies to the key domain).
|
||||
*
|
||||
* the hash function as it applies to the key domain).
|
||||
*
|
||||
* With the items distributed into more buckets, the chain length
|
||||
* (item count) in each bucket is reduced. Thus by expanding buckets
|
||||
* the hash keeps a bound on the chain length. This bounded chain
|
||||
* the hash keeps a bound on the chain length. This bounded chain
|
||||
* length is the essence of how a hash provides constant time lookup.
|
||||
*
|
||||
*
|
||||
* The calculation of tbl->ideal_chain_maxlen below deserves some
|
||||
* explanation. First, keep in mind that we're calculating the ideal
|
||||
* maximum chain length based on the *new* (doubled) bucket count.
|
||||
* In fractions this is just n/b (n=number of items,b=new num buckets).
|
||||
* Since the ideal chain length is an integer, we want to calculate
|
||||
* Since the ideal chain length is an integer, we want to calculate
|
||||
* ceil(n/b). We don't depend on floating point arithmetic in this
|
||||
* hash, so to calculate ceil(n/b) with integers we could write
|
||||
*
|
||||
*
|
||||
* ceil(n/b) = (n/b) + ((n%b)?1:0)
|
||||
*
|
||||
*
|
||||
* and in fact a previous version of this hash did just that.
|
||||
* But now we have improved things a bit by recognizing that b is
|
||||
* always a power of two. We keep its base 2 log handy (call it lb),
|
||||
* so now we can write this with a bit shift and logical AND:
|
||||
*
|
||||
*
|
||||
* ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
|
||||
*
|
||||
*
|
||||
*/
|
||||
#define HASH_EXPAND_BUCKETS(tbl) \
|
||||
do { \
|
||||
@ -684,7 +684,7 @@ do {
|
||||
|
||||
|
||||
/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
|
||||
/* Note that HASH_SORT assumes the hash handle name to be hh.
|
||||
/* Note that HASH_SORT assumes the hash handle name to be hh.
|
||||
* HASH_SRT was added to allow the hash handle name to be passed in. */
|
||||
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
|
||||
#define HASH_SRT(hh,head,cmpfcn) \
|
||||
@ -766,10 +766,10 @@ do {
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* This function selects items from one hash into another hash.
|
||||
* The end result is that the selected items have dual presence
|
||||
* in both hashes. There is no copy of the items made; rather
|
||||
* they are added into the new hash through a secondary hash
|
||||
/* This function selects items from one hash into another hash.
|
||||
* The end result is that the selected items have dual presence
|
||||
* in both hashes. There is no copy of the items made; rather
|
||||
* they are added into the new hash through a secondary hash
|
||||
* hash handle that must be present in the structure. */
|
||||
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
|
||||
do { \
|
||||
@ -823,7 +823,7 @@ do {
|
||||
#ifdef NO_DECLTYPE
|
||||
#define HASH_ITER(hh,head,el,tmp) \
|
||||
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
|
||||
el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
|
||||
el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
|
||||
#else
|
||||
#define HASH_ITER(hh,head,el,tmp) \
|
||||
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
|
||||
@ -831,7 +831,7 @@ for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL);
|
||||
#endif
|
||||
|
||||
/* obtain a count of items in the hash */
|
||||
#define HASH_COUNT(head) HASH_CNT(hh,head)
|
||||
#define HASH_COUNT(head) HASH_CNT(hh,head)
|
||||
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
|
||||
|
||||
typedef struct UT_hash_bucket {
|
||||
@ -840,7 +840,7 @@ typedef struct UT_hash_bucket {
|
||||
|
||||
/* expand_mult is normally set to 0. In this situation, the max chain length
|
||||
* threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
|
||||
* the bucket's chain exceeds this length, bucket expansion is triggered).
|
||||
* the bucket's chain exceeds this length, bucket expansion is triggered).
|
||||
* However, setting expand_mult to a non-zero value delays bucket expansion
|
||||
* (that would be triggered by additions to this particular bucket)
|
||||
* until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
|
||||
@ -848,7 +848,7 @@ typedef struct UT_hash_bucket {
|
||||
* multiplier is to reduce bucket expansions, since they are expensive, in
|
||||
* situations where we know that a particular bucket tends to be overused.
|
||||
* It is better to let its chain length grow to a longer yet-still-bounded
|
||||
* value, than to do an O(n) bucket expansion too often.
|
||||
* value, than to do an O(n) bucket expansion too often.
|
||||
*/
|
||||
unsigned expand_mult;
|
||||
|
||||
@ -874,7 +874,7 @@ typedef struct UT_hash_table {
|
||||
* hash distribution; reaching them in a chain traversal takes >ideal steps */
|
||||
unsigned nonideal_items;
|
||||
|
||||
/* ineffective expands occur when a bucket doubling was performed, but
|
||||
/* ineffective expands occur when a bucket doubling was performed, but
|
||||
* afterward, more than half the items in the hash had nonideal chain
|
||||
* positions. If this happens on two consecutive expansions we inhibit any
|
||||
* further expansion, as it's not helping; this happens when the hash
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user